Java Code Examples for org.apache.lucene.document.FieldType#freeze()
The following examples show how to use org.apache.lucene.document.FieldType#freeze().
You can go to the original project or source file by following the links above each example.
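Before the examples, a brief note on what freeze() does: it makes a FieldType immutable, so any later setter call throws IllegalStateException, which makes a configured instance safe to reuse across fields. The sketch below is illustrative only; the field name "body" and the chosen options are assumptions, not taken from any example on this page.

public void freezeSketch() {
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setTokenized(true);
  ft.setStored(false);
  ft.freeze(); // the configuration is now locked

  // ft.setStored(true); // would throw IllegalStateException after freeze()

  Field f = new Field("body", "some text", ft); // the frozen type can be shared by many fields
}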
Example 1
Source File: TestMemoryIndex.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testIndexingPointsAndDocValues() throws Exception {
  FieldType type = new FieldType();
  type.setDimensions(1, 4);
  type.setDocValuesType(DocValuesType.BINARY);
  type.freeze();

  Document doc = new Document();
  byte[] packedPoint = "term".getBytes(StandardCharsets.UTF_8);
  doc.add(new BinaryPoint("field", packedPoint, type));
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  assertEquals(1, leafReader.getPointValues("field").size());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMinPackedValue());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMaxPackedValue());

  BinaryDocValues dvs = leafReader.getBinaryDocValues("field");
  assertEquals(0, dvs.nextDoc());
  assertEquals("term", dvs.binaryValue().utf8ToString());
}
Example 2
Source File: FloatFieldTypeDefinition.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties, Configuration configuration) {
  String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
  if (precisionStepStr != null) {
    _precisionStep = Integer.parseInt(precisionStepStr);
    _typeStored = new FieldType(FloatField.TYPE_STORED);
    _typeStored.setNumericPrecisionStep(_precisionStep);
    _typeStored.freeze();
    _typeNotStored = new FieldType(FloatField.TYPE_NOT_STORED);
    _typeNotStored.setNumericPrecisionStep(_precisionStep);
    _typeNotStored.freeze();
  } else {
    _typeStored = FloatField.TYPE_STORED;
    _typeNotStored = FloatField.TYPE_NOT_STORED;
  }
}
Example 3
Source File: TestOmitTf.java From lucene-solr with Apache License 2.0 | 6 votes |
/** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are docFreq, and sumDocFreq */
public void testStats() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS);
  ft.freeze();
  Field f = newField("foo", "bar", ft);
  doc.add(f);
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  assertEquals(ir.docFreq(new Term("foo", new BytesRef("bar"))),
      ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
  assertEquals(ir.getSumDocFreq("foo"), ir.getSumTotalTermFreq("foo"));
  ir.close();
  dir.close();
}
Example 4
Source File: DateFieldTypeDefinition.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties, Configuration configuration) {
  final String dateFormat = properties.get(DATE_FORMAT);
  if (dateFormat == null) {
    throw new RuntimeException("The property [" + DATE_FORMAT + "] can not be null.");
  }
  final String timeUnitStr = properties.get(TIME_UNIT);
  if (timeUnitStr != null) {
    _timeUnit = TimeUnit.valueOf(timeUnitStr.trim().toUpperCase());
  }
  _simpleDateFormat = new ThreadValue<SimpleDateFormat>() {
    @Override
    protected SimpleDateFormat initialValue() {
      return new SimpleDateFormat(dateFormat);
    }
  };
  String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
  if (precisionStepStr != null) {
    _precisionStep = Integer.parseInt(precisionStepStr);
    _typeNotStored = new FieldType(LongField.TYPE_NOT_STORED);
    _typeNotStored.setNumericPrecisionStep(_precisionStep);
    _typeNotStored.freeze();
  } else {
    _typeNotStored = LongField.TYPE_NOT_STORED;
  }
}
Example 5
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 6 votes |
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.NONE);
  ft.setTokenized(false);
  ft.setStored(true);
  ft.freeze();

  Field title = new Field("title", "", fieldType);
  Field text = new Field("text", "", fieldType);
  Field category = new Field("category", "", fieldType);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);

  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
Example 6
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 6 votes |
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.NONE);
  ft.setTokenized(false);
  ft.setStored(true);
  ft.freeze();

  Field title = new Field("title", "", fieldType);
  Field text = new Field("text", "", fieldType);
  Field category = new Field("category", "", fieldType);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);

  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
Example 7
Source File: TestIndexableField.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override
public IndexableFieldType fieldType() {
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectors(true);
  ft.freeze();
  return ft;
}
Example 8
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testWithSynonym() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));

  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document doc = new Document();
  doc.add(new Field("field", "the quick brown fox", type));
  writer.addDocument(doc);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;

  // query1: simple synonym query
  SynonymQuery synQuery = new SynonymQuery.Builder("field")
      .addTerm(new Term("field", "quick"))
      .addTerm(new Term("field", "fast"))
      .build();
  FieldQuery fieldQuery = highlighter.getFieldQuery(synQuery, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("the <b>quick</b> brown fox", bestFragments[0]);

  // query2: boolean query with synonym query
  BooleanQuery.Builder bq = new BooleanQuery.Builder()
      .add(new BooleanClause(synQuery, Occur.MUST))
      .add(new BooleanClause(new TermQuery(new Term("field", "fox")), Occur.MUST));
  fieldQuery = highlighter.getFieldQuery(bq.build(), reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("the <b>quick</b> brown <b>fox</b>", bestFragments[0]);

  reader.close();
  writer.close();
  dir.close();
}
Example 9
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testSimpleHighlightTest() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();

  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery = highlighter.getFieldQuery(new TermQuery(new Term("field", "foo")), reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
Example 10
Source File: FilterAccessControlFactory.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
private static FieldType getFieldTypeNotStored(IndexableField indexableField) {
  Field field = (Field) indexableField;
  FieldType fieldType = field.fieldType();
  FieldType result = new FieldType(fieldType);
  result.setStored(false);
  result.freeze();
  return result;
}
Example 11
Source File: BaseTermVectorsFormatTestCase.java From lucene-solr with Apache License 2.0 | 5 votes |
protected FieldType fieldType(Options options) {
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(options.positions);
  ft.setStoreTermVectorOffsets(options.offsets);
  ft.setStoreTermVectorPayloads(options.payloads);
  ft.freeze();
  return ft;
}
Example 12
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testFunctionScoreQueryHighlight() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();

  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery = highlighter.getFieldQuery(
      new FunctionScoreQuery(new TermQuery(new Term("field", "foo")), DoubleValuesSource.constant(1)), reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
Example 13
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testBooleanPhraseWithSynonym() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Token syn = new Token("httpwwwfacebookcom", 6, 29);
  syn.setPositionIncrement(0);
  CannedTokenStream ts = new CannedTokenStream(
      new Token("test", 0, 4),
      new Token("http", 6, 10),
      syn,
      new Token("www", 13, 16),
      new Token("facebook", 17, 25),
      new Token("com", 26, 29)
  );
  Field field = new Field("field", ts, type);
  doc.add(field);
  doc.add(new StoredField("field", "Test: http://www.facebook.com"));
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();

  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;

  // query1: match
  PhraseQuery pq = new PhraseQuery("field", "test", "http", "www", "facebook", "com");
  FieldQuery fieldQuery = highlighter.getFieldQuery(pq, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

  // query2: match
  PhraseQuery pq2 = new PhraseQuery("field", "test", "httpwwwfacebookcom", "www", "facebook", "com");
  fieldQuery = highlighter.getFieldQuery(pq2, reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

  // query3: OR query1 and query2 together
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  bq.add(pq, BooleanClause.Occur.SHOULD);
  bq.add(pq2, BooleanClause.Occur.SHOULD);
  fieldQuery = highlighter.getFieldQuery(bq.build(), reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

  reader.close();
  writer.close();
  dir.close();
}
Example 14
Source File: BBoxStrategy.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates this strategy.
 * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL
 * field too. Search requires pointValues (or legacy numerics), and relevancy requires docValues. If these features
 * aren't needed then disable them.
 */
public BBoxStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
  super(ctx, fieldNamePrefix);
  field_bbox = fieldNamePrefix;
  field_minX = fieldNamePrefix + SUFFIX_MINX;
  field_maxX = fieldNamePrefix + SUFFIX_MAXX;
  field_minY = fieldNamePrefix + SUFFIX_MINY;
  field_maxY = fieldNamePrefix + SUFFIX_MAXY;
  field_xdl = fieldNamePrefix + SUFFIX_XDL;

  fieldType.freeze();
  this.optionsFieldType = fieldType;

  int numQuads = 0;
  if ((this.hasStored = fieldType.stored())) {
    numQuads++;
  }
  if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) {
    numQuads++;
  }
  if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
    numQuads++;
  }

  if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType
      && ((LegacyFieldType) fieldType).numericType() != null) {
    if (hasPointVals) {
      throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
    }
    final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
    if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
      throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
    }
    numQuads++;
    legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
    legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
    legacyNumericFieldType.freeze();
  } else {
    legacyNumericFieldType = null;
  }

  if (hasPointVals || legacyNumericFieldType != null) { // if we have an index...
    xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED);
    xdlFieldType.setIndexOptions(IndexOptions.DOCS);
    xdlFieldType.freeze();
  } else {
    xdlFieldType = null;
  }

  this.fieldsLen = numQuads * 4 + (xdlFieldType != null ? 1 : 0);
}
Example 15
Source File: DocumentMaker.java From SourcererCC with GNU General Public License v3.0 | 4 votes |
public Document prepareDocument(Bag bag) {
  Document document = new Document();
  StoredField strField = new StoredField("id", bag.getId() + "");
  document.add(strField);
  StoredField functionId = new StoredField("functionId", bag.getFunctionId() + "");
  document.add(functionId);
  StoredField sizeField = new StoredField("size", bag.getSize() + "");
  document.add(sizeField);
  StringBuilder tokenString = new StringBuilder();
  int ct = BlockInfo.getMinimumSimilarityThreshold(bag.getSize(), SearchManager.th);
  StoredField computedThresholdField = new StoredField("ct", ct + "");
  int lct = BlockInfo.getMinimumSimilarityThreshold(bag.getSize(), (SearchManager.th - 0.5f));
  StoredField lenientComputedThresholdField = new StoredField("lct", lct + "");
  document.add(sizeField);
  document.add(computedThresholdField);
  document.add(lenientComputedThresholdField);
  int prefixLength = BlockInfo.getPrefixSize(bag.getSize(), ct);
  for (TokenFrequency tf : bag) {
    for (int i = 0; i < tf.getFrequency(); i++) {
      tokenString.append(tf.getToken().getValue() + " ");
      // System.out.println(tf.getToken().getValue());
    }
    prefixLength -= tf.getFrequency();
    if (prefixLength <= 0) {
      break;
    }
  }
  @SuppressWarnings("deprecation")
  //Field field = new Field("tokens", tokenString.trim(), Field.Store.NO,
  //    Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
  FieldType fieldType = new FieldType();
  fieldType.setIndexed(true);
  fieldType.setStoreTermVectorPositions(true);
  fieldType.setStoreTermVectors(true);
  fieldType.setTokenized(true);
  fieldType.freeze();
  Field field = new Field("tokens", tokenString.toString().trim(), fieldType);
  /*
  TextField textField = new TextField("tokens", tokenString.trim(), Field.Store.NO);
  textField.fieldType().setIndexed(true);
  textField.fieldType().setStoreTermVectorPositions(true);
  textField.fieldType().setStoreTermVectors(true);
  textField.fieldType().freeze();
  */
  //field.fieldType().setIndexed(true);
  document.add(field);
  return document;
}
Example 16
Source File: TestMemoryIndexAgainstDirectory.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {
  final String field_name = "text";
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  if (random().nextBoolean()) {
    mockAnalyzer.setOffsetGap(random().nextInt(100));
  }
  //index into a random directory
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document doc = new Document();
  doc.add(new Field(field_name, "la la", type));
  doc.add(new Field(field_name, "foo bar foo bar foo", type));

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.updateDocument(new Term("id", "1"), doc);
  writer.commit();
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);

  //Index document in Memory index
  MemoryIndex memIndex = new MemoryIndex(true);
  memIndex.addField(field_name, "la la", mockAnalyzer);
  memIndex.addField(field_name, "foo bar foo bar foo", mockAnalyzer);

  //compare term vectors
  Terms ramTv = reader.getTermVector(0, field_name);
  IndexReader memIndexReader = memIndex.createSearcher().getIndexReader();
  TestUtil.checkReader(memIndexReader);
  Terms memTv = memIndexReader.getTermVector(0, field_name);

  compareTermVectors(ramTv, memTv, field_name);
  memIndexReader.close();
  reader.close();
  dir.close();
}
Example 17
Source File: TestExceedMaxTermLength.java From lucene-solr with Apache License 2.0 | 4 votes |
public void test() throws Exception {
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(random(), new MockAnalyzer(random())));
  try {
    final FieldType ft = new FieldType();
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    ft.setStored(random().nextBoolean());
    ft.freeze();

    final Document doc = new Document();
    if (random().nextBoolean()) {
      // totally ok short field value
      doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
                        TestUtil.randomSimpleString(random(), 1, 10),
                        ft));
    }
    // problematic field
    final String name = TestUtil.randomSimpleString(random(), 1, 50);
    final String value = TestUtil.randomSimpleString(random(), minTestTermLength, maxTestTermLegnth);
    final Field f = new Field(name, value, ft);
    if (random().nextBoolean()) {
      // totally ok short field value
      doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
                        TestUtil.randomSimpleString(random(), 1, 10),
                        ft));
    }
    doc.add(f);

    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
      w.addDocument(doc);
    });
    String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH);
    String msg = expected.getMessage();
    assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg,
               msg.contains("immense term"));
    assertTrue("IllegalArgumentException didn't mention max length (" + maxLengthMsg + "): " + msg,
               msg.contains(maxLengthMsg));
    assertTrue("IllegalArgumentException didn't mention field name (" + name + "): " + msg,
               msg.contains(name));
    assertTrue("IllegalArgumentException didn't mention original message: " + msg,
               msg.contains("bytes can be at most") && msg.contains("in length; got"));
  } finally {
    w.close();
  }
}
Example 18
Source File: LuceneTestCase.java From lucene-solr with Apache License 2.0 | 4 votes |
public synchronized static Field newField(Random random, String name, Object value, FieldType type) {
  // Defeat any consumers that illegally rely on intern'd
  // strings (we removed this from Lucene a while back):
  name = new String(name);

  FieldType prevType = fieldToType.get(name);

  if (usually(random) || type.indexOptions() == IndexOptions.NONE || prevType != null) {
    // most of the time, don't modify the params
    if (prevType == null) {
      fieldToType.put(name, new FieldType(type));
    } else {
      type = mergeTermVectorOptions(type, prevType);
    }

    return createField(name, value, type);
  }

  // TODO: once all core & test codecs can index
  // offsets, sometimes randomly turn on offsets if we are
  // already indexing positions...

  FieldType newType = new FieldType(type);
  if (!newType.stored() && random.nextBoolean()) {
    newType.setStored(true); // randomly store it
  }

  // Randomly turn on term vector options, but always do
  // so consistently for the same field name:
  if (!newType.storeTermVectors() && random.nextBoolean()) {
    newType.setStoreTermVectors(true);
    if (!newType.storeTermVectorPositions()) {
      newType.setStoreTermVectorPositions(random.nextBoolean());

      if (newType.storeTermVectorPositions()) {
        if (!newType.storeTermVectorPayloads()) {
          newType.setStoreTermVectorPayloads(random.nextBoolean());
        }
      }
    }

    if (!newType.storeTermVectorOffsets()) {
      newType.setStoreTermVectorOffsets(random.nextBoolean());
    }

    if (VERBOSE) {
      System.out.println("NOTE: LuceneTestCase: upgrade name=" + name + " type=" + newType);
    }
  }
  newType.freeze();
  fieldToType.put(name, newType);

  // TODO: we need to do this, but smarter, ie, most of
  // the time we set the same value for a given field but
  // sometimes (rarely) we change it up:
  /*
  if (newType.omitNorms()) {
    newType.setOmitNorms(random.nextBoolean());
  }
  */

  return createField(name, value, newType);
}
Example 19
Source File: TestReqOptSumScorer.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testMaxBlock() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));

  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setTokenized(true);
  ft.freeze();

  for (int i = 0; i < 1024; i++) {
    // create documents with an increasing number of As and one B
    Document doc = new Document();
    doc.add(new Field("foo", new TermFreqTokenStream("a", i + 1), ft));
    if (random().nextFloat() < 0.5f) {
      doc.add(new Field("foo", new TermFreqTokenStream("b", 1), ft));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new TestSimilarity.SimpleSimilarity()); // freq == score
  // searcher.setSimilarity(new TestSimilarity.SimpleSimilarity());
  final Query reqQ = new TermQuery(new Term("foo", "a"));
  final Query optQ = new TermQuery(new Term("foo", "b"));
  final Query boolQ = new BooleanQuery.Builder()
      .add(reqQ, Occur.MUST)
      .add(optQ, Occur.SHOULD)
      .build();
  Scorer actual = reqOptScorer(searcher, reqQ, optQ, true);
  Scorer expected = searcher
      .createWeight(boolQ, ScoreMode.COMPLETE, 1)
      .scorer(searcher.getIndexReader().leaves().get(0));
  actual.setMinCompetitiveScore(Math.nextUp(1));
  // Checks that all blocks are fully visited
  for (int i = 0; i < 1024; i++) {
    assertEquals(i, actual.iterator().nextDoc());
    assertEquals(i, expected.iterator().nextDoc());
    assertEquals(actual.score(), expected.score(), 0);
  }
  reader.close();
  dir.close();
}
Example 20
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testCommonTermsQueryHighlight() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setMergePolicy(newLogMergePolicy())); // don't reorder doc ids
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  String[] texts = {
      "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
      "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
      "JFK has been shot",
      "John Kennedy has been shot",
      "This text has a typo in referring to Keneddy",
      "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc",
      "y z x y z a b",
      "lets is a the lets is a the lets is a the lets"
  };
  for (int i = 0; i < texts.length; i++) {
    Document doc = new Document();
    Field field = new Field("field", texts[i], type);
    doc.add(field);
    writer.addDocument(doc);
  }

  CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
  query.add(new Term("field", "text"));
  query.add(new Term("field", "long"));
  query.add(new Term("field", "very"));

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  IndexSearcher searcher = newSearcher(reader);
  TopDocs hits = searcher.search(query, 10);
  assertEquals(2, hits.totalHits.value);

  FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, 1, "field", 1000, 1);
  assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);

  fieldQuery = highlighter.getFieldQuery(query, reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, 0, "field", 1000, 1);
  assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);

  reader.close();
  writer.close();
  dir.close();
}