org.apache.lucene.index.FieldInvertState Java Examples
The following examples show how to use
org.apache.lucene.index.FieldInvertState.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AssertingSimilarity.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public long computeNorm(FieldInvertState state) { assert state != null; assert state.getLength() > 0; assert state.getPosition() >= 0; assert state.getOffset() >= 0; assert state.getMaxTermFrequency() >= 0; // TODO: seems to be 0 for omitTFAP? assert state.getMaxTermFrequency() <= state.getLength(); assert state.getNumOverlap() >= 0; assert state.getNumOverlap() < state.getLength(); assert state.getUniqueTermCount() > 0; assert state.getUniqueTermCount() <= state.getLength(); long norm = delegate.computeNorm(state); assert norm != 0; return norm; }
Example #2
Source File: TestClassicSimilarity.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks, over 100 randomly generated field states, that
 * {@code ClassicSimilarity} computes the same norm as a
 * {@code BM25Similarity} with overlap discounting enabled.
 */
public void testSameNormsAsBM25() {
  final ClassicSimilarity classic = new ClassicSimilarity();
  final BM25Similarity bm25 = new BM25Similarity();
  bm25.setDiscountOverlaps(true);

  for (int round = 0; round < 100; round++) {
    // Draw order matters for reproducibility: length, then position, then overlaps.
    final int fieldLength = TestUtil.nextInt(random(), 1, 1000);
    final int lastPosition = random().nextInt(fieldLength);
    final int overlapCount = random().nextInt(fieldLength);

    FieldInvertState state =
        new FieldInvertState(
            Version.LATEST.major,
            "foo",
            IndexOptions.DOCS_AND_FREQS,
            lastPosition,
            fieldLength,
            overlapCount,
            100,
            1, // max term frequency
            1); // unique term count

    final long expectedNorm = bm25.computeNorm(state);
    final long actualNorm = classic.computeNorm(state);
    assertEquals(expectedNorm, actualNorm, 0f);
  }
}
Example #3
Source File: TestBooleanSimilarity.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks, over 100 randomly generated field states, that
 * {@code BooleanSimilarity} computes the same norm as a
 * {@code BM25Similarity} with overlap discounting enabled.
 */
public void testSameNormsAsBM25() {
  final BooleanSimilarity booleanSim = new BooleanSimilarity();
  final BM25Similarity bm25 = new BM25Similarity();
  bm25.setDiscountOverlaps(true);

  for (int round = 0; round < 100; round++) {
    // Draw order matters for reproducibility: length, then position, then overlaps.
    final int fieldLength = TestUtil.nextInt(random(), 1, 100);
    final int lastPosition = random().nextInt(fieldLength);
    final int overlapCount = random().nextInt(fieldLength);

    FieldInvertState state =
        new FieldInvertState(
            Version.LATEST.major,
            "foo",
            IndexOptions.DOCS_AND_FREQS,
            lastPosition,
            fieldLength,
            overlapCount,
            100,
            1, // max term frequency
            1); // unique term count

    final long expectedNorm = bm25.computeNorm(state);
    final long actualNorm = booleanSim.computeNorm(state);
    assertEquals(expectedNorm, actualNorm, 0f);
  }
}
Example #4
Source File: TestMemoryIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
@Test public void testSimilarities() throws IOException { MemoryIndex mi = new MemoryIndex(); mi.addField("f1", "a long text field that contains many many terms", analyzer); IndexSearcher searcher = mi.createSearcher(); LeafReader reader = (LeafReader) searcher.getIndexReader(); NumericDocValues norms = reader.getNormValues("f1"); assertEquals(0, norms.nextDoc()); float n1 = norms.longValue(); // Norms are re-computed when we change the Similarity mi.setSimilarity(new Similarity() { @Override public long computeNorm(FieldInvertState state) { return 74; } @Override public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { throw new UnsupportedOperationException(); } }); norms = reader.getNormValues("f1"); assertEquals(0, norms.nextDoc()); float n2 = norms.longValue(); assertTrue(n1 != n2); TestUtil.checkReader(reader); }
Example #5
Source File: LindenSimilarity.java From linden with Apache License 2.0 | 5 votes |
/**
 * Returns <code>state.getBoost() * (1 / sqrt(numTerms))</code>.
 *
 * <p><code>numTerms</code> is
 * {@link org.apache.lucene.index.FieldInvertState#getLength()} when
 * {@link #setDiscountOverlaps} is false; otherwise it is that length minus
 * {@link org.apache.lucene.index.FieldInvertState#getNumOverlap()}.
 *
 * @lucene.experimental
 */
@Override
public float lengthNorm(FieldInvertState state) {
  final int numTerms =
      discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
  // The reciprocal square root is computed in double precision, then narrowed.
  final float inverseSqrt = (float) (1.0 / Math.sqrt(numTerms));
  return state.getBoost() * inverseSqrt;
}
Example #6
Source File: SimilarityBase.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Encodes the document length in the same way as {@link BM25Similarity}.
 *
 * <p>The length is the unique term count for {@code IndexOptions.DOCS}
 * fields whose index-created major version is at least 8; otherwise it is
 * the field length, reduced by the overlap count when
 * {@code discountOverlaps} is set.
 *
 * @param state accumulated inversion state for the field
 * @return the length encoded with {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final boolean countUniqueTerms =
      state.getIndexOptions() == IndexOptions.DOCS
          && state.getIndexCreatedVersionMajor() >= 8;

  int docLength;
  if (countUniqueTerms) {
    docLength = state.getUniqueTermCount();
  } else {
    docLength = state.getLength();
    if (discountOverlaps) {
      docLength -= state.getNumOverlap();
    }
  }
  return SmallFloat.intToByte4(docLength);
}
Example #7
Source File: BM25Similarity.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Computes the encoded norm for a field.
 *
 * <p>Uses the unique term count for {@code IndexOptions.DOCS} fields whose
 * index-created major version is at least 8; otherwise uses the field
 * length, minus the overlap count when {@code discountOverlaps} is set.
 *
 * @param state accumulated inversion state for the field
 * @return the chosen term count encoded with {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  if (state.getIndexOptions() == IndexOptions.DOCS
      && state.getIndexCreatedVersionMajor() >= 8) {
    return SmallFloat.intToByte4(state.getUniqueTermCount());
  }

  int termCount = state.getLength();
  if (discountOverlaps) {
    termCount -= state.getNumOverlap();
  }
  return SmallFloat.intToByte4(termCount);
}
Example #8
Source File: TFIDFSimilarity.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Computes the encoded norm for a field.
 *
 * <p>Uses the unique term count for {@code IndexOptions.DOCS} fields whose
 * index-created major version is at least 8; otherwise uses the field
 * length, minus the overlap count when {@code discountOverlaps} is set.
 *
 * @param state accumulated inversion state for the field
 * @return the chosen term count encoded with {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final boolean docsOnlyAndModern =
      state.getIndexOptions() == IndexOptions.DOCS
          && state.getIndexCreatedVersionMajor() >= 8;

  final int termCount =
      docsOnlyAndModern
          ? state.getUniqueTermCount()
          : (discountOverlaps
              ? state.getLength() - state.getNumOverlap()
              : state.getLength());

  return SmallFloat.intToByte4(termCount);
}
Example #9
Source File: TestSimilarityBase.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Verifies that a {@code DFRSimilarity} and a {@code BM25Similarity} compute
 * the same norm for one field state (length 5, 2 overlaps), first with
 * overlap discounting disabled and then with it enabled.
 *
 * @throws IOException declared by the test signature
 */
public void testDiscountOverlapsBoost() throws IOException {
  final BM25Similarity expected = new BM25Similarity();
  final SimilarityBase actual =
      new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());

  final FieldInvertState state =
      new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS);
  state.setLength(5);
  state.setNumOverlap(2);

  // Same comparison in both modes: discounting off first, then on.
  for (boolean discount : new boolean[] {false, true}) {
    expected.setDiscountOverlaps(discount);
    actual.setDiscountOverlaps(discount);
    assertEquals(expected.computeNorm(state), actual.computeNorm(state));
  }
}
Example #10
Source File: TFSimilarity.java From lumongo with Apache License 2.0 | 4 votes |
/**
 * Delegates norm computation to {@code BM25_SIM}.
 *
 * @param state accumulated inversion state for the field
 * @return the norm returned by {@code BM25_SIM.computeNorm(state)}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long delegatedNorm = BM25_SIM.computeNorm(state);
  return delegatedNorm;
}
Example #11
Source File: FairSimilarity.java From incubator-retired-blur with Apache License 2.0 | 4 votes |
@Override public float lengthNorm(FieldInvertState fieldInvertState) { throw new RuntimeException("not sure"); // return 0; }
Example #12
Source File: CustomSimilarity.java From modernmt with Apache License 2.0 | 4 votes |
/**
 * Returns a constant length norm of 1 for every field state.
 *
 * @param state accumulated inversion state for the field (unused)
 * @return {@code 1.0f}
 */
@Override
public float lengthNorm(FieldInvertState state) {
  final float constantNorm = 1.0f;
  return constantNorm;
}
Example #13
Source File: OKAPIBM25Similarity.java From lucene4ir with Apache License 2.0 | 4 votes |
/**
 * Uses the field length reported by the state directly as the norm.
 *
 * @param state accumulated inversion state for the field
 * @return {@code state.getLength()}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final long fieldLength = state.getLength();
  return fieldLength;
}
Example #14
Source File: BM25Similarity.java From lucene4ir with Apache License 2.0 | 4 votes |
/**
 * Encodes the field boost together with the field's term count.
 *
 * <p>The term count is the field length, reduced by the overlap count when
 * {@code discountOverlaps} is set.
 *
 * @param state accumulated inversion state for the field
 * @return the result of {@code encodeNormValue(state.getBoost(), numTerms)}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  int termCount = state.getLength();
  if (discountOverlaps) {
    termCount -= state.getNumOverlap();
  }
  return encodeNormValue(state.getBoost(), termCount);
}
Example #15
Source File: SMARTBNNBNNSimilarity.java From lucene4ir with Apache License 2.0 | 4 votes |
/**
 * Uses the field length reported by the state directly as the norm.
 *
 * @param state accumulated inversion state for the field
 * @return {@code state.getLength()}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final long fieldLength = state.getLength();
  return fieldLength;
}
Example #16
Source File: TestSubScorerFreqs.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the constant norm {@code 1}, ignoring the field state.
 *
 * @param state accumulated inversion state for the field (unused)
 * @return {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long fixedNorm = 1L;
  return fixedNorm;
}
Example #17
Source File: TestConjunctions.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override public long computeNorm(FieldInvertState state) { return 1; // we dont care }
Example #18
Source File: JustCompileSearch.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Unsupported: always throws.
 *
 * @param state accumulated inversion state for the field (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always, carrying {@code UNSUPPORTED_MSG}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(UNSUPPORTED_MSG);
  throw unsupported;
}
Example #19
Source File: TestBooleanQueryVisitSubscorers.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the constant norm {@code 1}, ignoring the field state.
 *
 * @param state accumulated inversion state for the field (unused)
 * @return {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long fixedNorm = 1L;
  return fixedNorm;
}
Example #20
Source File: TestSimilarityProvider.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the constant norm {@code 10}, ignoring the field state.
 *
 * @param state accumulated inversion state for the field (unused)
 * @return {@code 10}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long fixedNorm = 10L;
  return fixedNorm;
}
Example #21
Source File: TestSimilarityProvider.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the constant norm {@code 1}, ignoring the field state.
 *
 * @param state accumulated inversion state for the field (unused)
 * @return {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long fixedNorm = 1L;
  return fixedNorm;
}
Example #22
Source File: MultiSimilarity.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Computes the norm using only the first entry of {@code sims}.
 *
 * @param state accumulated inversion state for the field
 * @return the norm returned by {@code sims[0].computeNorm(state)}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long firstSimNorm = sims[0].computeNorm(state);
  return firstSimNorm;
}
Example #23
Source File: PerFieldSimilarityWrapper.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Looks up the delegate for the state's field name via {@code get} and
 * returns that delegate's norm.
 *
 * @param state accumulated inversion state for the field
 * @return the norm returned by {@code get(state.getName()).computeNorm(state)}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final long perFieldNorm = get(state.getName()).computeNorm(state);
  return perFieldNorm;
}
Example #24
Source File: BooleanSimilarity.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Delegates norm computation to {@code BM25_SIM}.
 *
 * @param state accumulated inversion state for the field
 * @return the norm returned by {@code BM25_SIM.computeNorm(state)}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long delegatedNorm = BM25_SIM.computeNorm(state);
  return delegatedNorm;
}
Example #25
Source File: LegacyBM25Similarity.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Delegates norm computation to the wrapped {@code bm25Similarity}.
 *
 * @param state accumulated inversion state for the field
 * @return the norm returned by {@code bm25Similarity.computeNorm(state)}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long delegatedNorm = bm25Similarity.computeNorm(state);
  return delegatedNorm;
}
Example #26
Source File: Similarity.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Computes the normalization value for a field, given the accumulated
 * state of term processing for this field (see {@link FieldInvertState}).
 *
 * <p>Matches in longer fields are less precise, so implementations of this
 * method usually set smaller values when <code>state.getLength()</code> is large,
 * and larger values when <code>state.getLength()</code> is small.
 *
 * <p>Note that for a given term-document frequency, greater unsigned norms
 * must produce scores that are lower or equal, i.e. for two encoded norms
 * {@code n1} and {@code n2} such that
 * {@code Long.compareUnsigned(n1, n2) > 0}, it must hold that
 * {@code SimScorer.score(freq, n1) <= SimScorer.score(freq, n2)}
 * for any legal {@code freq}.
 *
 * <p>{@code 0} is not a legal norm, so {@code 1} is the norm that produces
 * the highest scores.
 *
 * @lucene.experimental
 *
 * @param state current processing state for this field
 * @return computed norm value
 */
public abstract long computeNorm(FieldInvertState state);