org.apache.lucene.search.similarities.TFIDFSimilarity Java Examples
The following examples show how to use
org.apache.lucene.search.similarities.TFIDFSimilarity.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NormValueSource.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException { IndexSearcher searcher = (IndexSearcher)context.get("searcher"); final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), field); if (similarity == null) { throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)"); } // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf // is 1 when docCount == docFreq == 1 final SimScorer simScorer = similarity.scorer(1f, new CollectionStatistics(field, 1, 1, 1, 1), new TermStatistics(new BytesRef("bogus"), 1, 1)); final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, readerContext.reader(), field, true); return new FloatDocValues(this) { int lastDocID = -1; @Override public float floatVal(int docID) throws IOException { if (docID < lastDocID) { throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID); } lastDocID = docID; return leafSimScorer.score(docID, 1f); } }; }
Example #2
Source File: SweetSpotSimilarityTest.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testHyperbolicSweetSpot() { SweetSpotSimilarity ss = new SweetSpotSimilarity() { @Override public float tf(float freq) { return hyperbolicTf(freq); } }; ss.setHyperbolicTfFactors(3.3f, 7.7f, Math.E, 5.0f); TFIDFSimilarity s = ss; for (int i = 1; i <=1000; i++) { assertTrue("MIN tf: i="+i+" : s="+s.tf(i), 3.3f <= s.tf(i)); assertTrue("MAX tf: i="+i+" : s="+s.tf(i), s.tf(i) <= 7.7f); } assertEquals("MID tf", 3.3f+(7.7f - 3.3f)/2.0f, s.tf(5), 0.00001f); // stupidity assertEquals("tf zero", 0.0f, s.tf(0), 0.0f); }
Example #3
Source File: TestFieldMaskingSpanQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a span-near query over two field-masked sub-queries (both masked to "id"),
 * checks it matches documents 0-3, then walks the spans of the first leaf and verifies
 * each (doc, start, end) triple in order.
 *
 * Skipped unless the searcher's similarity is a TFIDFSimilarity (LUCENE-3723).
 */
public void testSpans2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  SpanQuery female = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery james = new SpanTermQuery(new Term("first", "james"));
  SpanQuery femaleOrJames = new SpanOrQuery(female, new FieldMaskingSpanQuery(james, "gender"));
  SpanQuery jones = new SpanTermQuery(new Term("last", "jones"));

  SpanQuery[] clauses = new SpanQuery[] {
      new FieldMaskingSpanQuery(femaleOrJames, "id"),
      new FieldMaskingSpanQuery(jones, "id")
  };
  SpanQuery q = new SpanNearQuery(clauses, -1, false);
  check(q, new int[] { 0, 1, 2, 3 });

  SpanWeight weight = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f);
  Spans span = weight.getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);

  // Expected spans as {doc, start, end}, in iteration order.
  int[][] expected = {
      { 0, 0, 1 },
      { 1, 1, 2 },
      { 2, 0, 1 },
      { 2, 2, 3 },
      { 3, 0, 1 },
  };
  for (int[] triple : expected) {
    assertNext(span, triple[0], triple[1], triple[2]);
  }
  assertFinished(span);
}
Example #4
Source File: LtrQueryTests.java From elasticsearch-learning-to-rank with Apache License 2.0 | 6 votes |
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores, RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException { Document d = searcherUnderTest.doc(scoreDoc.doc); String idVal = d.get("id"); int docId = Integer.decode(idVal); float modelScore = scores[docId]; float queryScore = scoreDoc.score; assertEquals("Scores match with similarity " + similarity.getClass(), modelScore, queryScore, SCORE_NB_ULP_PREC *Math.ulp(modelScore)); if (!(similarity instanceof TFIDFSimilarity)) { // There are precision issues with these similarities when using explain // It produces 0.56103003 for feat:0 in doc1 using score() but 0.5610301 using explain Explanation expl = searcherUnderTest.explain(ltrQuery, docId); assertEquals("Explain scores match with similarity " + similarity.getClass(), expl.getValue().floatValue(), queryScore, 5 * Math.ulp(modelScore)); checkFeatureNames(expl, features); } }
Example #5
Source File: IDFValueSource.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Computes the idf of the configured term once, using the searcher found in
 * {@code context}, and returns it as a constant value for every document.
 *
 * @param context       lookup map; the entry stored under {@code "searcher"} is cast to an
 *                      {@link IndexSearcher}
 * @param readerContext the segment being evaluated (not consulted by this method)
 * @return constant doc values wrapping the computed idf
 * @throws UnsupportedOperationException if the resolved similarity is not a TFIDFSimilarity
 * @throws IOException if reading the document frequency fails
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");

  final TFIDFSimilarity tfidf = asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  final int termDocFreq = indexSearcher.getIndexReader().docFreq(new Term(indexedField, indexedBytes));
  final int maxDoc = indexSearcher.getIndexReader().maxDoc();
  final float idf = tfidf.idf(termDocFreq, maxDoc);

  return new DocFreqValueSource.ConstDoubleDocValues(idf, this);
}
Example #6
Source File: IDFValueSource.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Resolves {@code sim} to a TFIDFSimilarity for the given field.
 *
 * Any number of nested {@link PerFieldSimilarityWrapper}s are unwrapped by asking each
 * one for the similarity of {@code field}.
 *
 * @param sim   the similarity to resolve
 * @param field the field used to unwrap per-field wrappers
 * @return the unwrapped similarity if it is a TFIDFSimilarity, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity unwrapped = sim;
  while (unwrapped instanceof PerFieldSimilarityWrapper) {
    PerFieldSimilarityWrapper wrapper = (PerFieldSimilarityWrapper) unwrapped;
    unwrapped = wrapper.get(field);
  }
  return (unwrapped instanceof TFIDFSimilarity) ? (TFIDFSimilarity) unwrapped : null;
}
Example #7
Source File: TestFieldMaskingSpanQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Verifies that a span-near query over "gender:female" and "last:smith" matches
 * documents 2 and 4, both when only the second clause is masked to "gender" and when
 * both clauses are masked to "id".
 *
 * Skipped unless the searcher's similarity is a TFIDFSimilarity (LUCENE-3723).
 */
public void testSimple2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  final int[] expectedDocs = { 2, 4 };
  SpanQuery female = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery smith = new SpanTermQuery(new Term("last", "smith"));

  // Second clause masked to the first clause's field.
  SpanQuery[] maskedToGender = new SpanQuery[] {
      female,
      new FieldMaskingSpanQuery(smith, "gender")
  };
  SpanQuery nearOnGender = new SpanNearQuery(maskedToGender, -1, false);
  check(nearOnGender, new int[] { expectedDocs[0], expectedDocs[1] });

  // Both clauses masked to a third field.
  SpanQuery[] maskedToId = new SpanQuery[] {
      new FieldMaskingSpanQuery(female, "id"),
      new FieldMaskingSpanQuery(smith, "id")
  };
  SpanQuery nearOnId = new SpanNearQuery(maskedToId, -1, false);
  check(nearOnId, new int[] { expectedDocs[0], expectedDocs[1] });
}
Example #8
Source File: XMoreLikeThis.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  the index reader stored in this instance
 * @param sim the TF-IDF similarity stored in this instance
 */
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
    this.similarity = sim;
    this.ir = ir;
}
Example #9
Source File: XMoreLikeThis.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@link TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
    return this.similarity;
}
Example #10
Source File: XMoreLikeThis.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the new {@link TFIDFSimilarity}; stored as given, without validation
 */
public void setSimilarity(TFIDFSimilarity similarity) {
    this.similarity = similarity;
}
Example #11
Source File: MoreLikeThis.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  the index reader stored in this instance
 * @param sim the TF-IDF similarity stored in this instance
 */
public MoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
  this.similarity = sim;
  this.ir = ir;
}
Example #12
Source File: MoreLikeThis.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@link TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
  return this.similarity;
}
Example #13
Source File: MoreLikeThis.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the new {@link TFIDFSimilarity}; stored as given, without validation
 */
public void setSimilarity(TFIDFSimilarity similarity) {
  this.similarity = similarity;
}
Example #14
Source File: SweetSpotSimilarityTest.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testSweetSpotTf() { SweetSpotSimilarity ss = new SweetSpotSimilarity(); TFIDFSimilarity d = new ClassicSimilarity(); TFIDFSimilarity s = ss; // tf equal ss.setBaselineTfFactors(0.0f, 0.0f); for (int i = 1; i < 1000; i++) { assertEquals("tf: i="+i, d.tf(i), s.tf(i), 0.0f); } // tf higher ss.setBaselineTfFactors(1.0f, 0.0f); for (int i = 1; i < 1000; i++) { assertTrue("tf: i="+i+" : d="+d.tf(i)+ " < s="+s.tf(i), d.tf(i) < s.tf(i)); } // tf flat ss.setBaselineTfFactors(1.0f, 6.0f); for (int i = 1; i <=6; i++) { assertEquals("tf flat1: i="+i, 1.0f, s.tf(i), 0.0f); } ss.setBaselineTfFactors(2.0f, 6.0f); for (int i = 1; i <=6; i++) { assertEquals("tf flat2: i="+i, 2.0f, s.tf(i), 0.0f); } for (int i = 6; i <=1000; i++) { assertTrue("tf: i="+i+" : s="+s.tf(i)+ " < d="+d.tf(i), s.tf(i) < d.tf(i)); } // stupidity assertEquals("tf zero", 0.0f, s.tf(0), 0.0f); }
Example #15
Source File: MoreLikeThisQuery.java From Elasticsearch with Apache License 2.0 | 3 votes |
public void setSimilarity(Similarity similarity) { if (similarity == null || similarity instanceof TFIDFSimilarity) { //LUCENE 4 UPGRADE we need TFIDF similarity here so I only set it if it is an instance of it this.similarity = (TFIDFSimilarity) similarity; } }