org.apache.lucene.search.similarities.ClassicSimilarity Java Examples
The following examples show how to use
org.apache.lucene.search.similarities.ClassicSimilarity.
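Most of the examples below share one basic pattern: set ClassicSimilarity (Lucene's classic TF-IDF scoring) on the IndexWriterConfig at index time and on the IndexSearcher at query time, so norms and query scores are computed consistently. The following minimal sketch illustrates that pattern only; the index path, field name, and sample text are illustrative placeholders and are not taken from any of the projects listed.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ClassicSimilarityBasics {
  public static void main(String[] args) throws Exception {
    // Illustrative index location; any Directory implementation works.
    Directory dir = FSDirectory.open(Paths.get("/tmp/classic-sim-index"));

    // Index with ClassicSimilarity so norms are encoded the TF-IDF way.
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer())
        .setSimilarity(new ClassicSimilarity());
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new TextField("body", "lucene is a search engine library", Field.Store.YES));
      writer.addDocument(doc);
    }

    // Search with the same similarity so query-time scoring matches the index-time norms.
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      searcher.setSimilarity(new ClassicSimilarity());
      TopDocs hits = searcher.search(new TermQuery(new Term("body", "lucene")), 10);
      System.out.println("hits: " + hits.totalHits);
    }
  }
}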
Example #1
Source File: TestBooleanQueryVisitSubscorers.java From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new MockAnalyzer(random());
  dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setMergePolicy(newLogMergePolicy()); // we will use docids to validate
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  writer.addDocument(doc("lucene", "lucene is a very popular search engine library"));
  writer.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
  writer.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
  reader = writer.getReader();
  writer.close();
  // we do not use newSearcher because the assertingXXX layers break
  // the toString representations we are relying on
  // TODO: clean that up
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  scorerSearcher = new ScorerIndexSearcher(reader);
  scorerSearcher.setSimilarity(new CountingSimilarity());
}
Example #2
Source File: SORecommender.java From scava with Eclipse Public License 2.0
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
  Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
  try {
    IndexReader reader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(reader);
    if (isBm25 == false) {
      ClassicSimilarity CS = new ClassicSimilarity();
      searcher.setSimilarity(CS);
    }
    TopDocs docs = searcher.search(query, hitsPerPage);
    return docs;
  } catch (Exception e) {
    logger.error(e.getMessage());
    return null;
  }
}
Example #3
Source File: TestTermScorer.java From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(newLogMergePolicy())
          .setSimilarity(new ClassicSimilarity()));
  for (int i = 0; i < values.length; i++) {
    Document doc = new Document();
    doc.add(newTextField(FIELD, values[i], Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  indexReader = getOnlyLeafReader(writer.getReader());
  writer.close();
  indexSearcher = newSearcher(indexReader, false);
  indexSearcher.setSimilarity(new ClassicSimilarity());
}
Example #4
Source File: TestSimilarities.java From lucene-solr with Apache License 2.0
public void testNonStandardSimilarity() throws Exception {
  try (Monitor monitor = newMonitor()) {
    monitor.register(new MonitorQuery("1", MonitorTestBase.parse("test")));

    Similarity similarity = new ClassicSimilarity() {
      @Override
      public float tf(float freq) {
        return 1000f;
      }
    };

    Document doc = new Document();
    doc.add(newTextField("field", "this is a test", Field.Store.NO));

    MatchingQueries<ScoringMatch> standard = monitor.match(doc, ScoringMatch.matchWithSimilarity(new ClassicSimilarity()));
    MatchingQueries<ScoringMatch> withSim = monitor.match(doc, ScoringMatch.matchWithSimilarity(similarity));

    float standScore = standard.getMatches().iterator().next().getScore();
    float simScore = withSim.getMatches().iterator().next().getScore();
    assertEquals(standScore, simScore / 1000, 0.1f);
  }
}
Example #5
Source File: TestValueSources.java From lucene-solr with Apache License 2.0
public void testNorm() throws Exception {
  Similarity saved = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource vs = new NormValueSource("byte");
    assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
    // regardless of whether norms exist, value source exists == 0
    assertAllExist(vs);
    vs = new NormValueSource("text");
    assertAllExist(vs);
  } finally {
    searcher.setSimilarity(saved);
  }
}
Example #6
Source File: TestValueSources.java From lucene-solr with Apache License 2.0
public void testTF() throws Exception {
  Similarity saved = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    ValueSource vs = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    assertHits(new FunctionQuery(vs),
        new float[] { (float) Math.sqrt(3d), (float) Math.sqrt(1d) });
    assertAllExist(vs);

    vs = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
    assertHits(new FunctionQuery(vs), new float[] { 0f, 1f });
    assertAllExist(vs);

    // regardless of whether norms exist, value source exists == 0
    vs = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
    assertHits(new FunctionQuery(vs), new float[] { 0F, 0F });
    assertAllExist(vs);
  } finally {
    searcher.setSimilarity(saved);
  }
}
Example #7
Source File: TestPayloadSpanUtil.java From lucene-solr with Apache License 2.0
public void testPayloadSpanUtil() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));
  Document doc = new Document();
  doc.add(newTextField(FIELD, "xx rr yy mm pp", Field.Store.YES));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());

  Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));
  if (VERBOSE) {
    System.out.println("Num payloads:" + payloads.size());
    for (final byte[] bytes : payloads) {
      System.out.println(new String(bytes, StandardCharsets.UTF_8));
    }
  }
  reader.close();
  directory.close();
}
Example #8
Source File: TestTaxonomyFacetCounts.java From lucene-solr with Apache License 2.0
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
    final Similarity sim = new ClassicSimilarity();

    @Override
    public Similarity get(String name) {
      assertEquals("field", name);
      return sim;
    }
  });
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
Example #9
Source File: TestSweetSpotSimilarityFactory.java From lucene-solr with Apache License 2.0
/** default parameters */
public void testDefaults() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text", SweetSpotSimilarity.class);

  // SSS tf w/defaults should behave just like DS
  ClassicSimilarity d = new ClassicSimilarity();
  for (int i = 0; i <= 1000; i++) {
    assertEquals("tf: i=" + i, d.tf(i), sim.tf(i), 0.0F);
  }

  // default norm sanity check
  assertEquals("norm 1", 1.00F, computeNorm(sim, 1), 0.0F);
  assertEquals("norm 4", 0.50F, computeNorm(sim, 4), 0.0F);
  assertEquals("norm 16", 0.25F, computeNorm(sim, 16), 0.0F);
}
Example #10
Source File: TestQueryRescorer.java From lucene-solr with Apache License 2.0
private IndexSearcher getSearcher(IndexReader r) {
  IndexSearcher searcher = newSearcher(r);

  // We rely on more tokens = lower score:
  searcher.setSimilarity(new ClassicSimilarity());

  return searcher;
}
Example #11
Source File: TestBooleanQuery.java From lucene-solr with Apache License 2.0
public void testNullOrSubScorer() throws Throwable {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "a b c d", Field.Store.NO));
  w.addDocument(doc);

  IndexReader r = w.getReader();
  IndexSearcher s = newSearcher(r);
  // this test relies upon coord being the default implementation,
  // otherwise scores are different!
  s.setSimilarity(new ClassicSimilarity());

  BooleanQuery.Builder q = new BooleanQuery.Builder();
  q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);

  // PhraseQuery w/ no terms added returns a null scorer
  PhraseQuery pq = new PhraseQuery("field", new String[0]);
  q.add(pq, BooleanClause.Occur.SHOULD);
  assertEquals(1, s.search(q.build(), 10).totalHits.value);

  // A required clause which returns null scorer should return null scorer to
  // IndexSearcher.
  q = new BooleanQuery.Builder();
  pq = new PhraseQuery("field", new String[0]);
  q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
  q.add(pq, BooleanClause.Occur.MUST);
  assertEquals(0, s.search(q.build(), 10).totalHits.value);

  DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(
      Arrays.asList(new TermQuery(new Term("field", "a")), pq),
      1.0f);
  assertEquals(1, s.search(dmq, 10).totalHits.value);

  r.close();
  w.close();
  dir.close();
}
Example #12
Source File: TestPhraseQuery.java From lucene-solr with Apache License 2.0
public void testSlopScoring() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(newLogMergePolicy())
          .setSimilarity(new BM25Similarity()));

  Document doc = new Document();
  doc.add(newTextField("field", "foo firstname lastname foo", Field.Store.YES));
  writer.addDocument(doc);

  Document doc2 = new Document();
  doc2.add(newTextField("field", "foo firstname zzz lastname foo", Field.Store.YES));
  writer.addDocument(doc2);

  Document doc3 = new Document();
  doc3.add(newTextField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES));
  writer.addDocument(doc3);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  PhraseQuery query = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
  assertEquals(3, hits.length);
  // Make sure that those matches where the terms appear closer to
  // each other get a higher score:
  assertEquals(1.0, hits[0].score, 0.01);
  assertEquals(0, hits[0].doc);
  assertEquals(0.63, hits[1].score, 0.01);
  assertEquals(1, hits[1].doc);
  assertEquals(0.47, hits[2].score, 0.01);
  assertEquals(2, hits[2].doc);
  QueryUtils.check(random(), query, searcher);
  reader.close();
  directory.close();
}
Example #13
Source File: TestSweetSpotSimilarityFactory.java From lucene-solr with Apache License 2.0
/** baseline with parameters */
public void testBaselineParameters() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text_baseline", SweetSpotSimilarity.class);

  ClassicSimilarity d = new ClassicSimilarity();

  // constant up to 6
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf i=" + i, 1.5F, sim.tf(i), 0.0F);
  }
  // less than the default sim above 6
  for (int i = 6; i <= 1000; i++) {
    assertTrue("tf: i=" + i + " : s=" + sim.tf(i) + " < d=" + d.tf(i), sim.tf(i) < d.tf(i));
  }

  // norms: plateau from 3-5
  assertEquals("norm 1 == 7", computeNorm(sim, 1), computeNorm(sim, 7), 0.0F);
  assertEquals("norm 2 == 6", computeNorm(sim, 2), computeNorm(sim, 6), 0.0F);
  assertEquals("norm 3", 1.00F, computeNorm(sim, 3), 0.0F);
  assertEquals("norm 4", 1.00F, computeNorm(sim, 4), 0.0F);
  assertEquals("norm 5", 1.00F, computeNorm(sim, 5), 0.0F);
  assertTrue("norm 6 too high: " + computeNorm(sim, 6), computeNorm(sim, 6) < 1.0F);
  assertTrue("norm 7 higher than norm 6", computeNorm(sim, 7) < computeNorm(sim, 6));
  assertEquals("norm 20", 0.25F, computeNorm(sim, 20), 0.0F);
}
Example #14
Source File: LtrQueryTests.java From elasticsearch-learning-to-rank with Apache License 2.0
@Before
public void setupIndex() throws IOException {
  dirUnderTest = newDirectory();
  List<Similarity> sims = Arrays.asList(
      new ClassicSimilarity(),
      new SweetSpotSimilarity(), // extends Classic
      new BM25Similarity(),
      new LMDirichletSimilarity(),
      new BooleanSimilarity(),
      new LMJelinekMercerSimilarity(0.2F),
      new AxiomaticF3LOG(0.5F, 10),
      new DFISimilarity(new IndependenceChiSquared()),
      new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
      new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
  );
  similarity = sims.get(random().nextInt(sims.size()));

  indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest,
      newIndexWriterConfig().setSimilarity(similarity));
  for (int i = 0; i < docs.length; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + i, Field.Store.YES));
    doc.add(newField("field", docs[i], Store.YES));
    indexWriterUnderTest.addDocument(doc);
  }
  indexWriterUnderTest.commit();
  indexWriterUnderTest.forceMerge(1);
  indexWriterUnderTest.flush();

  indexReaderUnderTest = indexWriterUnderTest.getReader();
  searcherUnderTest = newSearcher(indexReaderUnderTest);
  searcherUnderTest.setSimilarity(similarity);
}
Example #15
Source File: LuceneTermQueryBuilderTest.java From querqy with Apache License 2.0
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {
  Analyzer analyzer = new StandardAnalyzer();

  Directory directory = new ByteBuffersDirectory();
  IndexWriterConfig config = new IndexWriterConfig(analyzer);
  config.setSimilarity(new ClassicSimilarity());
  IndexWriter indexWriter = new IndexWriter(directory, config);

  TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
  TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);
  indexWriter.close();

  IndexReader indexReader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(indexReader);
  indexSearcher.setSimilarity(new ClassicSimilarity());

  final TermQuery termQuery = new LuceneTermQueryBuilder()
      .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
  final Term term = termQuery.getTerm();
  assertEquals("f1", term.field());
  assertEquals("v1", term.text());

  TopDocs topDocs = indexSearcher.search(termQuery, 10);
  final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
  final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

  String explainText = explain.toString();
  assertTrue(explainText.contains("4.5 = boost"));  // 4.5 (query) but ignore field boost
  assertTrue(explainText.contains("4 = docFreq"));  // 4 * v1
  assertTrue(explainText.contains("2.0 = freq"));   // 2 * v1 in field
}
Example #16
Source File: DocFreq.java From lumongo with Apache License 2.0
public DocFreq(IndexReader indexReader, String field) {
  this.indexReader = indexReader;
  this.field = field;
  this.docFreqMap = new HashMap<>();
  this.similarity = new ClassicSimilarity();
  this.numDocs = indexReader.numDocs();
}
Example #17
Source File: LumongoSegment.java From lumongo with Apache License 2.0
private PerFieldSimilarityWrapper getSimilarity(final QueryWithFilters queryWithFilters) {
  return new PerFieldSimilarityWrapper() {
    @Override
    public Similarity get(String name) {
      AnalyzerSettings analyzerSettings = indexConfig.getAnalyzerSettingsForIndexField(name);
      AnalyzerSettings.Similarity similarity = AnalyzerSettings.Similarity.BM25;
      if (analyzerSettings != null) {
        similarity = analyzerSettings.getSimilarity();
      }

      AnalyzerSettings.Similarity fieldSimilarityOverride = queryWithFilters.getFieldSimilarityOverride(name);
      if (fieldSimilarityOverride != null) {
        similarity = fieldSimilarityOverride;
      }

      if (AnalyzerSettings.Similarity.TFIDF.equals(similarity)) {
        return new ClassicSimilarity();
      } else if (AnalyzerSettings.Similarity.BM25.equals(similarity)) {
        return new BM25Similarity();
      } else if (AnalyzerSettings.Similarity.CONSTANT.equals(similarity)) {
        return new ConstantSimilarity();
      } else if (AnalyzerSettings.Similarity.TF.equals(similarity)) {
        return new TFSimilarity();
      } else {
        throw new RuntimeException("Unknown similarity type <" + similarity + ">");
      }
    }
  };
}
Example #18
Source File: TestFuzzyQuery.java From lucene-solr with Apache License 2.0
public void testSingleQueryExactMatchScoresHighest() throws Exception {
  // See issue LUCENE-329 - IDF shouldn't wreck similarity ranking
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smythe", writer);
  addDoc("smdssasd", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); // avoid randomisation of similarity algo by test framework
  writer.close();

  String searchTerms[] = { "smith", "smythe", "smdssasd" };
  for (String searchTerm : searchTerms) {
    FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    Document bestDoc = searcher.doc(hits[0].doc);
    assertTrue(hits.length > 0);
    String topMatch = bestDoc.get("field");
    assertEquals(searchTerm, topMatch);
    if (hits.length > 1) {
      Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
      String worstMatch = worstDoc.get("field");
      assertNotSame(searchTerm, worstMatch);
    }
  }
  reader.close();
  directory.close();
}
Example #19
Source File: TestMemoryIndex.java From lucene-solr with Apache License 2.0
@Test
public void testFreezeAPI() {
  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "some text", analyzer);

  assertThat(mi.search(new MatchAllDocsQuery()), not(is(0.0f)));
  assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // check we can add a new field after searching
  mi.addField("f2", "some more text", analyzer);
  assertThat(mi.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));

  // freeze!
  mi.freeze();

  RuntimeException expected = expectThrows(RuntimeException.class, () -> {
    mi.addField("f3", "and yet more", analyzer);
  });
  assertThat(expected.getMessage(), containsString("frozen"));

  expected = expectThrows(RuntimeException.class, () -> {
    mi.setSimilarity(new BM25Similarity(1, 1));
  });
  assertThat(expected.getMessage(), containsString("frozen"));

  assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  mi.reset();
  mi.addField("f1", "wibble", analyzer);
  assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
  assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));

  // check we can set the Similarity again
  mi.setSimilarity(new ClassicSimilarity());
}
Example #20
Source File: SearchImpl.java From lucene-solr with Apache License 2.0
private Similarity createSimilarity(SimilarityConfig config) {
  Similarity similarity;

  if (config.isUseClassicSimilarity()) {
    ClassicSimilarity tfidf = new ClassicSimilarity();
    tfidf.setDiscountOverlaps(config.isDiscountOverlaps());
    similarity = tfidf;
  } else {
    BM25Similarity bm25 = new BM25Similarity(config.getK1(), config.getB());
    bm25.setDiscountOverlaps(config.isDiscountOverlaps());
    similarity = bm25;
  }

  return similarity;
}
Example #21
Source File: TestValueSources.java From lucene-solr with Apache License 2.0
public void testIDF() throws Exception {
  Similarity saved = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    assertHits(new FunctionQuery(vs), new float[] { 1.0f, 1.0f });
    assertAllExist(vs);
  } finally {
    searcher.setSimilarity(saved);
  }
}
Example #22
Source File: TestPayloadScoreQuery.java From lucene-solr with Apache License 2.0
@Test
public void testNestedNearQuery() throws Exception {
  // (one OR hundred) NEAR (twenty two) ~ 1
  //  payloads: 2 4 4 4
  //  one hundred twenty two
  //  two hundred twenty two

  SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
      new SpanOrQuery(new SpanTermQuery(new Term("field", "one")),
          new SpanTermQuery(new Term("field", "hundred"))),
      new SpanNearQuery(new SpanQuery[]{
          new SpanTermQuery(new Term("field", "twenty")),
          new SpanTermQuery(new Term("field", "two"))
      }, 0, true)
  }, 1, true);

  // check includeSpanScore makes a difference here
  searcher.setSimilarity(new ClassicSimilarity());
  try {
    checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 },
        new float[]{ 20.901256561279297f, 17.06580352783203f });
    checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 },
        new float[]{ 17.06580352783203f, 10.450628280639648f });
    checkQuery(q, new AveragePayloadFunction(), new int[]{ 122, 222 },
        new float[]{ 19.15948486328125f, 17.06580352783203f });
    checkQuery(q, new MaxPayloadFunction(), false, new int[]{ 122, 222 }, new float[]{ 4.0f, 4.0f });
    checkQuery(q, new MinPayloadFunction(), false, new int[]{ 222, 122 }, new float[]{ 4.0f, 2.0f });
    checkQuery(q, new AveragePayloadFunction(), false, new int[]{ 222, 122 }, new float[]{ 4.0f, 3.666666f });
  } finally {
    searcher.setSimilarity(similarity);
  }
}
Example #23
Source File: TestMinShouldMatch2.java From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  final int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();

    addSome(doc, alwaysTerms);

    if (random().nextInt(100) < 90) {
      addSome(doc, commonTerms);
    }
    if (random().nextInt(100) < 50) {
      addSome(doc, mediumTerms);
    }
    if (random().nextInt(100) < 10) {
      addSome(doc, rareTerms);
    }
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  r = DirectoryReader.open(dir);
  reader = getOnlyLeafReader(r);
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
}
Example #24
Source File: TestElevationComparator.java From lucene-solr with Apache License 2.0
public void testSorting() throws Throwable {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(new MockAnalyzer(random())).
          setMaxBufferedDocs(2).
          setMergePolicy(newLogMergePolicy(1000)).
          setSimilarity(new ClassicSimilarity())
  );
  writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"}));
  writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"}));
  writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s", "c"}));
  writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"}));
  writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s", "y"}));
  writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted", "str_s", "z"}));

  IndexReader r = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher searcher = newSearcher(r);
  searcher.setSimilarity(new BM25Similarity());

  runTest(searcher, true);
  runTest(searcher, false);

  r.close();
  directory.close();
}
Example #25
Source File: TestQueryRescorer.java From lucene-solr with Apache License 2.0
public static IndexWriterConfig newIndexWriterConfig() {
  // We rely on more tokens = lower score:
  return LuceneTestCase.newIndexWriterConfig().setSimilarity(new ClassicSimilarity());
}
Example #26
Source File: SweetSpotSimilarityTest.java From lucene-solr with Apache License 2.0
public void testSweetSpotTf() {
  SweetSpotSimilarity ss = new SweetSpotSimilarity();

  TFIDFSimilarity d = new ClassicSimilarity();
  TFIDFSimilarity s = ss;

  // tf equal
  ss.setBaselineTfFactors(0.0f, 0.0f);
  for (int i = 1; i < 1000; i++) {
    assertEquals("tf: i=" + i, d.tf(i), s.tf(i), 0.0f);
  }

  // tf higher
  ss.setBaselineTfFactors(1.0f, 0.0f);
  for (int i = 1; i < 1000; i++) {
    assertTrue("tf: i=" + i + " : d=" + d.tf(i) + " < s=" + s.tf(i), d.tf(i) < s.tf(i));
  }

  // tf flat
  ss.setBaselineTfFactors(1.0f, 6.0f);
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf flat1: i=" + i, 1.0f, s.tf(i), 0.0f);
  }
  ss.setBaselineTfFactors(2.0f, 6.0f);
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf flat2: i=" + i, 2.0f, s.tf(i), 0.0f);
  }
  for (int i = 6; i <= 1000; i++) {
    assertTrue("tf: i=" + i + " : s=" + s.tf(i) + " < d=" + d.tf(i), s.tf(i) < d.tf(i));
  }

  // stupidity
  assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
}
Example #27
Source File: ClassicSimilarityFactory.java From lucene-solr with Apache License 2.0
@Override
public Similarity getSimilarity() {
  ClassicSimilarity sim = new ClassicSimilarity();
  sim.setDiscountOverlaps(discountOverlaps);
  return sim;
}
Example #28
Source File: TestBoolean2.java From lucene-solr with Apache License 2.0
@Test
public void testRandomQueries() throws Exception {
  String[] vals = {"w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz"};

  int tot = 0;

  BooleanQuery q1 = null;
  try {
    // increase number of iterations for more complete testing
    int num = atLeast(3);
    for (int i = 0; i < num; i++) {
      int level = random().nextInt(3);
      q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null).build();

      // Can't sort by relevance since floating point numbers may not quite
      // match up.
      Sort sort = Sort.INDEXORDER;

      QueryUtils.check(random(), q1, searcher); // baseline sim
      try {
        // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
        searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
        QueryUtils.check(random(), q1, searcher);
      } finally {
        searcher.setSimilarity(new ClassicSimilarity()); // restore
      }

      // check diff (randomized) scorers (from AssertingSearcher) produce the same results
      TopFieldCollector collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
      tot += hits2.length;
      CheckHits.checkEqual(q1, hits1, hits2);

      BooleanQuery.Builder q3 = new BooleanQuery.Builder();
      q3.add(q1, BooleanClause.Occur.SHOULD);
      q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
      assertEquals(mulFactor * collector.totalHits + NUM_EXTRA_DOCS / 2, bigSearcher.count(q3.build()));

      // test diff (randomized) scorers produce the same results on bigSearcher as well
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits2 = collector.topDocs().scoreDocs;
      CheckHits.checkEqual(q1, hits1, hits2);
    }
  } catch (Exception e) {
    // For easier debugging
    System.out.println("failed query: " + q1);
    throw e;
  }

  // System.out.println("Total hits:" + tot);
}
Example #29
Source File: TestComplexExplanations.java From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  // TODO: switch to BM25?
  searcher.setSimilarity(new ClassicSimilarity());
}
Example #30
Source File: TestClassicSimilarityFactory.java From lucene-solr with Apache License 2.0
/** Classic w/ default parameters */
public void testDefaults() throws Exception {
  ClassicSimilarity sim = getSimilarity("text", ClassicSimilarity.class);
  assertEquals(true, sim.getDiscountOverlaps());
}