org.apache.lucene.search.similarities.Similarity Java Examples
The following examples show how to use
org.apache.lucene.search.similarities.Similarity.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Builds the span scorer for a single index segment.
 *
 * @param context the leaf reader context to score against
 * @return a {@link SpanScorer} over the spans requested with {@code Postings.PAYLOADS}, or
 *     {@code null} when {@code field} is {@code null} or no spans are available
 * @throws IllegalStateException if the field has terms but they carry no positions
 * @throws IOException propagated from the reader / spans calls
 */
@Override
public SpanScorer scorer(LeafReaderContext context) throws IOException {
  if (field == null) {
    return null;
  }

  final Terms fieldTerms = context.reader().terms(field);
  final boolean positionsMissing = fieldTerms != null && !fieldTerms.hasPositions();
  if (positionsMissing) {
    throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
  }

  final Spans payloadSpans = getSpans(context, Postings.PAYLOADS);
  if (payloadSpans == null) {
    return null;
  }
  return new SpanScorer(this, payloadSpans, getSimScorer(context));
}
Example #2
Source File: ScoringMatch.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates a matcher factory whose matchers score documents with the given similarity.
 *
 * <p>The factory sets {@code similarity} on the searcher it is handed, then returns a
 * {@link CollectingMatcher} running in {@code ScoreMode.COMPLETE}. A hit is reported only when
 * its score is greater than zero; two matches for the same query are merged by summing scores.
 *
 * @param similarity the similarity to install on the searcher
 * @return a factory producing score-collecting matchers
 */
public static final MatcherFactory<ScoringMatch> matchWithSimilarity(Similarity similarity) {
  return searcher -> {
    searcher.setSimilarity(similarity);

    return new CollectingMatcher<ScoringMatch>(searcher, ScoreMode.COMPLETE) {

      @Override
      protected ScoringMatch doMatch(String queryId, int doc, Scorable scorer) throws IOException {
        final float docScore = scorer.score();
        // A non-positive score is reported as "no match".
        return docScore > 0 ? new ScoringMatch(queryId, docScore) : null;
      }

      @Override
      public ScoringMatch resolve(ScoringMatch match1, ScoringMatch match2) {
        // Keep the first match's query id and add the two scores together.
        return new ScoringMatch(match1.getQueryId(), match1.getScore() + match2.getScore());
      }
    };
  };
}
Example #3
Source File: KNearestNeighborClassifier.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link KNearestNeighborClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher},
 *                       or {@code null} to use a new
 *                       {@link org.apache.lucene.search.similarities.BM25Similarity}
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the
 *                       classifier, or {@code null} if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    {@link MoreLikeThis#minDocFreq} parameter; only applied when greater than zero
 * @param minTermFreq    {@link MoreLikeThis#minTermFreq} parameter; only applied when greater than zero
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can
 *                       contain boosting indication e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k,
                                  int minDocsFreq, int minTermFreq, String classFieldName, String... textFieldNames) {
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;

  // MoreLikeThis setup.
  this.mlt = new MoreLikeThis(indexReader);
  this.mlt.setAnalyzer(analyzer);
  this.mlt.setFieldNames(textFieldNames);

  // Searcher setup: fall back to BM25 when the caller supplies no similarity.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());

  // Non-positive frequency thresholds leave the MoreLikeThis settings untouched.
  if (minDocsFreq > 0) {
    mlt.setMinDocFreq(minDocsFreq);
  }
  if (minTermFreq > 0) {
    mlt.setMinTermFreq(minTermFreq);
  }

  this.query = query;
  this.k = k;
}
Example #4
Source File: TestTaxonomyFacetCounts.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Indexes one document holding a text field and a facet field, using a per-field similarity
 * wrapper that asserts it is only ever asked about the field named "field".
 */
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxonomyDir = newDirectory();

  IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));
  PerFieldSimilarityWrapper checkingSimilarity = new PerFieldSimilarityWrapper() {
    final Similarity sim = new ClassicSimilarity();

    @Override
    public Similarity get(String name) {
      // Fails the test if a similarity is requested for any other field.
      assertEquals("field", name);
      return sim;
    }
  };
  writerConfig.setSimilarity(checkingSimilarity);

  TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
  FacetsConfig facetsConfig = new FacetsConfig();

  Document document = new Document();
  document.add(newTextField("field", "text", Field.Store.NO));
  document.add(new FacetField("a", "path"));
  indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));

  indexWriter.close();
  IOUtils.close(taxonomyWriter, indexDir, taxonomyDir);
}
Example #5
Source File: TestValueSources.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testTF() throws Exception { Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new ClassicSimilarity()); ValueSource vs = new TFValueSource("bogus", "bogus", "text", new BytesRef("test")); assertHits(new FunctionQuery(vs), new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) }); assertAllExist(vs); vs = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar")); assertHits(new FunctionQuery(vs), new float[] { 0f, 1f }); assertAllExist(vs); // regardless of whether norms exist, value source exists == 0 vs = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus")); assertHits(new FunctionQuery(vs), new float[] { 0F, 0F }); assertAllExist(vs); } finally { searcher.setSimilarity(saved); } }
Example #6
Source File: TestValueSources.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testNorm() throws Exception { Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new ClassicSimilarity()); ValueSource vs = new NormValueSource("byte"); assertHits(new FunctionQuery(vs), new float[] { 1f, 1f }); // regardless of whether norms exist, value source exists == 0 assertAllExist(vs); vs = new NormValueSource("text"); assertAllExist(vs); } finally { searcher.setSimilarity(saved); } }
Example #7
Source File: TestBulkSchemaAPI.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * whitebox checks the Similarity for the specified field according to {@link SolrCore#getLatestSchema} * * Executes each of the specified Similarity-accepting validators. */ @SafeVarargs @SuppressWarnings({"unchecked", "varargs"}) private static <T extends Similarity> void assertFieldSimilarity(String fieldname, Class<T> expected, Consumer<T>... validators) { CoreContainer cc = jetty.getCoreContainer(); try (SolrCore core = cc.getCore("collection1")) { SimilarityFactory simfac = core.getLatestSchema().getSimilarityFactory(); assertNotNull(simfac); assertTrue("test only works with SchemaSimilarityFactory", simfac instanceof SchemaSimilarityFactory); Similarity mainSim = core.getLatestSchema().getSimilarity(); assertNotNull(mainSim); // sanity check simfac vs sim in use - also verify infom called on simfac, otherwise exception assertEquals(mainSim, simfac.getSimilarity()); assertTrue("test only works with PerFieldSimilarityWrapper, SchemaSimilarityFactory redefined?", mainSim instanceof PerFieldSimilarityWrapper); Similarity fieldSim = ((PerFieldSimilarityWrapper)mainSim).get(fieldname); assertEquals("wrong sim for field=" + fieldname, expected, fieldSim.getClass()); Arrays.asList(validators).forEach(v -> v.accept((T)fieldSim)); } }
Example #8
Source File: IndexSchema.java From lucene-solr with Apache License 2.0 | 6 votes |
static SimilarityFactory readSimilarity(SolrResourceLoader loader, Node node) { if (node==null) { return null; } else { SimilarityFactory similarityFactory; final String classArg = ((Element) node).getAttribute(SimilarityFactory.CLASS_NAME); final Object obj = loader.newInstance(classArg, Object.class, "search.similarities."); if (obj instanceof SimilarityFactory) { // configure a factory, get a similarity back final NamedList<Object> namedList = DOMUtil.childNodesToNamedList(node); namedList.add(SimilarityFactory.CLASS_NAME, classArg); SolrParams params = namedList.toSolrParams(); similarityFactory = (SimilarityFactory)obj; similarityFactory.init(params); } else { // just like always, assume it's a Similarity and get a ClassCastException - reasonable error handling similarityFactory = new SimilarityFactory() { @Override public Similarity getSimilarity() { return (Similarity) obj; } }; } return similarityFactory; } }
Example #9
Source File: IndexManager.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
/**
 * Captures everything the parallel call needs; each argument is stored unchanged in the
 * correspondingly named underscore-prefixed field.
 */
public SimpleQueryParallelCall(
    AtomicBoolean running,
    String table,
    QueryStatus status,
    Query query,
    Selector selector,
    Meter queriesInternalMeter,
    ShardServerContext shardServerContext,
    boolean runSlow,
    int fetchCount,
    int maxHeapPerRowFetch,
    Similarity similarity,
    TableContext context,
    Sort sort,
    DeepPagingCache deepPagingCache,
    MemoryAllocationWatcher memoryAllocationWatcher) {
  // Query identity and state.
  _running = running;
  _table = table;
  _status = status;
  _query = query;
  _selector = selector;

  // Execution environment.
  _queriesInternalMeter = queriesInternalMeter;
  _shardServerContext = shardServerContext;
  _runSlow = runSlow;

  // Fetch limits.
  _fetchCount = fetchCount;
  _maxHeapPerRowFetch = maxHeapPerRowFetch;

  // Scoring, ordering and paging collaborators.
  _similarity = similarity;
  _context = context;
  _sort = sort;
  _deepPagingCache = deepPagingCache;
  _memoryAllocationWatcher = memoryAllocationWatcher;
}
Example #10
Source File: CustomSpanWeight.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Computes the similarity weight for a span query from per-term and per-field statistics.
 *
 * @param query        the query whose field supplies the collection statistics
 * @param searcher     the searcher used to look up term and collection statistics
 * @param termContexts the terms of the query with their contexts; may be {@code null}
 * @return the weight computed by {@code similarity}, or {@code null} when there are no term
 *     contexts or the query has no field
 * @throws IOException propagated from the statistics lookups
 */
private Similarity.SimWeight buildSimWeight(CustomSpanQuery query, IndexSearcher searcher, Map<Term, TermContext> termContexts) throws IOException {
  if (termContexts == null || termContexts.isEmpty() || query.getField() == null) {
    return null;
  }

  TermStatistics[] termStats = new TermStatistics[termContexts.size()];
  int i = 0;
  // Walk the entries directly so each term's context is not looked up a second time.
  for (Map.Entry<Term, TermContext> entry : termContexts.entrySet()) {
    termStats[i++] = searcher.termStatistics(entry.getKey(), entry.getValue());
  }

  CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  return similarity.computeWeight(collectionStats, termStats);
}
Example #11
Source File: Config.java From xltsearch with Apache License 2.0 | 5 votes |
void resolve() { if (resolved) { return; } // else: resolved == false if (getLastUpdated() == INDEX_INVALIDATED) { return; } // hashAlgorithm hashAlgorithm = get("hash.algorithm"); if (hashAlgorithm == null) { return; } // version version = get("lucene.version"); if (version == null) { return; } // analyzer Function<Version,Analyzer> analyzerFactory = get("lucene.analyzer"); if (analyzerFactory == null) { return; } analyzer = analyzerFactory.apply(version); // similarity Supplier<Similarity> similarityFactory = get("scoring.model"); if (similarityFactory == null) { return; } similarity = similarityFactory.get(); // directory Function<File,Directory> directoryFactory = get("directory.type"); if (directoryFactory == null) { return; } directory = directoryFactory.apply( new File(configDir.getPath() + File.separator + INDEX_DIR)); if (directory == null) { return; } // we made it: config is properly resolved resolved = true; }
Example #12
Source File: TestLongNormValueSource.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testNorm() throws Exception { Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(sim); assertHits(new FunctionQuery( new NormValueSource("text")), new float[] { 0f, 0f }); } finally { searcher.setSimilarity(saved); } }
Example #13
Source File: SimilarityService.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Wires up the per-index similarity.
 *
 * <p>The default similarity is looked up under {@code SimilarityLookupService.DEFAULT_SIMILARITY}.
 * The base similarity is the one registered as "base" if present, otherwise the default. When a
 * mapper service is supplied a {@code PerFieldSimilarity} is built from the two; without one the
 * default similarity is used directly.
 *
 * @param index                   the index this service belongs to
 * @param indexSettingsService    supplies the settings passed to the superclass
 * @param similarityLookupService registry of named similarity providers
 * @param mapperService           field mappings, or {@code null}
 */
@Inject
public SimilarityService(Index index, IndexSettingsService indexSettingsService,
                         final SimilarityLookupService similarityLookupService, final MapperService mapperService) {
  super(index, indexSettingsService.getSettings());
  this.similarityLookupService = similarityLookupService;
  this.mapperService = mapperService;

  Similarity defaultSimilarity = similarityLookupService.similarity(SimilarityLookupService.DEFAULT_SIMILARITY).get();
  // Expert users can configure the base type as being different to default, but out-of-box we use default.
  Similarity baseSimilarity = (similarityLookupService.similarity("base") != null)
      ? similarityLookupService.similarity("base").get()
      : defaultSimilarity;

  this.perFieldSimilarity = (mapperService != null)
      ? new PerFieldSimilarity(defaultSimilarity, baseSimilarity, mapperService)
      : defaultSimilarity;
}
Example #14
Source File: SchemaSimilarityFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns the similarity for the named field: the field type's own similarity when the latest
 * schema knows the field and its type defines one, otherwise {@code defaultSimilarity}.
 */
@Override
public Similarity get(String name) {
  final FieldType type = core.getLatestSchema().getFieldTypeNoEx(name);
  if (type == null) {
    return defaultSimilarity;
  }
  final Similarity fieldSimilarity = type.getSimilarity();
  return fieldSimilarity != null ? fieldSimilarity : defaultSimilarity;
}
Example #15
Source File: TestValueSources.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code IDFValueSource} for the term "test" in field "text" yields {@code 1.0f}
 * for both hits under {@link ClassicSimilarity}; the searcher's previous similarity is restored
 * afterwards.
 */
public void testIDF() throws Exception {
  final Similarity previous = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new ClassicSimilarity());

    final ValueSource idfSource = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    final float[] expectedScores = new float[] { 1.0f, 1.0f };
    assertHits(new FunctionQuery(idfSource), expectedScores);
    assertAllExist(idfSource);
  } finally {
    searcher.setSimilarity(previous);
  }
}
Example #16
Source File: IntervalScoreFunction.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public Similarity.SimScorer scorer(float weight) { return new Similarity.SimScorer() { @Override public float score(float freq, long norm) { // should be f^a / (f^a + k^a) but we rewrite it to // 1 - k^a / (f + k^a) to make sure it doesn't decrease // with f in spite of rounding return (float) (weight * (1.0f - pivotPa / (Math.pow(freq, a) + pivotPa))); } }; }
Example #17
Source File: IntervalScoreFunction.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public Similarity.SimScorer scorer(float weight) { return new Similarity.SimScorer() { @Override public float score(float freq, long norm) { // should be f / (f + k) but we rewrite it to // 1 - k / (f + k) to make sure it doesn't decrease // with f in spite of rounding return weight * (1.0f - pivot / (pivot + freq)); } }; }
Example #18
Source File: IDFValueSource.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Unwraps any {@link PerFieldSimilarityWrapper} layers for the given field and returns the
 * result as a {@link TFIDFSimilarity}.
 *
 * @param sim   the similarity to inspect
 * @param field the field used to resolve per-field wrappers
 * @return the unwrapped similarity if it is a {@link TFIDFSimilarity}, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity resolved = sim;
  while (resolved instanceof PerFieldSimilarityWrapper) {
    resolved = ((PerFieldSimilarityWrapper) resolved).get(field);
  }
  return (resolved instanceof TFIDFSimilarity) ? (TFIDFSimilarity) resolved : null;
}
Example #19
Source File: SearchImpl.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds the similarity described by the configuration: a {@link ClassicSimilarity} when
 * {@code isUseClassicSimilarity()} is true, otherwise a {@link BM25Similarity} created with the
 * configured k1 and b. In both cases the configured discount-overlaps flag is applied.
 */
private Similarity createSimilarity(SimilarityConfig config) {
  if (config.isUseClassicSimilarity()) {
    final ClassicSimilarity classic = new ClassicSimilarity();
    classic.setDiscountOverlaps(config.isDiscountOverlaps());
    return classic;
  }

  final BM25Similarity okapi = new BM25Similarity(config.getK1(), config.getB());
  okapi.setDiscountOverlaps(config.isDiscountOverlaps());
  return okapi;
}
Example #20
Source File: LMDirichletSimilarityFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates an {@link LMDirichletSimilarity}, passing {@code mu} to the constructor when it is
 * set and using the no-argument constructor otherwise, then applies {@code discountOverlaps}.
 */
@Override
public Similarity getSimilarity() {
  final LMDirichletSimilarity dirichlet;
  if (mu == null) {
    dirichlet = new LMDirichletSimilarity();
  } else {
    dirichlet = new LMDirichletSimilarity(mu);
  }
  dirichlet.setDiscountOverlaps(discountOverlaps);
  return dirichlet;
}
Example #21
Source File: KNearestFuzzyClassifier.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link KNearestFuzzyClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher},
 *                       or {@code null} to use a new {@link BM25Similarity}
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the
 *                       classifier, or {@code null} if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can
 *                       contain boosting indication e.g. title^10
 */
public KNearestFuzzyClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k,
                               String classFieldName, String... textFieldNames) {
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;
  this.analyzer = analyzer;

  // Fall back to BM25 when the caller supplies no similarity.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());

  this.query = query;
  this.k = k;
}
Example #22
Source File: TestNorms.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns a new {@code ByteEncodingBoostSimilarity} for {@code BYTE_TEST_FIELD} and
 * {@code delegate} for every other field.
 */
@Override
public Similarity get(String field) {
  return BYTE_TEST_FIELD.equals(field)
      ? new ByteEncodingBoostSimilarity()
      : delegate;
}
Example #23
Source File: TestDFISimilarityFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** dfi with discountOverlaps parameter set to false */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("text_params");
  assertEquals(DFISimilarity.class, configured.getClass());

  final DFISimilarity dfi = (DFISimilarity) configured;
  assertFalse(dfi.getDiscountOverlaps());
}
Example #24
Source File: TestCustomNorms.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns a new {@code FloatEncodingBoostSimilarity} for {@code FLOAT_TEST_FIELD} and
 * {@code delegate} for every other field.
 */
@Override
public Similarity get(String field) {
  return FLOAT_TEST_FIELD.equals(field)
      ? new FloatEncodingBoostSimilarity()
      : delegate;
}
Example #25
Source File: TestLegacyBM25SimilarityFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** bm25 with parameters */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("legacy_text_params");
  assertEquals(LegacyBM25Similarity.class, configured.getClass());

  final LegacyBM25Similarity legacy = (LegacyBM25Similarity) configured;
  final float tolerance = 0.01f;
  assertEquals(1.2f, legacy.getK1(), tolerance);
  assertEquals(0.76f, legacy.getB(), tolerance);
}
Example #26
Source File: TestCustomNorms.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Indexes at least 100 documents, each carrying a random boost in 1..10 written into
 * {@code FLOAT_TEST_FIELD} as the boost number repeated boost times, then verifies that the
 * norm stored for every document equals that number.
 */
public void testFloatNorms() throws IOException {
  Directory directory = newDirectory();
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 2, IndexWriter.MAX_TERM_LENGTH));

  IndexWriterConfig writerConfig = newIndexWriterConfig(mockAnalyzer);
  Similarity provider = new MySimProvider();
  writerConfig.setSimilarity(provider);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, writerConfig);
  final LineFileDocs lineDocs = new LineFileDocs(random());

  // ---- indexing phase ----
  int docCount = atLeast(100);
  for (int n = 0; n < docCount; n++) {
    Document doc = lineDocs.nextDoc();
    int boost = TestUtil.nextInt(random(), 1, 10);
    // Field value is the boost printed "boost" times, separated by single spaces.
    String value = IntStream.range(0, boost)
        .mapToObj(k -> Integer.toString(boost))
        .collect(Collectors.joining(" "));
    Field boostField = new TextField(FLOAT_TEST_FIELD, value, Field.Store.YES);
    doc.add(boostField);
    indexWriter.addDocument(doc);
    // Remove the field again before the next iteration adds a fresh one.
    doc.removeField(FLOAT_TEST_FIELD);
    if (rarely()) {
      indexWriter.commit();
    }
  }
  indexWriter.commit();
  indexWriter.close();

  // ---- verification phase ----
  DirectoryReader reader = DirectoryReader.open(directory);
  NumericDocValues norms = MultiDocValues.getNormValues(reader, FLOAT_TEST_FIELD);
  assertNotNull(norms);
  for (int docId = 0; docId < reader.maxDoc(); docId++) {
    Document stored = reader.document(docId);
    // The first token of the stored value is the boost that was indexed.
    int expected = Integer.parseInt(stored.get(FLOAT_TEST_FIELD).split(" ")[0]);
    assertEquals(docId, norms.nextDoc());
    assertEquals(expected, norms.longValue());
  }

  reader.close();
  directory.close();
  lineDocs.close();
}
Example #27
Source File: TestSimilarityProvider.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Returns {@code sim1} for the field named "foo" and {@code sim2} for every other field. */
@Override
public Similarity get(String field) {
  return field.equals("foo") ? sim1 : sim2;
}
Example #28
Source File: TestDFRSimilarityFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** dfr with parametrized normalization */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("text_params");
  assertEquals(DFRSimilarity.class, configured.getClass());
  final DFRSimilarity dfrSim = (DFRSimilarity) configured;

  // The three DFR components must be exactly the configured implementations.
  assertEquals(BasicModelIF.class, dfrSim.getBasicModel().getClass());
  assertEquals(AfterEffectB.class, dfrSim.getAfterEffect().getClass());
  assertEquals(NormalizationH3.class, dfrSim.getNormalization().getClass());

  final NormalizationH3 h3 = (NormalizationH3) dfrSim.getNormalization();
  assertEquals(900f, h3.getMu(), 0.01f);
}
Example #29
Source File: DefaultIndexingChain.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Stores the per-field collaborators and, when {@code invert} is true, calls
 * {@code setInvertState()} after all fields have been assigned.
 */
PerField(int indexCreatedVersionMajor,
         FieldInfo fieldInfo,
         boolean invert,
         Similarity similarity,
         InfoStream infoStream,
         Analyzer analyzer) {
  this.indexCreatedVersionMajor = indexCreatedVersionMajor;
  this.fieldInfo = fieldInfo;
  this.similarity = similarity;
  this.infoStream = infoStream;
  this.analyzer = analyzer;

  if (!invert) {
    return;
  }
  setInvertState();
}
Example #30
Source File: BaseSimilarityTestCase.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns the similarity in use for the field,
 * after asserting that it implements the specified class.
 *
 * @param field the field whose similarity is looked up
 * @param clazz the class the similarity must be an instance of
 * @return the field's similarity, cast to {@code T}
 */
protected <T extends Similarity> T getSimilarity(String field, Class<T> clazz) {
  final Similarity actual = getSimilarity(field);
  final String failureMessage =
      "Similarity for Field " + field + " does not match expected class: " + clazz.getName();
  assertTrue(failureMessage, clazz.isInstance(actual));
  return clazz.cast(actual);
}