org.apache.lucene.search.similarities.Similarity Java Examples

Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

6 votes

/**
 * Builds the span scorer for a single index segment.
 *
 * @param context the leaf reader context of the segment being scored
 * @return a {@link SpanScorer} over the spans of this segment, or {@code null} when
 *         {@code field} is {@code null} or {@code getSpans} returns {@code null}
 * @throws IOException if reading terms or spans from the segment fails
 * @throws IllegalStateException if the field has terms but they carry no position data
 */
@Override
public SpanScorer scorer(LeafReaderContext context) throws IOException {
    if (field == null) {
        return null;
    }

    // Span queries cannot run on a field indexed without positions, so fail loudly.
    final Terms fieldTerms = context.reader().terms(field);
    final boolean positionsMissing = fieldTerms != null && !fieldTerms.hasPositions();
    if (positionsMissing) {
        throw new IllegalStateException("field \"" + field +
                "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
    }

    final Spans matches = getSpans(context, Postings.PAYLOADS);
    if (matches == null) {
        return null;
    }
    return new SpanScorer(this, matches, getSimScorer(context));
}

Source File: ScoringMatch.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates a factory for matchers that score every hit using the supplied similarity.
 *
 * <p>The similarity is installed on the searcher handed to the factory; hits whose score is
 * not greater than zero are dropped, and two matches for the same query are merged by adding
 * their scores.
 *
 * @param similarity the similarity to set on the searcher before matching
 * @return a factory producing score-collecting matchers
 */
public static final MatcherFactory<ScoringMatch> matchWithSimilarity(Similarity similarity) {
  return searcher -> {
    searcher.setSimilarity(similarity);
    return new CollectingMatcher<ScoringMatch>(searcher, ScoreMode.COMPLETE) {
      @Override
      protected ScoringMatch doMatch(String queryId, int doc, Scorable scorer) throws IOException {
        final float hitScore = scorer.score();
        // A non-positive score is reported as "no match".
        return hitScore > 0 ? new ScoringMatch(queryId, hitScore) : null;
      }

      @Override
      public ScoringMatch resolve(ScoringMatch match1, ScoringMatch match2) {
        // The merged match keeps the first match's query id and sums both scores.
        final String mergedId = match1.getQueryId();
        return new ScoringMatch(mergedId, match1.getScore() + match2.getScore());
      }
    };
  };
}

Source File: KNearestNeighborClassifier.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates a {@link KNearestNeighborClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher}, or {@code null}
 *                       to fall back to {@link org.apache.lucene.search.similarities.BM25Similarity}
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    passed to {@link MoreLikeThis#setMinDocFreq(int)} only when greater than zero
 * @param minTermFreq    passed to {@link MoreLikeThis#setMinTermFreq(int)} only when greater than zero
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k, int minDocsFreq,
                                  int minTermFreq, String classFieldName, String... textFieldNames) {
  // Plain configuration values.
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;
  this.query = query;
  this.k = k;

  // MoreLikeThis setup; non-positive frequency thresholds leave the MLT settings untouched.
  this.mlt = new MoreLikeThis(indexReader);
  this.mlt.setAnalyzer(analyzer);
  this.mlt.setFieldNames(textFieldNames);
  if (minDocsFreq > 0) {
    this.mlt.setMinDocFreq(minDocsFreq);
  }
  if (minTermFreq > 0) {
    this.mlt.setMinTermFreq(minTermFreq);
  }

  // Searcher setup; BM25 is used when the caller supplies no similarity.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());
}

Source File: TestTaxonomyFacetCounts.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Indexes one document holding a text field and a facet field while using a per-field
 * similarity that asserts it is only ever asked about the field named "field".
 */
public void testReallyNoNormsForDrillDown() throws Exception {
  final Directory indexDir = newDirectory();
  final Directory taxonomyDir = newDirectory();
  final IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));

  // Any similarity lookup for a field other than "field" fails the test.
  final Similarity strictPerField = new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    };
  writerConfig.setSimilarity(strictPerField);

  final TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
  final FacetsConfig facetsConfig = new FacetsConfig();

  final Document document = new Document();
  document.add(newTextField("field", "text", Field.Store.NO));
  document.add(new FacetField("a", "path"));
  indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));

  indexWriter.close();
  IOUtils.close(taxonomyWriter, indexDir, taxonomyDir);
}

Source File: TestValueSources.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Checks {@link TFValueSource} hits for the "text", "string" and "bogus" fields under
 * {@link ClassicSimilarity}; the searcher's previous similarity is restored afterwards.
 */
public void testTF() throws Exception {
  final Similarity original = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    final ValueSource textTf = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    final float[] expectedTextTf = { (float)Math.sqrt(3d), (float)Math.sqrt(1d) };
    assertHits(new FunctionQuery(textTf), expectedTextTf);
    assertAllExist(textTf);

    final ValueSource stringTf = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
    assertHits(new FunctionQuery(stringTf), new float[] { 0f, 1f });
    assertAllExist(stringTf);

    // regardless of whether norms exist, value source exists == 0
    final ValueSource missingTf = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
    assertHits(new FunctionQuery(missingTf), new float[] { 0F, 0F });
    assertAllExist(missingTf);
  } finally {
    // Always hand the searcher back in its original state.
    searcher.setSimilarity(original);
  }
}

Source File: TestValueSources.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Checks {@link NormValueSource} on the "byte" and "text" fields under
 * {@link ClassicSimilarity}; the searcher's previous similarity is restored afterwards.
 */
public void testNorm() throws Exception {
  final Similarity original = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    final ValueSource byteNorm = new NormValueSource("byte");
    assertHits(new FunctionQuery(byteNorm), new float[] { 1f, 1f });
    // regardless of whether norms exist, value source exists == 0
    assertAllExist(byteNorm);

    final ValueSource textNorm = new NormValueSource("text");
    assertAllExist(textNorm);
  } finally {
    // Always hand the searcher back in its original state.
    searcher.setSimilarity(original);
  }
}

Source File: TestBulkSchemaAPI.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Whitebox check of the Similarity configured for a field, according to
 * {@link SolrCore#getLatestSchema}.
 *
 * <p>Asserts that the schema uses a {@code SchemaSimilarityFactory} whose similarity is a
 * {@code PerFieldSimilarityWrapper}, that the per-field similarity has exactly the expected
 * class, and then runs every validator against it.
 *
 * @param fieldname  the field whose similarity is inspected
 * @param expected   the exact class the field's similarity must have
 * @param validators additional checks, each handed the field's similarity
 */
@SafeVarargs
@SuppressWarnings({"unchecked", "varargs"})
private static <T extends Similarity> void assertFieldSimilarity(String fieldname, Class<T> expected, Consumer<T>... validators) {
  final CoreContainer container = jetty.getCoreContainer();
  try (SolrCore core = container.getCore("collection1")) {
    final SimilarityFactory factory = core.getLatestSchema().getSimilarityFactory();
    assertNotNull(factory);
    assertTrue("test only works with SchemaSimilarityFactory",
               factory instanceof SchemaSimilarityFactory);

    final Similarity schemaSim = core.getLatestSchema().getSimilarity();
    assertNotNull(schemaSim);

    // sanity check factory vs sim in use - also verify inform called on factory, otherwise exception
    assertEquals(schemaSim, factory.getSimilarity());

    assertTrue("test only works with PerFieldSimilarityWrapper, SchemaSimilarityFactory redefined?",
               schemaSim instanceof PerFieldSimilarityWrapper);
    final Similarity perFieldSim = ((PerFieldSimilarityWrapper) schemaSim).get(fieldname);
    assertEquals("wrong sim for field=" + fieldname, expected, perFieldSim.getClass());

    // The class equality assertion above is what makes this unchecked cast safe.
    final T typedSim = (T) perFieldSim;
    for (Consumer<T> validator : validators) {
      validator.accept(typedSim);
    }
  }
}

Source File: IndexSchema.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reads the similarity configuration from a schema DOM node.
 *
 * <p>The class named by the node is instantiated. If it is a {@link SimilarityFactory}, it is
 * initialised with the node's child parameters plus the class name. Otherwise the instance is
 * wrapped in a factory that casts it to {@link Similarity} when asked.
 *
 * @param loader resource loader used to instantiate the configured class
 * @param node   the similarity element, or {@code null} when none is configured
 * @return the configured factory, or {@code null} when {@code node} is {@code null}
 */
static SimilarityFactory readSimilarity(SolrResourceLoader loader, Node node) {
  if (node == null) {
    return null;
  }

  final String classArg = ((Element) node).getAttribute(SimilarityFactory.CLASS_NAME);
  final Object obj = loader.newInstance(classArg, Object.class, "search.similarities.");

  if (!(obj instanceof SimilarityFactory)) {
    // just like always, assume it's a Similarity and get a ClassCastException - reasonable error handling
    return new SimilarityFactory() {
      @Override
      public Similarity getSimilarity() {
        return (Similarity) obj;
      }
    };
  }

  // configure a factory, get a similarity back
  final SimilarityFactory configuredFactory = (SimilarityFactory) obj;
  final NamedList<Object> initArgs = DOMUtil.childNodesToNamedList(node);
  initArgs.add(SimilarityFactory.CLASS_NAME, classArg);
  configuredFactory.init(initArgs.toSolrParams());
  return configuredFactory;
}

Source File: IndexManager.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Stores every collaborator and setting passed in; the constructor performs no other work.
 */
public SimpleQueryParallelCall(AtomicBoolean running, String table, QueryStatus status, Query query,
    Selector selector, Meter queriesInternalMeter, ShardServerContext shardServerContext, boolean runSlow,
    int fetchCount, int maxHeapPerRowFetch, Similarity similarity, TableContext context, Sort sort,
    DeepPagingCache deepPagingCache, MemoryAllocationWatcher memoryAllocationWatcher) {
  // Query definition.
  _table = table;
  _query = query;
  _selector = selector;
  _sort = sort;
  _similarity = similarity;

  // Execution state and limits.
  _running = running;
  _status = status;
  _runSlow = runSlow;
  _fetchCount = fetchCount;
  _maxHeapPerRowFetch = maxHeapPerRowFetch;

  // Shared services and contexts.
  _context = context;
  _shardServerContext = shardServerContext;
  _queriesInternalMeter = queriesInternalMeter;
  _deepPagingCache = deepPagingCache;
  _memoryAllocationWatcher = memoryAllocationWatcher;
}

Source File: CustomSpanWeight.java From pyramid with Apache License 2.0

5 votes

/**
 * Computes the similarity weight for a span query from its term and collection statistics.
 *
 * @param query        the span query being weighted; its field selects the collection statistics
 * @param searcher     the searcher used to look up term and collection statistics
 * @param termContexts the per-term contexts of the query, may be {@code null}
 * @return the weight computed by {@code similarity}, or {@code null} when there are no term
 *         contexts or the query has no field
 * @throws IOException if reading statistics from the index fails
 */
private Similarity.SimWeight buildSimWeight(CustomSpanQuery query, IndexSearcher searcher, Map<Term, TermContext> termContexts) throws IOException {
  if (termContexts == null || termContexts.isEmpty() || query.getField() == null) {
    return null;
  }
  final TermStatistics[] termStats = new TermStatistics[termContexts.size()];
  int i = 0;
  // Walk the entries directly so each term's context is not looked up a second time.
  for (Map.Entry<Term, TermContext> entry : termContexts.entrySet()) {
    termStats[i++] = searcher.termStatistics(entry.getKey(), entry.getValue());
  }
  final CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  return similarity.computeWeight(collectionStats, termStats);
}

Source File: Config.java From xltsearch with Apache License 2.0

5 votes

/**
 * Resolves the configuration step by step: hash algorithm, Lucene version, analyzer,
 * similarity and directory, in that order.
 *
 * <p>Resolution stops at the first setting that comes back {@code null}. Fields assigned
 * before that point keep their new values and {@code resolved} stays {@code false}. Nothing
 * happens if the config is already resolved or the index has been invalidated.
 */
void resolve() {
    if (resolved || getLastUpdated() == INDEX_INVALIDATED) {
        return;
    }

    // hashAlgorithm
    hashAlgorithm = get("hash.algorithm");
    if (hashAlgorithm == null) {
        return;
    }

    // version
    version = get("lucene.version");
    if (version == null) {
        return;
    }

    // analyzer
    Function<Version,Analyzer> analyzerMaker = get("lucene.analyzer");
    if (analyzerMaker == null) {
        return;
    }
    analyzer = analyzerMaker.apply(version);

    // similarity
    Supplier<Similarity> similarityMaker = get("scoring.model");
    if (similarityMaker == null) {
        return;
    }
    similarity = similarityMaker.get();

    // directory
    Function<File,Directory> directoryMaker = get("directory.type");
    if (directoryMaker == null) {
        return;
    }
    File indexLocation = new File(configDir.getPath() + File.separator + INDEX_DIR);
    directory = directoryMaker.apply(indexLocation);
    if (directory == null) {
        return;
    }

    // we made it: config is properly resolved
    resolved = true;
}

Source File: TestLongNormValueSource.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that {@link NormValueSource} on the "text" field yields the expected hit values
 * under the test's {@code sim}; the searcher's previous similarity is restored afterwards.
 */
public void testNorm() throws Exception {
  final Similarity original = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(sim);
    final FunctionQuery normQuery = new FunctionQuery(new NormValueSource("text"));
    final float[] expectedScores = { 0f, 0f };
    assertHits(normQuery, expectedScores);
  } finally {
    // Always hand the searcher back in its original state.
    searcher.setSimilarity(original);
  }
}

Source File: SimilarityService.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Wires the similarity service for one index.
 *
 * <p>The similarity registered as "base" is used as the base similarity when present,
 * otherwise the default similarity is used. When a mapper service is available, a
 * {@link PerFieldSimilarity} is built from both; without one the default similarity is
 * used directly.
 */
@Inject
public SimilarityService(Index index, IndexSettingsService indexSettingsService,
                         final SimilarityLookupService similarityLookupService, final MapperService mapperService) {
    super(index, indexSettingsService.getSettings());
    this.similarityLookupService = similarityLookupService;
    this.mapperService = mapperService;

    final Similarity defaultSimilarity =
            similarityLookupService.similarity(SimilarityLookupService.DEFAULT_SIMILARITY).get();

    // Expert users can configure the base type as being different to default, but out-of-box we use default.
    final Similarity baseSimilarity;
    if (similarityLookupService.similarity("base") == null) {
        baseSimilarity = defaultSimilarity;
    } else {
        baseSimilarity = similarityLookupService.similarity("base").get();
    }

    if (mapperService == null) {
        this.perFieldSimilarity = defaultSimilarity;
    } else {
        this.perFieldSimilarity = new PerFieldSimilarity(defaultSimilarity, baseSimilarity, mapperService);
    }
}

Source File: SchemaSimilarityFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Looks up the similarity for a field via the latest schema's field type.
 *
 * @param name the field name
 * @return the field type's similarity, or {@code defaultSimilarity} when the field type is
 *         unknown or declares no similarity
 */
@Override
public Similarity get(String name) {
  final FieldType type = core.getLatestSchema().getFieldTypeNoEx(name);
  if (type != null) {
    final Similarity configured = type.getSimilarity();
    if (configured != null) {
      return configured;
    }
  }
  return defaultSimilarity;
}

Source File: TestValueSources.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks {@link IDFValueSource} for term "test" in field "text" under
 * {@link ClassicSimilarity}; the searcher's previous similarity is restored afterwards.
 */
public void testIDF() throws Exception {
  final Similarity original = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new ClassicSimilarity());
    final ValueSource idf = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    final float[] expectedIdf = { 1.0f, 1.0f };
    assertHits(new FunctionQuery(idf), expectedIdf);
    assertAllExist(idf);
  } finally {
    // Always hand the searcher back in its original state.
    searcher.setSimilarity(original);
  }
}

Source File: IntervalScoreFunction.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a scorer that maps a frequency to
 * {@code weight * (1 - pivotPa / (freq^a + pivotPa))}, narrowed to {@code float}.
 * The {@code norm} argument is not used by the returned scorer.
 *
 * @param weight the multiplier applied to the score
 */
@Override
public Similarity.SimScorer scorer(float weight) {
  return new Similarity.SimScorer() {
    @Override
    public float score(float freq, long norm) {
      // should be f^a / (f^a + k^a) but we rewrite it to
      // 1 - k^a / (f + k^a) to make sure it doesn't decrease
      // with f in spite of rounding
      // NOTE(review): pivotPa presumably holds the precomputed k^a term - confirm at its declaration
      return (float) (weight * (1.0f - pivotPa / (Math.pow(freq, a) + pivotPa)));
    }
  };
}

Source File: IntervalScoreFunction.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a scorer that maps a frequency to {@code weight * (1 - pivot / (pivot + freq))}.
 * The {@code norm} argument is not used by the returned scorer.
 *
 * @param weight the multiplier applied to the score
 */
@Override
public Similarity.SimScorer scorer(float weight) {
  return new Similarity.SimScorer() {
    @Override
    public float score(float freq, long norm) {
      // should be f / (f + k) but we rewrite it to
      // 1 - k / (f + k) to make sure it doesn't decrease
      // with f in spite of rounding
      return weight * (1.0f - pivot / (pivot + freq));
    }
  };
}

Source File: IDFValueSource.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Unwraps any {@link PerFieldSimilarityWrapper} layers for the given field and returns the
 * result as a {@link TFIDFSimilarity}.
 *
 * @param sim   the similarity to inspect, possibly a per-field wrapper
 * @param field the field used to resolve per-field wrappers
 * @return the unwrapped similarity if it is a {@link TFIDFSimilarity}, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity unwrapped = sim;
  // Wrappers may nest, so keep resolving until a concrete similarity is reached.
  while (unwrapped instanceof PerFieldSimilarityWrapper) {
    unwrapped = ((PerFieldSimilarityWrapper) unwrapped).get(field);
  }
  return (unwrapped instanceof TFIDFSimilarity) ? (TFIDFSimilarity) unwrapped : null;
}

Source File: SearchImpl.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds the similarity described by the given configuration.
 *
 * @param config selects classic TF-IDF or BM25 (with its k1 and b values) and the
 *               discount-overlaps flag applied to either
 * @return a {@link ClassicSimilarity} when the config asks for it, otherwise a
 *         {@link BM25Similarity}
 */
private Similarity createSimilarity(SimilarityConfig config) {
  if (!config.isUseClassicSimilarity()) {
    final BM25Similarity bm25 = new BM25Similarity(config.getK1(), config.getB());
    bm25.setDiscountOverlaps(config.isDiscountOverlaps());
    return bm25;
  }

  final ClassicSimilarity classic = new ClassicSimilarity();
  classic.setDiscountOverlaps(config.isDiscountOverlaps());
  return classic;
}

Source File: LMDirichletSimilarityFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates an {@link LMDirichletSimilarity}, passing {@code mu} when it is set and using the
 * no-argument constructor otherwise, then applies the {@code discountOverlaps} flag.
 *
 * @return the configured similarity
 */
@Override
public Similarity getSimilarity() {
  final LMDirichletSimilarity dirichlet;
  if (mu == null) {
    dirichlet = new LMDirichletSimilarity();
  } else {
    dirichlet = new LMDirichletSimilarity(mu);
  }
  dirichlet.setDiscountOverlaps(discountOverlaps);
  return dirichlet;
}

Source File: KNearestFuzzyClassifier.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates a {@link KNearestFuzzyClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher}, or {@code null}
 *                       to fall back to {@link BM25Similarity}
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
 */
public KNearestFuzzyClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k,
                               String classFieldName, String... textFieldNames) {
  // Plain configuration values.
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;
  this.analyzer = analyzer;
  this.query = query;
  this.k = k;

  // Searcher setup; BM25 is used when the caller supplies no similarity.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());
}

Source File: TestNorms.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a fresh {@code ByteEncodingBoostSimilarity} for the byte test field and the
 * delegate similarity for every other field.
 */
@Override
public Similarity get(String field) {
  final boolean isByteTestField = BYTE_TEST_FIELD.equals(field);
  return isByteTestField ? new ByteEncodingBoostSimilarity() : delegate;
}

Source File: TestDFISimilarityFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * dfi with discountOverlaps parameter set to false
 */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("text_params");
  assertEquals(DFISimilarity.class, configured.getClass());
  final DFISimilarity dfi = (DFISimilarity) configured;
  assertFalse(dfi.getDiscountOverlaps());
}

Source File: TestCustomNorms.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a fresh {@code FloatEncodingBoostSimilarity} for the float test field and the
 * delegate similarity for every other field.
 */
@Override
public Similarity get(String field) {
  final boolean isFloatTestField = FLOAT_TEST_FIELD.equals(field);
  return isFloatTestField ? new FloatEncodingBoostSimilarity() : delegate;
}

Source File: TestLegacyBM25SimilarityFactory.java From lucene-solr with Apache License 2.0

5 votes

/** bm25 with parameters: expects k1 of about 1.2 and b of about 0.76 */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("legacy_text_params");
  assertEquals(LegacyBM25Similarity.class, configured.getClass());

  final LegacyBM25Similarity legacyBm25 = (LegacyBM25Similarity) configured;
  final float tolerance = 0.01f;
  assertEquals(1.2f, legacyBm25.getK1(), tolerance);
  assertEquals(0.76f, legacyBm25.getB(), tolerance);
}

Source File: TestCustomNorms.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes at least 100 documents whose float test field holds a random boost (1..10)
 * repeated that many times, then checks that each document's stored norm equals the boost.
 */
public void testFloatNorms() throws IOException {

    final Directory directory = newDirectory();
    final MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 2, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriterConfig writerConfig = newIndexWriterConfig(mockAnalyzer);
    writerConfig.setSimilarity(new MySimProvider());
    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, writerConfig);
    final LineFileDocs lineDocs = new LineFileDocs(random());
    final int docCount = atLeast(100);
    for (int n = 0; n < docCount; n++) {
      final Document lineDoc = lineDocs.nextDoc();
      final int boost = TestUtil.nextInt(random(), 1, 10);
      // The field value is the boost written `boost` times, separated by spaces.
      final String repeatedBoost = IntStream.range(0, boost)
          .mapToObj(ignored -> Integer.toString(boost))
          .collect(Collectors.joining(" "));
      final Field boostField = new TextField(FLOAT_TEST_FIELD, repeatedBoost, Field.Store.YES);

      lineDoc.add(boostField);
      indexWriter.addDocument(lineDoc);
      // Remove the field again after indexing so the next use of this document starts without it.
      lineDoc.removeField(FLOAT_TEST_FIELD);
      if (rarely()) {
        indexWriter.commit();
      }
    }
    indexWriter.commit();
    indexWriter.close();

    final DirectoryReader reader = DirectoryReader.open(directory);
    final NumericDocValues normValues = MultiDocValues.getNormValues(reader, FLOAT_TEST_FIELD);
    assertNotNull(normValues);
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
      final Document stored = reader.document(docId);
      final String firstToken = stored.get(FLOAT_TEST_FIELD).split(" ")[0];
      final int expectedNorm = Integer.parseInt(firstToken);
      assertEquals(docId, normValues.nextDoc());
      assertEquals(expectedNorm, normValues.longValue());
    }
    reader.close();
    directory.close();
    lineDocs.close();
}

Source File: TestSimilarityProvider.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns {@code sim1} for the field named "foo" and {@code sim2} for every other field.
 */
@Override
public Similarity get(String field) {
  final boolean isFoo = field.equals("foo");
  if (isFoo) {
    return sim1;
  }
  return sim2;
}

Source File: TestDFRSimilarityFactory.java From lucene-solr with Apache License 2.0

5 votes

/** dfr with parametrized normalization: expects IF / B / H3 components and mu of about 900 */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("text_params");
  assertEquals(DFRSimilarity.class, configured.getClass());

  final DFRSimilarity dfr = (DFRSimilarity) configured;
  assertEquals(BasicModelIF.class, dfr.getBasicModel().getClass());
  assertEquals(AfterEffectB.class, dfr.getAfterEffect().getClass());
  assertEquals(NormalizationH3.class, dfr.getNormalization().getClass());

  final NormalizationH3 h3 = (NormalizationH3) dfr.getNormalization();
  final float tolerance = 0.01f;
  assertEquals(900f, h3.getMu(), tolerance);
}

Source File: DefaultIndexingChain.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Stores the per-field indexing collaborators and, when the field is inverted, sets up its
 * invert state.
 *
 * @param invert whether {@code setInvertState()} should be called for this field
 */
PerField(int indexCreatedVersionMajor, FieldInfo fieldInfo, boolean invert, Similarity similarity, InfoStream infoStream, Analyzer analyzer) {
  this.fieldInfo = fieldInfo;
  this.indexCreatedVersionMajor = indexCreatedVersionMajor;
  this.analyzer = analyzer;
  this.similarity = similarity;
  this.infoStream = infoStream;

  // Invert state is set up only after every field above has been assigned.
  if (invert) {
    setInvertState();
  }
}

Source File: BaseSimilarityTestCase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Fetches the similarity in use for a field and returns it as the requested type.
 *
 * @param field the field whose similarity is looked up
 * @param clazz the class the similarity is asserted to be an instance of
 * @return the field's similarity, cast to {@code clazz}
 */
protected <T extends Similarity> T getSimilarity(String field,
                                                 Class<T> clazz) {
  final Similarity actual = getSimilarity(field);
  final String failureMessage = "Similarity for Field " + field +
      " does not match expected class: " + clazz.getName();
  assertTrue(failureMessage, clazz.isInstance(actual));
  return clazz.cast(actual);
}

org.apache.lucene.search.similarities.Similarity Java Examples