org.apache.lucene.search.spell.SuggestMode Java Examples

Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Creates a candidate generator over the terms of a single field.
 *
 * <p>The dictionary size is taken from {@code terms.getSumTotalTermFreq()}. When that call
 * reports {@code -1}, {@code reader.maxDoc()} is used as the dictionary size instead and
 * {@code useTotalTermFrequency} is set to {@code false}.
 *
 * <p>The frequency plateau is derived from the spell checker's threshold frequency: values
 * {@code >= 1} are used as-is (truncated to {@code int}); smaller values are treated as a
 * fraction of the dictionary size.
 *
 * @param spellchecker spell checker whose threshold frequency seeds the frequency plateau
 * @param field name of the field candidates are generated for
 * @param suggestMode suggest mode stored on this generator
 * @param reader index reader; supplies {@code maxDoc()} as the fallback dictionary size
 * @param nonErrorLikelihood value stored as this generator's non-error likelihood
 * @param numCandidates value stored as this generator's candidate count
 * @param preFilter analyzer stored as the pre-filter (the six-argument constructor passes {@code null})
 * @param postFilter analyzer stored as the post-filter (the six-argument constructor passes {@code null})
 * @param terms terms of {@code field}; must not be {@code null}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException propagated from the term statistics / terms iterator calls
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = sumTotalTermFreq != -1;
    // Normalize the "-1 = statistic unavailable" sentinel once, and use the normalized value everywhere below.
    final long effectiveDictSize = sumTotalTermFreq == -1 ? reader.maxDoc() : sumTotalTermFreq;
    this.dictSize = effectiveDictSize;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    final float thresholdFrequency = spellchecker.getThresholdFrequency();
    // Fix: the fractional threshold was previously scaled by the raw statistic, which may still be -1
    // and then produced a zero/negative plateau. Scale by the normalized dictionary size instead.
    this.frequencyPlateau = thresholdFrequency >= 1.0f
            ? (int) thresholdFrequency
            : (int) (effectiveDictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}

Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Looks up spelling candidates for the original term of {@code set} and adds them to it.
 *
 * <p>The original term is first run through {@code preFilter}. The spell checker's threshold
 * frequency is then set (to {@code 0} for {@code SUGGEST_ALWAYS}, otherwise to the value
 * computed by {@code thresholdFrequency}) before {@code suggestSimilar} is called. Every
 * returned suggestion is wrapped in a {@code Candidate} and handed to {@code postFilter},
 * which receives the output list.
 *
 * @param set the candidate set whose original term is corrected
 * @return the same {@code set} instance, after {@code addCandidates} was called on it
 * @throws IOException propagated from filtering or the spell checker lookup
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef filteredTerm = preFilter(source.term, spare, byteSpare);
    final long sourceFrequency = source.frequency;

    // The threshold is (re)configured on the spell checker before every lookup.
    spellchecker.setThresholdFrequency(
            this.suggestMode != SuggestMode.SUGGEST_ALWAYS ? thresholdFrequency(sourceFrequency, dictSize) : 0);

    final SuggestWord[] similarWords =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);

    final List<Candidate> drawn = new ArrayList<>(similarWords.length);
    for (SuggestWord word : similarWords) {
        final BytesRef text = new BytesRef(word.string);
        final Candidate unfiltered = new Candidate(
                text,
                internalFrequency(text),
                word.score,
                score(word.freq, word.score, dictSize),
                false);
        postFilter(unfiltered, spare, byteSpare, drawn);
    }

    set.addCandidates(drawn);
    return set;
}

Source File: WordBreakCompoundRewriter.java From querqy with Apache License 2.0

6 votes

/**
 * Asks the word-break spell checker for ways to split {@code term} and returns the usable ones.
 *
 * <p>Only suggestions that are non-null and consist of more than one word are kept, and at most
 * {@code maxDecompoundExpansions} are returned. When {@code verifyDecompoundCollation} is set,
 * each suggestion is additionally scored with {@code countCollatedMatches}; suggestions with a
 * count of zero are dropped and the rest are ordered by {@code MaxSortable}'s natural ordering
 * before the limit is applied.
 *
 * @param term the term to split
 * @return the retained suggestions; an empty list if the spell checker returned none
 * @throws IOException propagated from the spell checker
 */
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException {
    final SuggestWord[][] breaks = wordBreakSpellChecker.suggestWordBreaks(
            toLuceneTerm(term),
            decompoundsToQuery,
            indexReader,
            SuggestMode.SUGGEST_ALWAYS,
            WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);

    if (breaks.length == 0) {
        return Collections.emptyList();
    }

    if (verifyDecompoundCollation) {
        final IndexSearcher collationSearcher = new IndexSearcher(indexReader);
        return Arrays.stream(breaks)
                .filter(parts -> !(parts == null || parts.length <= 1))
                .map(parts -> new MaxSortable<>(parts, countCollatedMatches(parts, collationSearcher)))
                .filter(ranked -> ranked.count > 0)
                .sorted()
                .limit(maxDecompoundExpansions) // TODO: use PriorityQueue
                .map(ranked -> ranked.obj)
                .collect(Collectors.toList());
    }

    // No verification requested: keep the spell checker's own order.
    return Arrays.stream(breaks)
            .filter(parts -> !(parts == null || parts.length <= 1))
            .limit(maxDecompoundExpansions)
            .collect(Collectors.toList());
}

Source File: LuceneWordSearch.java From preDict with GNU Lesser General Public License v3.0

5 votes

/**
 * Returns the strings of the spell checker's suggestions for {@code searchQuery}.
 *
 * <p>Requests up to 2 suggestions on {@code WORD_FIELD} using {@code SuggestMode.SUGGEST_ALWAYS}.
 *
 * @param searchQuery the word to look up
 * @return the suggested strings, in the order the spell checker returned them
 * @throws IOException propagated from the spell checker
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
	final Term queryTerm = new Term(WORD_FIELD, searchQuery);
	final SuggestWord[] similarWords =
			spellChecker.suggestSimilar(queryTerm, 2, reader, SuggestMode.SUGGEST_ALWAYS);

	final List<String> words = new ArrayList<>();
	for (int i = 0; i < similarWords.length; i++) {
		words.add(similarWords[i].string);
	}
	return words;
}

Source File: SpellingOptions.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates spelling options without an explicit alternative term count; the
 * {@code alternativeTermCount} field is left at its declared default.
 *
 * @param tokens the tokens to check
 * @param reader the index reader to use
 * @param count value stored as {@code count}
 * @param suggestMode value stored as {@code suggestMode}
 * @param extendedResults value stored as {@code extendedResults}
 * @param accuracy value stored as {@code accuracy}
 * @param customParams value stored as {@code customParams}
 */
public SpellingOptions(
    Collection<Token> tokens,
    IndexReader reader,
    int count,
    SuggestMode suggestMode,
    boolean extendedResults,
    float accuracy,
    SolrParams customParams) {
  // What to check and where.
  this.tokens = tokens;
  this.reader = reader;
  // How to suggest.
  this.suggestMode = suggestMode;
  this.count = count;
  this.accuracy = accuracy;
  this.extendedResults = extendedResults;
  this.customParams = customParams;
}

Source File: SpellingOptions.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates spelling options including an explicit alternative term count.
 *
 * @param tokens the tokens to check
 * @param reader the index reader to use
 * @param count value stored as {@code count}
 * @param alternativeTermCount value stored as {@code alternativeTermCount}
 * @param suggestMode value stored as {@code suggestMode}
 * @param extendedResults value stored as {@code extendedResults}
 * @param accuracy value stored as {@code accuracy}
 * @param customParams value stored as {@code customParams}
 */
public SpellingOptions(
    Collection<Token> tokens,
    IndexReader reader,
    int count,
    int alternativeTermCount,
    SuggestMode suggestMode,
    boolean extendedResults,
    float accuracy,
    SolrParams customParams) {
  // What to check and where.
  this.tokens = tokens;
  this.reader = reader;
  // How to suggest.
  this.suggestMode = suggestMode;
  this.count = count;
  this.alternativeTermCount = alternativeTermCount;
  this.accuracy = accuracy;
  this.extendedResults = extendedResults;
  this.customParams = customParams;
}

Source File: Suggester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Looks up suggestions for every token in {@code options.tokens} using {@code lookup}.
 *
 * <p>Returns {@code EMPTY_RESULT} when {@code lookup} is {@code null}. The "only more popular"
 * flag passed to the lookup is set when the suggest mode is {@code SUGGEST_MORE_POPULAR} and
 * the lookup is neither a {@code WFSTCompletionLookup} nor an {@code AnalyzingSuggester}.
 * For any other suggest mode the returned list is sorted with {@code Collections.sort}.
 * Tokens for which the lookup returns {@code null} contribute nothing.
 *
 * @param options the spelling options (tokens, suggest mode, count)
 * @return the collected suggestions, with each lookup value narrowed to {@code int}
 * @throws IOException declared by the overridden method
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  log.debug("getSuggestions: {}", options.tokens);
  if (lookup == null) {
    log.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }

  final SpellingResult collected = new SpellingResult();
  // A single CharsRef is re-pointed at each token's buffer instead of allocating per token.
  final CharsRef key = new CharsRef();
  for (Token token : options.tokens) {
    key.chars = token.buffer();
    key.offset = 0;
    key.length = token.length();

    final boolean onlyMorePopular =
        options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR
            && !(lookup instanceof WFSTCompletionLookup || lookup instanceof AnalyzingSuggester);

    final List<LookupResult> found = lookup.lookup(key, onlyMorePopular, options.count);
    if (found != null) {
      if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
        Collections.sort(found);
      }
      for (LookupResult hit : found) {
        collected.add(token, hit.key.toString(), (int) hit.value);
      }
    }
  }
  return collected;
}

Source File: DirectSpellcheckerSettings.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Returns the suggest mode currently held by these settings.
 *
 * @return the value of the {@code suggestMode} field
 */
public SuggestMode suggestMode() {
    return this.suggestMode;
}

Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Creates a candidate generator without pre/post filters, resolving the field's terms
 * through {@code MultiFields.getTerms(reader, field)}.
 *
 * <p>Delegates to the nine-argument constructor, passing {@code null} for both filters.
 *
 * @param spellchecker the spell checker to use
 * @param field name of the field candidates are generated for
 * @param suggestMode the suggest mode to use
 * @param reader the index reader the terms are resolved from
 * @param nonErrorLikelihood passed through to the delegate constructor
 * @param numCandidates passed through to the delegate constructor
 * @throws IOException propagated from resolving the terms or from the delegate constructor
 */
public DirectCandidateGenerator(
        DirectSpellChecker spellchecker,
        String field,
        SuggestMode suggestMode,
        IndexReader reader,
        double nonErrorLikelihood,
        int numCandidates) throws IOException {
    this(
            spellchecker,
            field,
            suggestMode,
            reader,
            nonErrorLikelihood,
            numCandidates,
            null, // preFilter
            null, // postFilter
            MultiFields.getTerms(reader, field));
}

Source File: SpellCheckComponent.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Runs spell checking for the current request and adds the outcome to the response under
 * the {@code "spellcheck"} key.
 *
 * <p>Does nothing when the component is not enabled through {@code COMPONENT_NAME} or when no
 * spell checkers are registered. The text to check is the {@code SPELLCHECK_Q} parameter when
 * present (tokenized with the selected checker's query analyzer); otherwise the request's
 * query string, falling back to {@code CommonParams.Q}, converted by {@code queryConverter}.
 * If the resulting token collection is null or empty nothing is added to the response.
 *
 * <p>Suggestions are only requested when {@code maxResultsForSuggest} is unset or the hit
 * count does not exceed it; otherwise an empty {@code SpellingResult} is reported.
 *
 * @param rb the response builder carrying the request and the response
 * @throws SolrException with {@code NOT_FOUND} when a spell checker is needed but
 *     {@code getSpellChecker} returned {@code null}
 * @throws IOException propagated from the spell checker
 */
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
    return;
  }
  boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
  String q = params.get(SPELLCHECK_Q);
  SolrSpellChecker spellChecker = getSpellChecker(params);
  Collection<Token> tokens = null;

  if (q != null) {
    if (spellChecker == null) {
      // Fix: the analyzer lookup below used to dereference a null checker and fail with a
      // NullPointerException. Report the missing dictionary the same way the branch at the
      // end of this method does.
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
          "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
    }
    //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
    tokens = getTokens(q, spellChecker.getQueryAnalyzer());
  } else {
    q = rb.getQueryString();
    if (q == null) {
      q = params.get(CommonParams.Q);
    }
    tokens = queryConverter.convert(q);
  }
  if (tokens != null && !tokens.isEmpty()) {
    if (spellChecker != null) {
      int count = params.getInt(SPELLCHECK_COUNT, 1);
      boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
      boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
      boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
      float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
      int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
      //If specified, this can be a discrete # of results, or a percentage of fq results.
      Integer maxResultsForSuggest = maxResultsForSuggest(rb);

      ModifiableSolrParams customParams = new ModifiableSolrParams();
      for (String checkerName : getDictionaryNames(params)) {
        customParams.add(getCustomParams(checkerName, params));
      }

      // Prefer the hit count already recorded in the log info; otherwise ask the response builder.
      Number hitsLong = (Number) rb.rsp.getToLog().get("hits");
      long hits = 0;
      if (hitsLong == null) {
        hits = rb.getNumberDocumentsFound();
      } else {
        hits = hitsLong.longValue();
      }

      SpellingResult spellingResult = null;
      if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
        // onlyMorePopular takes precedence over alternativeTermCount when choosing the mode.
        SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
        if (onlyMorePopular) {
          suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
        } else if (alternativeTermCount > 0) {
          suggestMode = SuggestMode.SUGGEST_ALWAYS;
        }

        IndexReader reader = rb.req.getSearcher().getIndexReader();
        SpellingOptions options = new SpellingOptions(tokens, reader, count,
            alternativeTermCount, suggestMode, extendedResults, accuracy,
            customParams);
        spellingResult = spellChecker.getSuggestions(options);
      } else {
        spellingResult = new SpellingResult();
      }
      boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);

      @SuppressWarnings({"rawtypes"})
      NamedList response = new SimpleOrderedMap();
      @SuppressWarnings({"rawtypes"})
      NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults);
      response.add("suggestions", suggestions);

      if (extendedResults) {
        response.add("correctlySpelled", isCorrectlySpelled);
      }
      if (collate) {
        addCollationsToResponse(params, spellingResult, rb, q, response, spellChecker.isSuggestionsMayOverlap());
      }
      if (shardRequest) {
        addOriginalTermsToResponse(response, tokens);
      }

      rb.rsp.add("spellcheck", response);

    } else {
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
          "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
    }
  }
}

Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Initializes an {@code IndexBasedSpellChecker} on the "title" field of the test core, builds
 * it, and checks three lookups with extended results enabled: a misspelling ("documemt") that
 * must yield exactly one suggestion "document" with value 2, a word ("super") that must yield
 * an empty suggestion map, and a word ("document") for which the suggestion map must be null.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testExtendedResults() throws Exception {
  final IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  final NamedList config = new NamedList();
  config.add("classname", IndexBasedSpellChecker.class.getName());

  final File spellIndexDir = createTempDir().toFile();
  spellIndexDir.mkdirs();
  config.add(AbstractLuceneSpellChecker.INDEX_DIR, spellIndexDir.getAbsolutePath());
  config.add(AbstractLuceneSpellChecker.FIELD, "title");
  config.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, config);

  final SolrCore core = h.getCore();
  final String dictionaryName = checker.init(config, core);
  assertTrue(dictionaryName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictionaryName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    final IndexReader indexReader = searcher.getIndexReader();
    final SpellingOptions opts = new SpellingOptions(
        queryConverter.convert("documemt"), indexReader, 1,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);

    // Misspelled word: exactly one correction is expected.
    SpellingResult outcome = checker.getSuggestions(opts);
    assertTrue("result is null and it shouldn't be", outcome != null);
    //should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> found = outcome.get(opts.tokens.iterator().next());
    assertTrue("documemt is null and it shouldn't be", found != null);
    assertTrue("documemt Size: " + found.size() + " is not: " + 1, found.size() == 1);
    final Map.Entry<String, Integer> only = found.entrySet().iterator().next();
    assertTrue(only.getKey() + " is not equal to " + "document", only.getKey().equals("document"));
    assertTrue(only.getValue() + " does not equal: " + 2, only.getValue() == 2);

    //test something not in the spell checker
    opts.tokens = queryConverter.convert("super");
    outcome = checker.getSuggestions(opts);
    assertTrue("result is null and it shouldn't be", outcome != null);
    found = outcome.get(opts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", found.size()==0);

    // Correctly spelled word: no entry is expected for the token.
    opts.tokens = queryConverter.convert("document");
    outcome = checker.getSuggestions(opts);
    assertTrue("result is null and it shouldn't be", outcome != null);
    found = outcome.get(opts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", found == null);
    return null;
  });
}

Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Writes a small standalone index of song titles, points an {@code IndexBasedSpellChecker}
 * at it through {@code AbstractLuceneSpellChecker.LOCATION}, and checks three lookups:
 * "flesh" must yield exactly one suggestion "flash" with value 1, "super" must yield an
 * empty suggestion map, and "Caroline" must yield a null suggestion map.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
          "jumpin jack flash",
          "Sargent Peppers Lonely Hearts Club Band",
          "Born to Run",
          "Thunder Road",
          "Londons Burning",
          "A Horse with No Name",
          "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File tmpDir = createTempDir().toFile();
  File indexDir = new File(tmpDir, "spellingIdx");
  //create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + System.currentTimeMillis());
  // try-with-resources closes the writer and then the directory even when indexing fails,
  // so a failure here is not followed by leaked index resources.
  try (Directory dir = newFSDirectory(altIndexDir.toPath());
       IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()))) {
    for (String title : ALT_DOCS) {
      Document doc = new Document();
      doc.add(new TextField("title", title, Field.Store.YES));
      iw.addDocument(doc);
    }
    iw.forceMerge(1);
  }
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    //should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash"));
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

    //test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size()==0);

    // A word that is present in the alternate index: no entry is expected for the token.
    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}

Source File: WordBreakCompoundRewriter.java From querqy with Apache License 2.0

3 votes

/**
 * Asks the word-break spell checker for combinations of the given terms.
 *
 * <p>All remaining elements of {@code terms} are consumed and converted with
 * {@code toLuceneTerm}; up to 10 suggestions are requested using
 * {@code SuggestMode.SUGGEST_ALWAYS}.
 *
 * @param terms iterator over the terms to combine; it is exhausted by this call
 * @return the combine suggestions returned by the spell checker
 * @throws IOException propagated from the spell checker
 */
protected CombineSuggestion[] suggestCombination(final Iterator<Term> terms) throws IOException {
    final List<org.apache.lucene.index.Term> converted = new ArrayList<>(COMPOUND_WINDOW);
    while (terms.hasNext()) {
        converted.add(toLuceneTerm(terms.next()));
    }

    final org.apache.lucene.index.Term[] termArray =
            converted.toArray(new org.apache.lucene.index.Term[0]);
    return wordBreakSpellChecker.suggestWordCombinations(
            termArray, 10, indexReader, SuggestMode.SUGGEST_ALWAYS);
}

org.apache.lucene.search.spell.SuggestMode Java Examples