org.apache.lucene.analysis.CharArraySet#EMPTY_SET

Source File: Analysis.java From crate with Apache License 2.0

6 votes

/**
 * Resolves the word set configured under the setting {@code name}.
 * <p>
 * Resolution order:
 * <ol>
 *   <li>If the setting has a value and that value is {@code "_none_"}, the empty set is returned.</li>
 *   <li>If the setting has any other value, its list form is resolved through
 *       {@code resolveNamedWords}.</li>
 *   <li>If the setting has no value, {@code getWordList} is consulted; a non-null result is
 *       resolved through {@code resolveNamedWords}.</li>
 *   <li>Otherwise {@code defaultWords} is returned unchanged.</li>
 * </ol>
 *
 * @param env          environment handed to {@code getWordList}
 * @param settings     settings to read {@code name} from
 * @param name         key of the setting holding the words
 * @param defaultWords fallback returned when nothing is configured
 * @param namedWords   named word sets handed to {@code resolveNamedWords}
 * @param ignoreCase   case-sensitivity flag handed to {@code resolveNamedWords}
 * @return the resolved word set, or {@code defaultWords} when nothing is configured
 */
public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords,
                                      Map<String, Set<?>> namedWords, boolean ignoreCase) {
    final String configured = settings.get(name);
    if (configured == null) {
        // Nothing set inline: fall back to a loaded word list, then to the caller's default.
        final List<String> loaded = getWordList(env, settings, name);
        return loaded == null ? defaultWords : resolveNamedWords(loaded, namedWords, ignoreCase);
    }
    if ("_none_".equals(configured)) {
        return CharArraySet.EMPTY_SET;
    }
    return resolveNamedWords(settings.getAsList(name), namedWords, ignoreCase);
}

Source File: TestDutchAnalyzer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that a word placed in the set passed as the second constructor argument
 * is emitted unchanged ("lichamelijk"), while a word that is not in the set is
 * still reduced ("lichamelijke" becomes "licham").
 * <p>
 * The same check is run on two separately constructed analyzers.
 */
public void testExclusionTableViaCtor() throws IOException {
  CharArraySet set = new CharArraySet( 1, true);
  set.add("lichamelijk");

  // try-with-resources guarantees the analyzer is closed even when the assertion fails.
  try (DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set)) {
    assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
  }

  try (DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set)) {
    assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
  }
}

Source File: StandardHtmlStripAnalyzerProvider.java From crate with Apache License 2.0

5 votes

/**
 * Creates the provider and its {@code StandardHtmlStripAnalyzer}.
 * <p>
 * Stop words are obtained from {@code Analysis.parseStopWords}, with the empty set
 * supplied as the default; the provider's {@code version} is then applied to the analyzer.
 */
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    // The empty set is handed to parseStopWords as its default.
    analyzer = new StandardHtmlStripAnalyzer(
        Analysis.parseStopWords(env, settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Source File: TestThaiAnalyzer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks the tokens and their start/end offsets produced by a {@code ThaiAnalyzer}
 * built with an empty stop word set.
 */
public void testOffsets() throws Exception {
  // try-with-resources guarantees the analyzer is closed even when the assertion fails.
  try (Analyzer analyzer = new ThaiAnalyzer(CharArraySet.EMPTY_SET)) {
    assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
        new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
        new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
  }
}

Source File: StandardAnalyzerProvider.java From crate with Apache License 2.0

5 votes

/**
 * Creates the provider and its {@code StandardAnalyzer}.
 * <p>
 * Stop words are obtained from {@code Analysis.parseStopWords}, with the empty set
 * supplied as the default. The maximum token length is read from the
 * {@code "max_token_length"} setting, with
 * {@code StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH} as the default.
 */
public StandardAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    // Read both settings before constructing the analyzer.
    final CharArraySet configuredStopWords = Analysis.parseStopWords(env, settings, CharArraySet.EMPTY_SET);
    final int tokenLengthLimit =
        settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    standardAnalyzer = new StandardAnalyzer(configuredStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}

Source File: TestThaiAnalyzer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs three inputs in a row through one {@code ThaiAnalyzer} instance: the empty
 * string, a Thai sentence, and mixed Thai/Latin text. The expected tokens are
 * checked for each.
 */
public void testReusableTokenStream() throws Exception {
  // try-with-resources guarantees the analyzer is closed even when an assertion fails.
  try (ThaiAnalyzer analyzer = new ThaiAnalyzer(CharArraySet.EMPTY_SET)) {
    assertAnalyzesTo(analyzer, "", new String[] {});

    assertAnalyzesTo(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

    assertAnalyzesTo(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" });
  }
}

Source File: CJKBigramFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

5 votes

/**
 * Installs the analyzer under test: an {@code IcuTokenizer} feeding a
 * {@code CJKBigramFilter}, whose output is wrapped in a {@code StopFilter}
 * configured with an empty stop set.
 */
@Before
public void up() {
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final DefaultIcuTokenizerConfig tokenizerConfig = new DefaultIcuTokenizerConfig(false, true);
            final Tokenizer tokenizer =
                    new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, tokenizerConfig);
            final TokenStream bigrams = new CJKBigramFilter(tokenizer);
            final TokenStream stopFiltered = new StopFilter(bigrams, CharArraySet.EMPTY_SET);
            return new TokenStreamComponents(tokenizer, stopFiltered);
        }
    };
}

Source File: BulgarianAnalyzer.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 *
 * @param stopwords a stopword set
 */
public BulgarianAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: TestDutchAnalyzer.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Check that the default stem overrides are used
 * even if you use a non-default ctor.
 * <p>
 * The analyzer is built with an empty stop word set only, and "fiets" is
 * expected to come out unchanged.
 */
public void testStemOverrides() throws IOException {
  // try-with-resources guarantees the analyzer is closed even when the assertion fails.
  try (DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET)) {
    checkOneTerm(a, "fiets", "fiets");
  }
}

Source File: UkrainianMorfologikAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords a stopword set
 */
public UkrainianMorfologikAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: IndonesianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords
 *          a stopword set
 */
public IndonesianAnalyzer(CharArraySet stopwords){
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: IrishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords a stopword set
 */
public IrishAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: CatalanAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords a stopword set
 */
public CatalanAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: GermanAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords
 *          a stopword set
 */
public GermanAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: PolishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords a stopword set
 */
public PolishAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: SmartChineseAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * <p>
 * Creates a SmartChineseAnalyzer that either uses the bundled default stopword
 * list or uses no stopwords at all.
 * </p>
 * <p>
 * The bundled default list contains only punctuation, so passing {@code false}
 * means punctuation is NOT removed from the text.
 * </p>
 *
 * @param useDefaultStopWords {@code true} to use the default stopword list;
 *                            {@code false} to use an empty stopword set.
 */
public SmartChineseAnalyzer(boolean useDefaultStopWords) {
  if (useDefaultStopWords) {
    stopWords = DefaultSetHolder.DEFAULT_STOP_SET;
  } else {
    stopWords = CharArraySet.EMPTY_SET;
  }
}

Source File: SoraniAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords a stopword set
 */
public SoraniAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: BlendedInfixSuggesterTest.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Verifies that each {@code lookup} overload exercised below returns exactly the
 * requested number of suggestions (1, then 2) for the term "you" after the
 * suggester has been built from two inputs.
 * <p>
 * Both the analyzer and the suggester are managed by try-with-resources, so they
 * are released even when an assertion fails.
 */
public void testSuggesterCountForAllLookups() throws IOException {
    Input[] keys = new Input[]{
        new Input("lend me your ears", 1),
        new Input("as you sow so shall you reap", 1),
    };

    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
    String term = "you";

    // Resources are closed in reverse order: the suggester first, then the analyzer it uses.
    try (Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
         // BlenderType.LINEAR is used by default (remove position*10%)
         BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a)) {
        suggester.build(new InputArrayIterator(keys));

        // lookup(key, onlyMorePopular, num)
        List<Lookup.LookupResult> responses = suggester.lookup(term, false, 1);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, false, 2);
        assertEquals(2, responses.size());

        // lookup(key, num, boolean, boolean)
        responses = suggester.lookup(term, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, 2, false, false);
        assertEquals(2, responses.size());

        // lookup with a null Map<BytesRef, BooleanClause.Occur> argument
        responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 2, false, false);
        assertEquals(2, responses.size());

        // lookup with a null Set<BytesRef> argument
        responses = suggester.lookup(term, (Set<BytesRef>) null, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, (Set<BytesRef>) null, 2, false, false);
        assertEquals(2, responses.size());

        // lookup(key, null, onlyMorePopular, num)
        responses = suggester.lookup(term, null, false, 1);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, null, false, 2);
        assertEquals(2, responses.size());

        // lookup with a null BooleanQuery argument
        responses = suggester.lookup(term, (BooleanQuery) null, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, (BooleanQuery) null, 2, false, false);
        assertEquals(2, responses.size());
    }
}

Source File: GalicianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords a stopword set
 */
public GalicianAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Source File: BengaliAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the second argument (presumably the stem exclusion set; confirm against that
 * constructor).
 * 
 * @param stopwords a stopword set
 */
public BengaliAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}

Java Code Examples for org.apache.lucene.analysis.CharArraySet#EMPTY_SET