org.apache.solr.schema.FieldType#getIndexAnalyzer

Source File: LanguagePrefixedTokenStream.java From SearchServices with GNU Lesser General Public License v3.0

6 votes

/**
 * Returns the {@link Analyzer} associated with the given language.
 * The proper {@link Analyzer} is retrieved from the first field type not null in the following list:
 *
 * <ul>
 *     <li>highlighted_text_ + locale (e.g. highlighted_text_en)</li>
 *     <li>text_ + locale (e.g. text_en)</li>
 *     <li>text___ (text general field)</li>
 * </ul>
 *
 * @param language the language code.
 * @return the {@link Analyzer} associated with the given language.
 */
Analyzer analyzer(String language) {
    FieldType localisedFieldType =
            ofNullable(indexSchema.getFieldTypeByName(highlightingFieldTypeName(language)))
                    .orElseGet(() -> indexSchema.getFieldTypeByName(localisedFieldTypeName(language)));

    FieldType targetFieldType =
            ofNullable(localisedFieldType)
                    .orElseGet(() ->  indexSchema.getFieldTypeByName(FALLBACK_TEXT_FIELD_TYPE_NAME));
    switch (mode)
    {
        case QUERY:
            return targetFieldType.getQueryAnalyzer();
        case INDEX:
        default:
            return targetFieldType.getIndexAnalyzer();
    }
}

Source File: SolrQueryParserBase.java From lucene-solr with Apache License 2.0

6 votes

protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
  if (leadingWildcards == null) leadingWildcards = new HashMap<>();
  ReversedWildcardFilterFactory fac = leadingWildcards.get(fieldType);
  if (fac != null || leadingWildcards.containsKey(fieldType)) {
    return fac;
  }

  Analyzer a = fieldType.getIndexAnalyzer();
  if (a instanceof TokenizerChain) {
    // examine the indexing analysis chain if it supports leading wildcards
    TokenizerChain tc = (TokenizerChain)a;
    TokenFilterFactory[] factories = tc.getTokenFilterFactories();
    for (TokenFilterFactory factory : factories) {
      if (factory instanceof ReversedWildcardFilterFactory) {
        fac = (ReversedWildcardFilterFactory)factory;
        break;
      }
    }
  }

  leadingWildcards.put(fieldType, fac);
  return fac;
}

Source File: PayloadUtils.java From lucene-solr with Apache License 2.0

6 votes

public static String getPayloadEncoder(FieldType fieldType) {
  // TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats
  String encoder = null;
  Analyzer a = fieldType.getIndexAnalyzer();
  if (a instanceof TokenizerChain) {
    // examine the indexing analysis chain for DelimitedPayloadTokenFilterFactory or NumericPayloadTokenFilterFactory
    TokenizerChain tc = (TokenizerChain)a;
    TokenFilterFactory[] factories = tc.getTokenFilterFactories();
    for (TokenFilterFactory factory : factories) {
      if (factory instanceof DelimitedPayloadTokenFilterFactory) {
        encoder = factory.getOriginalArgs().get(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR);
        break;
      }

      if (factory instanceof NumericPayloadTokenFilterFactory) {
        // encodes using `PayloadHelper.encodeFloat(payload)`
        encoder = "float";
        break;
      }
    }
  }

  return encoder;
}

Source File: FreeTextLookupFactory.java From lucene-solr with Apache License 2.0

6 votes

@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();
  
  int grams = (params.get(NGRAMS) != null) 
      ? Integer.parseInt(params.get(NGRAMS).toString()) 
      : FreeTextSuggester.DEFAULT_GRAMS;
  
  byte separator = (params.get(SEPARATOR) != null) 
      ? params.get(SEPARATOR).toString().getBytes(StandardCharsets.UTF_8)[0]
      : FreeTextSuggester.DEFAULT_SEPARATOR;
  
  return new FreeTextSuggester(indexAnalyzer, queryAnalyzer, grams, separator);
}

Source File: TokenizeTextBuilder.java From kite with Apache License 2.0

6 votes

public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.inputFieldName = getConfigs().getString(config, "inputField");
  this.outputFieldName = getConfigs().getString(config, "outputField");      
  String solrFieldType = getConfigs().getString(config, "solrFieldType");      
  Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
  SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
  LOG.debug("solrLocator: {}", locator);
  IndexSchema schema = locator.getIndexSchema();
  FieldType fieldType = schema.getFieldTypeByName(solrFieldType);
  if (fieldType == null) {
    throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + solrFieldType, config);
  }
  this.analyzer = fieldType.getIndexAnalyzer();
  Preconditions.checkNotNull(analyzer);
  // register CharTermAttribute for later (implicit) reuse
  this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
  Preconditions.checkNotNull(token);
  validateArguments();
}

Source File: MLAnalayser.java From SearchServices with GNU Lesser General Public License v3.0

5 votes

private Analyzer selectAnalyzer(FieldType fieldType) {
	 if(mode == Mode.INDEX)
	 {
		 return fieldType.getIndexAnalyzer();
	 }
	 else if(mode == Mode.QUERY)
	 {
		 return fieldType.getQueryAnalyzer();
	 }
	 else
	 {
		 return null;
	 }
}

Source File: TaggerRequestHandler.java From lucene-solr with Apache License 2.0

5 votes

private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  FieldType fieldType = req.getSchema().getFieldType(field);
  Analyzer analyzer = fieldType.getIndexAnalyzer();//index analyzer
  if (analyzer instanceof TokenizerChain) {
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    TokenFilterFactory[] tokenFilterFactories = tokenizerChain.getTokenFilterFactories();
    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
      if (tokenFilterFactory instanceof StopFilterFactory)
        return true;
    }
  }
  return false;
}

Source File: TestLuceneMatchVersion.java From lucene-solr with Apache License 2.0

5 votes

public void testStandardTokenizerVersions() throws Exception {
  assertEquals(DEFAULT_VERSION, solrConfig.luceneMatchVersion);
  
  final IndexSchema schema = h.getCore().getLatestSchema();
  
  FieldType type = schema.getFieldType("textDefault");
  TokenizerChain ana = (TokenizerChain) type.getIndexAnalyzer();
  assertEquals(DEFAULT_VERSION, (ana.getTokenizerFactory()).getLuceneMatchVersion());
  assertEquals(DEFAULT_VERSION, (ana.getTokenFilterFactories()[2]).getLuceneMatchVersion());

  type = schema.getFieldType("textTurkishAnalyzerDefault");
  Analyzer ana1 = type.getIndexAnalyzer();
  assertTrue(ana1 instanceof TurkishAnalyzer);
  assertEquals(DEFAULT_VERSION, ana1.getVersion());
}

Source File: MMSegTokenizerFactoryTest.java From mmseg4j-solr with Apache License 2.0

5 votes

private Dictionary getDictionaryByFieldType(String fieldTypeName) {
	FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(fieldTypeName);
	Analyzer a = ft.getIndexAnalyzer();
	Assert.assertEquals(a.getClass(), TokenizerChain.class);
	
	TokenizerChain tc = (TokenizerChain) a;
	TokenizerFactory tf = tc.getTokenizerFactory();
	Assert.assertEquals(tf.getClass(), MMSegTokenizerFactory.class);
	
	MMSegTokenizerFactory mtf = (MMSegTokenizerFactory) tf;
	
	Assert.assertNotNull(mtf.dic);
	return mtf.dic;
}

Source File: TaggerRequestHandler.java From SolrTextTagger with Apache License 2.0

5 votes

private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  FieldType fieldType = req.getSchema().getFieldType(field);
  Analyzer analyzer = fieldType.getIndexAnalyzer();//index analyzer
  if (analyzer instanceof TokenizerChain) {
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    TokenFilterFactory[] tokenFilterFactories = tokenizerChain.getTokenFilterFactories();
    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
      if (tokenFilterFactory instanceof StopFilterFactory)
        return true;
    }
  }
  return false;
}

Source File: FuzzyLookupFactory.java From lucene-solr with Apache License 2.0

4 votes

@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  
  // mandatory parameter
  Object fieldTypeName = params.get(AnalyzingLookupFactory.QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + AnalyzingLookupFactory.QUERY_ANALYZER + " parameter is mandatory");
  }
  // retrieve index and query analyzers for the field
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();
  
  // optional parameters
  boolean exactMatchFirst = (params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST) != null)
  ? Boolean.valueOf(params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST).toString())
  : true;
      
  boolean preserveSep = (params.get(AnalyzingLookupFactory.PRESERVE_SEP) != null)
  ? Boolean.valueOf(params.get(AnalyzingLookupFactory.PRESERVE_SEP).toString())
  : true;
      
  int options = 0;
  if (exactMatchFirst) {
    options |= FuzzySuggester.EXACT_FIRST;
  }
  if (preserveSep) {
    options |= FuzzySuggester.PRESERVE_SEP;
  }
  
  int maxSurfaceFormsPerAnalyzedForm = (params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS) != null)
  ? Integer.parseInt(params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS).toString())
  : 256;
      
  int maxGraphExpansions = (params.get(AnalyzingLookupFactory.MAX_EXPANSIONS) != null)
  ? Integer.parseInt(params.get(AnalyzingLookupFactory.MAX_EXPANSIONS).toString())
  : -1;

  boolean preservePositionIncrements = params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS) != null
  ? Boolean.valueOf(params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS).toString())
  : false;
  
  int maxEdits = (params.get(MAX_EDITS) != null)
  ? Integer.parseInt(params.get(MAX_EDITS).toString())
  : FuzzySuggester.DEFAULT_MAX_EDITS;
  
  boolean transpositions = (params.get(TRANSPOSITIONS) != null)
  ? Boolean.parseBoolean(params.get(TRANSPOSITIONS).toString())
  : FuzzySuggester.DEFAULT_TRANSPOSITIONS;
      
  int nonFuzzyPrefix = (params.get(NON_FUZZY_PREFIX) != null)
  ? Integer.parseInt(params.get(NON_FUZZY_PREFIX).toString())
  :FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX;
  
  
  int minFuzzyLength = (params.get(MIN_FUZZY_LENGTH) != null)
  ? Integer.parseInt(params.get(MIN_FUZZY_LENGTH).toString())
  :FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH;
  
  boolean unicodeAware = (params.get(UNICODE_AWARE) != null)
  ? Boolean.valueOf(params.get(UNICODE_AWARE).toString())
  : FuzzySuggester.DEFAULT_UNICODE_AWARE;
  
  return new FuzzySuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm,
      maxGraphExpansions, preservePositionIncrements, maxEdits, transpositions, nonFuzzyPrefix,
      minFuzzyLength, unicodeAware);
}

Source File: AnalyzingLookupFactory.java From lucene-solr with Apache License 2.0

4 votes

@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  // mandatory parameter
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();
  
  // optional parameters
  
  boolean exactMatchFirst = params.get(EXACT_MATCH_FIRST) != null
  ? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString())
  : true;
  
  boolean preserveSep = params.get(PRESERVE_SEP) != null
  ? Boolean.valueOf(params.get(PRESERVE_SEP).toString())
  : true;
  
  int flags = 0;
  if (exactMatchFirst) {
    flags |= AnalyzingSuggester.EXACT_FIRST;
  }
  if (preserveSep) {
    flags |= AnalyzingSuggester.PRESERVE_SEP;
  }
  
  int maxSurfaceFormsPerAnalyzedForm = params.get(MAX_SURFACE_FORMS) != null
  ? Integer.parseInt(params.get(MAX_SURFACE_FORMS).toString())
  : 256;
  
  int maxGraphExpansions = params.get(MAX_EXPANSIONS) != null
  ? Integer.parseInt(params.get(MAX_EXPANSIONS).toString())
  : -1;
  
  boolean preservePositionIncrements = params.get(PRESERVE_POSITION_INCREMENTS) != null
  ? Boolean.valueOf(params.get(PRESERVE_POSITION_INCREMENTS).toString())
  : false;

  return new AnalyzingSuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm,
      maxGraphExpansions, preservePositionIncrements);
}

Java Code Examples for org.apache.solr.schema.FieldType#getIndexAnalyzer()