org.apache.lucene.analysis.util.ResourceLoader Java Examples

Source File: SynonymFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds a {@link SynonymMap} using the {@link SynonymMap.Parser} implementation named by
 * {@code cname}.
 *
 * <p>The parser class is looked up through {@code loader}, instantiated through its
 * {@code (boolean, boolean, Analyzer)} constructor, and then fed every file listed in the
 * {@code synonyms} setting. Files are decoded as strict UTF-8: malformed or unmappable
 * input is reported rather than replaced.
 *
 * @param loader   resolves the parser class and opens the synonym files
 * @param cname    name of the parser class to instantiate
 * @param dedup    first constructor argument handed to the parser
 * @param analyzer analyzer handed to the parser
 * @return the map produced by the parser after all files were parsed
 * @throws IOException    if a synonym file cannot be read
 * @throws ParseException if the parser rejects a file's content
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  final SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  // One strict decoder is shared by all files and reset before each use.
  final CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  for (String fileName : splitFileNames(synonyms)) {
    utf8Decoder.reset();
    try (final Reader in = new InputStreamReader(loader.openResource(fileName), utf8Decoder)) {
      synonymParser.parse(in);
    }
  }
  return synonymParser.build();
}

Source File: PhoneticFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Resolves the encoder class for the configured {@code name} and validates the setup.
 *
 * <p>The registry is consulted first, keyed by the upper-cased name; when it has no entry
 * the name is resolved through {@code resolveEncoder}. If a maximum code length was
 * configured, the encoder class must expose a {@code setMaxCodeLen(int)} method.
 *
 * @param loader resource loader used when the encoder is not found in the registry
 * @throws IOException declared by the interface
 * @throws IllegalArgumentException if a maximum code length is configured but the encoder
 *         class has no {@code setMaxCodeLen(int)} method
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String registryKey = name.toUpperCase(Locale.ROOT);
  clazz = registry.get(registryKey);
  if (clazz == null) {
    clazz = resolveEncoder(name, loader);
  }

  if (maxCodeLength != null) {
    try {
      setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class);
    } catch (Exception lookupFailure) {
      final String message = "Encoder " + name + " / " + clazz + " does not support " + MAX_CODE_LENGTH;
      throw new IllegalArgumentException(message, lookupFailure);
    }
  }

  // Create an encoder right away so that configuration problems surface now.
  getEncoder();
}

Source File: KoreanTokenizerFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Loads the optional user dictionary.
 *
 * <p>When no dictionary path is configured the user dictionary is cleared. Otherwise the
 * resource is read with the configured encoding (UTF-8 when none is set) using a decoder
 * that reports malformed or unmappable input instead of replacing it.
 *
 * @param loader opens the user dictionary resource
 * @throws IOException if the resource cannot be opened or read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath == null) {
    userDictionary = null;
    return;
  }

  final String charsetName = (userDictionaryEncoding != null) ? userDictionaryEncoding : IOUtils.UTF_8;
  try (InputStream in = loader.openResource(userDictionaryPath)) {
    final CharsetDecoder strictDecoder = Charset.forName(charsetName).newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);
    userDictionary = UserDictionary.open(new InputStreamReader(in, strictDecoder));
  }
}

Source File: HyphenationCompoundWordTokenFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Loads the optional word dictionary and the hyphenation grammar.
 *
 * <p>The hyphenation file is wrapped in an {@link InputSource} whose encoding is the
 * configured one (possibly {@code null}) and whose system id is the plain file name.
 * The stream is always closed afterwards; failures while closing are handled by
 * {@code IOUtils.closeWhileHandlingException}.
 *
 * @param loader opens the dictionary and hyphenation resources
 * @throws IOException if a resource cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream hyphenationStream = null;
  try {
    // The word dictionary is optional.
    if (dictFile != null) {
      dictionary = getWordSet(loader, dictFile, false);
    }
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    hyphenationStream = loader.openResource(hypFile);
    final InputSource source = new InputSource(hyphenationStream);
    // A null encoding leaves the decision to the XML parser.
    source.setEncoding(encoding);
    source.setSystemId(hypFile);
    hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(source);
  } finally {
    IOUtils.closeWhileHandlingException(hyphenationStream);
  }
}

Source File: ICUTokenizerFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates the tokenizer configuration.
 *
 * <p>Without tailored rules the default configuration is used as is. Otherwise each
 * tailored rule file is parsed into a break iterator stored under its script code, and the
 * configuration hands out clones of those iterators, falling back to the default
 * behaviour for scripts that have no tailoring.
 *
 * @param loader opens the tailored rule files
 * @throws IOException if a rule file cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  assert tailored != null : "init must be called first!";
  if (tailored.isEmpty()) {
    config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
    return;
  }

  // One slot per script code known to ICU.
  final int scriptCount = 1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT);
  final BreakIterator[] scriptBreakers = new BreakIterator[scriptCount];
  for (Map.Entry<Integer,String> rule : tailored.entrySet()) {
    final int scriptCode = rule.getKey();
    scriptBreakers[scriptCode] = parseRules(rule.getValue(), loader);
  }

  config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {

    @Override
    public RuleBasedBreakIterator getBreakIterator(int script) {
      final BreakIterator custom = scriptBreakers[script];
      if (custom == null) {
        return super.getBreakIterator(script);
      }
      return (RuleBasedBreakIterator) custom.clone();
    }
    // TODO: we could also allow codes->types mapping
  };
}

Source File: SafeXMLParsing.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Parses a config file obtained from a {@link ResourceLoader}.
 *
 * <p>XInclude and external entities are enabled, but cannot escape the resource loader:
 * entities are resolved through a {@code SystemIdResolver} bound to {@code loader}.
 * Parse errors are reported through {@code log}.
 *
 * @param log    receives XML error reports
 * @param loader opens the file and resolves entities
 * @param file   resource name of the XML file
 * @return the parsed document
 * @throws SAXException  if the document cannot be parsed
 * @throws IOException   if the resource cannot be read
 * @throws SolrException if the parser cannot be configured or lacks XInclude support
 */
public static Document parseConfigXML(Logger log, ResourceLoader loader, String file) throws SAXException, IOException {
  try (InputStream resource = loader.openResource(file)) {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setNamespaceAware(true);
    trySetDOMFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
    try {
      factory.setXIncludeAware(true);
    } catch (UnsupportedOperationException unsupported) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", unsupported);
    }

    final DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setEntityResolver(new SystemIdResolver(loader));
    builder.setErrorHandler(new XMLErrorLogger(log));

    final String systemId = SystemIdResolver.createSystemIdFromResourceName(file);
    return builder.parse(resource, systemId);
  } catch (ParserConfigurationException pce) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser cannot be configured", pce);
  }
}

Source File: TestKeepFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Checks that the keep-word factory loads its word set from one file and from a
 * comma-separated list of two files.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file.
  KeepWordFilterFactory singleFileFactory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  CharArraySet singleFileWords = singleFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);

  // Two word files in one comma-separated argument.
  KeepWordFilterFactory twoFileFactory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  CharArraySet twoFileWords = twoFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
}

Source File: MappingCharFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the normalization map from the configured mapping files.
 *
 * <p>Does nothing when no mapping is configured. Otherwise the lines of every listed file
 * are collected, parsed as rules, and compiled into a {@code NormalizeCharMap}. A map whose
 * inner FST is {@code null} accepts nothing (for example when the file is empty) and is
 * stored as {@code null}.
 *
 * @param loader opens the mapping files
 * @throws IOException if a mapping file cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (mapping == null) {
    return;
  }

  final List<String> ruleLines = new ArrayList<>();
  for (String fileName : splitFileNames(mapping)) {
    ruleLines.addAll(getLines(loader, fileName.trim()));
  }

  final NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
  parseRules(ruleLines, mapBuilder);
  final NormalizeCharMap built = mapBuilder.build();
  // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
  // so just set the whole map to null
  normMap = (built.map == null) ? null : built;
}

Source File: DefaultQuerqyDismaxQParserTest.java From querqy with Apache License 2.0

6 votes

/**
 * Creates a stub {@code RewriterFactory} for tests.
 *
 * <p>The rewriter it produces replaces the user query with a {@code MatchAllQuery} and adds
 * a filter query parsed from the string {@code "a"}. The factory reports no generable terms.
 */
@Override
public RewriterFactory createFactory(final String rewriterId, NamedList<?> args, ResourceLoader resourceLoader) {
    return new RewriterFactory(rewriterId) {

        @Override
        public Set<Term> getGenerableTerms() {
            return Collections.emptySet();
        }

        @Override
        public QueryRewriter createRewriter(ExpandedQuery input, SearchEngineRequestAdapter searchEngineRequestAdapter) {
            return expandedQuery -> {
                expandedQuery.setUserQuery(new MatchAllQuery());
                expandedQuery.addFilterQuery(WhiteSpaceQuerqyParser.parseString("a"));
                return expandedQuery;
            };
        }
    };
}

Source File: AbstractQuerqyDismaxQParserPluginTest.java From querqy with Apache License 2.0

6 votes

/**
 * With only a parser {@code class} configured (no {@code factory}), the plugin must create
 * a {@code SimpleQuerqyQParserFactory} wrapping that parser class.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfOnlyTheParserClassIsConfigured() throws Exception {

    // Plugin arguments: a "parser" section that names a class but no factory.
    NamedList<NamedList<String>> pluginArgs = mock(NamedList.class);
    when(pluginArgs.get("parser")).thenReturn(parserConfig);
    when(parserConfig.get("factory")).thenReturn(null);
    when(parserConfig.get("class")).thenReturn("querqy.parser.WhiteSpaceQuerqyParser");

    ResourceLoader classpathLoader = new ClasspathResourceLoader(getClass().getClassLoader());
    final SolrQuerqyParserFactory created = plugin.loadSolrQuerqyParserFactory(classpathLoader, pluginArgs);

    assertNotNull(created);
    assertTrue(created instanceof SimpleQuerqyQParserFactory);
    assertEquals(WhiteSpaceQuerqyParser.class, ((SimpleQuerqyQParserFactory) created).querqyParserClass);

}

Source File: EnglishLemmatisationFilterFactory.java From jate with GNU Lesser General Public License v3.0

6 votes

/**
 * Creates the lemmatiser from a resource directory located under the Solr config directory.
 *
 * <p>Does nothing when no lemmatiser resource directory is configured. The loader is cast
 * to {@code SolrResourceLoader} to obtain the config directory, to which the configured
 * directory name is appended.
 *
 * @param loader resource loader; cast to {@code SolrResourceLoader}
 * @throws IOException declared by the interface
 * @throws IllegalArgumentException if the lemmatiser cannot be created; the original
 *         failure is attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (lemmatiserResourceDir != null ) {
        try {
            String path=((SolrResourceLoader) loader).getConfigDir();
            if(!path.endsWith(File.separator))
                    path=path+File.separator;
            lemmatiser = new EngLemmatiser(path+lemmatiserResourceDir,
                    false, false);
        } catch (Exception e) {
            StringBuilder sb = new StringBuilder("Initiating ");
            sb.append(this.getClass().getName()).append(" failed due to:\n");
            sb.append(ExceptionUtils.getFullStackTrace(e));
            // Keep the original exception as the cause so callers can inspect it
            // programmatically instead of parsing the message text.
            throw new IllegalArgumentException(sb.toString(), e);
        }
    }
}

Source File: OpenNLPTokenizerFactory.java From jate with GNU Lesser General Public License v3.0

6 votes

/**
 * Loads the OpenNLP models and the optional paragraph chunker.
 *
 * <p>The sentence model is optional; the tokenizer model is required. Each model stream is
 * closed as soon as the model has been constructed from it. If a paragraph chunker class
 * name is configured, it is instantiated reflectively through its no-argument constructor.
 *
 * @param loader opens the model resources
 * @throws IOException if the tokenizer model is not configured, a model cannot be read, or
 *         the paragraph chunker cannot be instantiated (the failure is attached as cause)
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if(sentenceModelFile!=null) {
        // The model is fully read by its constructor, so the stream can be released here.
        try (java.io.InputStream sentenceIn = loader.openResource(sentenceModelFile)) {
            sentenceOp = new SentenceDetectorME(new SentenceModel(sentenceIn));
        }
    }

    if(tokenizerModelFile==null)
        throw new IOException("Parameter 'tokenizerModle' is required, but is invalid:"+tokenizerModelFile);
    try (java.io.InputStream tokenizerIn = loader.openResource(tokenizerModelFile)) {
        tokenizerOp = new TokenizerME(new TokenizerModel(tokenizerIn));
    }

    if(parChunkingClass!=null) {
        try {
            Class<?> chunkerClass = Class.forName(parChunkingClass);
            Object chunker = chunkerClass.newInstance();
            paragraphChunker = (ParagraphChunker) chunker;
        }catch (Exception e){
            throw new IOException(e);
        }
    }

}

Source File: MtasTokenizerFactory.java From mtas with Apache License 2.0

6 votes

/**
 * Creates a tokenizer factory from its arguments and validates their combination.
 *
 * <p>Exactly one of the config-file, config and parser arguments must be present, and the
 * default argument is only allowed together with the config argument.
 *
 * @param args the factory arguments
 * @param resourceLoader the resource loader handed to {@code init}
 * @throws IOException if the argument combination is invalid
 */
public MtasTokenizerFactory(Map<String, String> args,
    ResourceLoader resourceLoader) throws IOException {
  super(args);
  configFileArgument = get(args, ARGUMENT_CONFIGFILE);
  configArgument = get(args, ARGUMENT_CONFIG);
  analyzerArgument = get(args, ARGUMENT_PARSER);
  analyzerArgumentParserArgs = get(args, ARGUMENT_PARSER_ARGS);
  defaultArgument = get(args, ARGUMENT_DEFAULT);

  // Count how many of the mutually exclusive arguments were supplied.
  int exclusiveArgCount = 0;
  if (configFileArgument != null) {
    exclusiveArgCount++;
  }
  if (configArgument != null) {
    exclusiveArgCount++;
  }
  if (analyzerArgument != null) {
    exclusiveArgCount++;
  }

  if (exclusiveArgCount > 1) {
    throw new IOException(this.getClass().getName() + " can't have multiple of "
        + ARGUMENT_CONFIGFILE + ", " + ARGUMENT_CONFIG+" AND "+ARGUMENT_PARSER);
  }
  if (configArgument == null && defaultArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG);
  }
  if (exclusiveArgCount == 0) {
    throw new IOException(this.getClass().getName() + " should have "
        + ARGUMENT_CONFIGFILE + " or " + ARGUMENT_CONFIG+" or "+ARGUMENT_PARSER);
  }
  init(resourceLoader);
}

Source File: SynonymGraphFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 *
 * <p>The parser class is looked up through {@code loader}, instantiated through its
 * {@code (boolean, boolean, Analyzer)} constructor, and fed every file listed in the
 * {@code synonyms} setting. Files are decoded as strict UTF-8 (malformed or unmappable
 * input is reported), and each file's reader is closed once it has been parsed.
 *
 * @param loader   resolves the parser class and opens the synonym files
 * @param cname    name of the parser class to instantiate
 * @param dedup    first constructor argument handed to the parser
 * @param analyzer analyzer handed to the parser
 * @return the map produced by the parser after all files were parsed
 * @throws IOException    if a synonym file cannot be read
 * @throws ParseException if the parser rejects a file's content
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  SynonymMap.Parser parser;
  Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
  try {
    parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  List<String> files = splitFileNames(synonyms);
  for (String file : files) {
    decoder.reset();
    // Close each resource after parsing so file handles are not leaked,
    // including when parse() throws.
    try (final InputStreamReader isr = new InputStreamReader(loader.openResource(file), decoder)) {
      parser.parse(isr);
    }
  }
  return parser.build();
}

Source File: StemmerOverrideFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the stemmer override dictionary from the configured dictionary files.
 *
 * <p>Does nothing when no files are configured. Every line of every file is split at the
 * first tab character into an input term and its override, which are added to the builder.
 *
 * @param loader opens the dictionary files
 * @throws IOException if a dictionary file cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (dictionaryFiles == null) {
    return;
  }
  final List<String> fileNames = splitFileNames(dictionaryFiles);
  if (fileNames.isEmpty()) {
    return;
  }

  final StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(ignoreCase);
  for (String fileName : fileNames) {
    for (String entry : getLines(loader, fileName.trim())) {
      // At most two parts: everything after the first tab is the override.
      final String[] parts = entry.split("\t", 2);
      overrides.add(parts[0], parts[1]);
    }
  }
  dictionary = overrides.build();
}

Source File: MtasConfiguration.java From mtas with Apache License 2.0

6 votes

/**
 * Read mtas tokenizer configurations.
 *
 * <p>The named configurations are read from {@code configFile}; each one must reference a
 * tokenizer configuration file, which is then opened through the resource loader, parsed,
 * and closed again.
 *
 * @param resourceLoader
 *          the resource loader
 * @param configFile
 *          the config file
 * @return the hash map from configuration name to parsed configuration
 * @throws IOException
 *           if there are no configurations, a configuration lacks the tokenizer
 *           configuration file entry, or a resource cannot be read
 */
public static HashMap<String, MtasConfiguration> readMtasTokenizerConfigurations(
    ResourceLoader resourceLoader, String configFile) throws IOException {
  HashMap<String, HashMap<String, String>> configs = readConfigurations(
      resourceLoader, configFile, MtasTokenizerFactory.class.getName());
  if (configs == null) {
    throw new IOException("no configurations");
  } else {
    HashMap<String, MtasConfiguration> result = new HashMap<String, MtasConfiguration>();
    for (Entry<String, HashMap<String, String>> entry : configs.entrySet()) {
      HashMap<String, String> config = entry.getValue();
      if (config.containsKey(TOKENIZER_CONFIGURATION_FILE)) {
        // Release the resource stream once the configuration has been parsed,
        // also when parsing fails.
        try (java.io.InputStream configStream = resourceLoader
            .openResource(config.get(TOKENIZER_CONFIGURATION_FILE))) {
          result.put(entry.getKey(), readConfiguration(configStream));
        }
      } else {
        throw new IOException("configuration " + entry.getKey() + " has no "
            + TOKENIZER_CONFIGURATION_FILE);
      }
    }
    return result;
  }
}

Source File: ReplaceRewriterFactory.java From querqy with Apache License 2.0

5 votes

/**
 * Creates a replace-rewriter factory from its Solr configuration.
 *
 * <p>The {@code rules} property is mandatory and names the resource holding the rules,
 * which is opened as UTF-8. {@code ignoreCase} and {@code inputDelimiter} fall back to
 * their defaults when absent. {@code querqyParser} optionally names a
 * {@code QuerqyParserFactory} class; when absent or blank a whitespace parser is used.
 *
 * @param id             rewriter id
 * @param args           configuration properties
 * @param resourceLoader opens the rules resource and instantiates the parser factory
 * @return the configured rewriter factory
 * @throws IOException if the rules resource cannot be opened
 * @throws IllegalArgumentException if the {@code rules} property is missing
 */
@Override
public RewriterFactory createFactory(String id, NamedList<?> args, ResourceLoader resourceLoader) throws IOException {

    final String rulesResourceName = (String) args.get("rules");
    if (rulesResourceName == null) {
        throw new IllegalArgumentException("Property 'rules' not configured");
    }

    final InputStreamReader rulesReader =
            new InputStreamReader(resourceLoader.openResource(rulesResourceName), StandardCharsets.UTF_8);

    final Boolean ignoreCaseArg = args.getBooleanArg("ignoreCase");
    final String inputDelimiterArg = (String) args.get("inputDelimiter");

    // querqy parser for queries that are part of the instructions in the rules
    QuerqyParserFactory parserFactory = null;
    final String parserClassArg = (String) args.get("querqyParser");
    if (parserClassArg != null) {
        final String parserClassName = parserClassArg.trim();
        if (parserClassName.length() > 0) {
            parserFactory = resourceLoader.newInstance(parserClassName, QuerqyParserFactory.class);
        }
    }
    if (parserFactory == null) {
        parserFactory = new WhiteSpaceQuerqyParserFactory();
    }

    return new querqy.rewrite.contrib.ReplaceRewriterFactory(id, rulesReader,
            ignoreCaseArg != null ? ignoreCaseArg : DEFAULT_IGNORE_CASE,
            inputDelimiterArg != null ? inputDelimiterArg : DEFAULT_INPUT_DELIMITER,
            parserFactory.createParser());
}

Source File: CollationField.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reads custom collation rules from a resource and builds a {@link RuleBasedCollator}.
 * The whole file content is used as the rule text, so the file cannot contain comments
 * (a {@code #} might be part of the rules).
 *
 * @param fileName resource name of the rules file, read as UTF-8
 * @param loader   opens the rules resource
 * @return a collator built from the rules
 * @throws RuntimeException wrapping the I/O or parse failure
 */
private Collator createFromRules(String fileName, ResourceLoader loader) {
  InputStream rulesStream = null;
  try {
    rulesStream = loader.openResource(fileName);
    final String ruleText = IOUtils.toString(rulesStream, "UTF-8");
    return new RuleBasedCollator(ruleText);
  } catch (IOException | ParseException failure) {
    // io error or invalid rules
    throw new RuntimeException(failure);
  } finally {
    IOUtils.closeQuietly(rulesStream);
  }
}

Source File: MMSegTokenizerFactory.java From jstarcraft-nlp with Apache License 2.0

5 votes

/**
 * Loads the dictionary named by the {@code dicPath} argument and logs where it came from.
 *
 * @param loader resource loader handed to the dictionary lookup
 */
@Override
public void inform(ResourceLoader loader) {
    final String configuredPath = getOriginalArgs().get("dicPath");
    dic = Utils.getDict(configuredPath, loader);

    logger.info("dic load... in={}", dic.getDicPath().toURI());
}

Source File: TestSystemIdResolver.java From lucene-solr with Apache License 2.0

5 votes

/**
 * With unsafe resource loading enabled, resolves an entity between two system ids that
 * live in the parent directory of the collection's instance directory.
 */
public void testUnsafeResolving() throws Exception {
  System.setProperty("solr.allow.unsafe.resourceloading", "true");

  final Path testHome = SolrTestCaseJ4.getFile("solr/collection1").getParentFile().toPath();
  final Path instanceDir = testHome.resolve("collection1");
  final ResourceLoader loader = new SolrResourceLoader(instanceDir, this.getClass().getClassLoader());
  final SystemIdResolver resolver = new SystemIdResolver(loader);

  final String schemaSystemId = SystemIdResolver.createSystemIdFromResourceName(testHome+"/crazy-path-to-schema.xml");
  final String configSystemId = SystemIdResolver.createSystemIdFromResourceName(testHome+"/crazy-path-to-config.xml");
  assertEntityResolving(resolver, schemaSystemId, configSystemId, "crazy-path-to-schema.xml");
}

Source File: OpenNLPRegexChunkerFactory.java From jate with GNU Lesser General Public License v3.0

5 votes

/**
 * Initialises the parent factory and loads the chunking patterns.
 *
 * <p>When a pattern file is configured, its lines are read through the resource loader and
 * handed to {@code initPatterns}.
 *
 * @param loader opens the pattern file
 * @throws IOException if the parent initialisation fails
 * @throws IllegalArgumentException if the pattern file cannot be read; the original
 *         {@link IOException} is attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    super.inform(loader);
    if (patternFile != null) {
        try {
            List<String> lines = getLines(loader, patternFile.trim());
            initPatterns(lines, patterns);
        } catch (IOException ioe) {
            StringBuilder sb = new StringBuilder("Initiating ");
            sb.append(this.getClass().getName()).append(" failed due to patterns. Details:\n");
            sb.append(ExceptionUtils.getFullStackTrace(ioe));
            // Keep the original exception as the cause so callers can inspect it
            // programmatically instead of parsing the message text.
            throw new IllegalArgumentException(sb.toString(), ioe);
        }
    }
}

Source File: NumberUnitRewriterFactory.java From querqy with Apache License 2.0

5 votes

/**
 * Creates a number-unit rewriter factory from a JSON configuration resource.
 *
 * <p>The config-file property must be a string naming the resource. The JSON is mapped
 * onto a {@code NumberUnitConfigObject}, from which the scale for linear functions (with a
 * default) and the number-unit definitions are derived. A definition that declares a unit
 * more than once is rejected.
 *
 * @param id             rewriter id
 * @param args           configuration properties
 * @param resourceLoader opens the configuration resource
 * @return the configured rewriter factory
 * @throws IOException if the configuration cannot be read or mapped
 * @throws IllegalArgumentException if the config property is missing or not a string, or a
 *         definition contains duplicate units
 */
@Override
public RewriterFactory createFactory(final String id, final NamedList<?> args, final ResourceLoader resourceLoader)
        throws IOException {

    final Object configFileArg = args.get(KEY_CONFIG_FILE);
    if (!(configFileArg instanceof String)) {
        throw new IllegalArgumentException("Property 'config' not or not properly configured");
    }
    final String rulesResourceName = (String) configFileArg;

    // resource InputStream will be closed by Jackson Json Parser
    final NumberUnitConfigObject configObject = JSON_DEFAULT_OBJECT_MAPPER.readValue(
            resourceLoader.openResource(rulesResourceName), NumberUnitConfigObject.class);

    final int scale = getOrDefaultInt(configObject::getScaleForLinearFunctions,
            DEFAULT_SCALE_FOR_LINEAR_FUNCTIONS);
    final List<NumberUnitDefinition> definitions = parseConfig(configObject);

    final boolean hasDuplicateUnits = definitions.stream()
            .anyMatch(this::numberUnitDefinitionHasDuplicateUnitDefinition);
    if (hasDuplicateUnits) {
        throw new IllegalArgumentException("Units must only defined once per NumberUnitDefinition");
    }

    return new querqy.rewrite.contrib.NumberUnitRewriterFactory(id, definitions,
            new NumberUnitQueryCreatorSolr(scale));
}

Source File: ManagedSynonymGraphFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Called once, during core initialization, to initialize any analysis components
 * that depend on the data managed by this resource. It is important that the
 * analysis component is only initialized once during core initialization so that
 * text analysis is consistent, especially in a distributed environment, as we
 * don't want one server applying a different set of stop words than other servers.
 *
 * <p>The init args are extended with {@code synonyms}, {@code expand} and {@code format}
 * entries, converted to a string map, and used to create a delegate factory whose
 * synonyms come from the managed resource instead of a file.
 *
 * @param initArgs initialization arguments; entries are added to this list
 * @param res      the managed resource backing the synonyms
 * @throws SolrException if informing the delegate fails with an {@link IOException}
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
    throws SolrException
{
  final NamedList<Object> mutableArgs = (NamedList<Object>)initArgs;
  mutableArgs.add("synonyms", getResourceId());
  mutableArgs.add("expand", "false");
  mutableArgs.add("format", "solr");

  final Map<String,String> delegateArgs = new HashMap<>();
  for (Map.Entry<String,?> arg : mutableArgs) {
    final String value = arg.getValue().toString();
    delegateArgs.put(arg.getKey(), value);
  }

  // create the actual filter factory that pulls the synonym mappings
  // from synonymMappings using a custom parser implementation
  delegate = new SynonymGraphFilterFactory(delegateArgs) {
    @Override
    protected SynonymMap loadSynonyms
        (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {

      final SynonymManager manager = (SynonymManager)res;
      final ManagedSynonymParser managedParser = new ManagedSynonymParser(manager, dedup, analyzer);
      // null is safe here because there's no actual parsing done against a input Reader
      managedParser.parse(null);
      return managedParser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException ioFailure) {
    throw new SolrException(ErrorCode.SERVER_ERROR, ioFailure);
  }
}

Source File: MtasCharFilterFactory.java From mtas with Apache License 2.0

5 votes

/**
 * Creates a char filter factory from its arguments and validates their combination.
 *
 * <p>Either the type or the config argument must be given, but not both. Prefix and
 * postfix require the type argument; the default argument requires the config argument.
 *
 * @param args the factory arguments
 * @param resourceLoader the resource loader handed to {@code init}
 * @throws IOException if the argument combination is invalid
 */
public MtasCharFilterFactory(Map<String, String> args,
    ResourceLoader resourceLoader) throws IOException {
  super(args);
  typeArgument = get(args, ARGUMENT_TYPE);
  prefixArgument = get(args, ARGUMENT_PREFIX);
  postfixArgument = get(args, ARGUMENT_POSTFIX);
  configArgument = get(args, ARGUMENT_CONFIG);
  defaultArgument = get(args, ARGUMENT_DEFAULT);

  // Each check throws, so they are evaluated in order and the first violation wins.
  if (typeArgument != null && configArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have both "
        + ARGUMENT_TYPE + " and " + ARGUMENT_CONFIG);
  }
  if (typeArgument == null && prefixArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_PREFIX + " without " + ARGUMENT_TYPE);
  }
  if (typeArgument == null && postfixArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_POSTFIX + " without " + ARGUMENT_TYPE);
  }
  if (configArgument == null && defaultArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG);
  }
  if (typeArgument == null && configArgument == null) {
    throw new IOException(this.getClass().getName() + " should have "
        + ARGUMENT_TYPE + " or " + ARGUMENT_CONFIG);
  }
  init(resourceLoader);
}

Source File: TestCommonGramsQueryFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that the common-grams query factory loads its common words from a single file,
 * from a comma-separated list of files, and from a snowball-format file.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file.
  CommonGramsQueryFilterFactory singleFile = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet singleFileWords = singleFile.getCommonWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2,
      singleFileWords.size() == 2);
  assertTrue(singleFile.isIgnoreCase() + " does not equal: " + true,
      singleFile.isIgnoreCase() == true);

  // Two word files in one comma-separated argument.
  CommonGramsQueryFilterFactory twoFiles = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  CharArraySet twoFileWords = twoFiles.getCommonWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4,
      twoFileWords.size() == 4);
  assertTrue(twoFiles.isIgnoreCase() + " does not equal: " + true,
      twoFiles.isIgnoreCase() == true);

  // Snowball-format word file.
  CommonGramsQueryFilterFactory snowball = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  CharArraySet snowballWords = snowball.getCommonWords();
  assertEquals(8, snowballWords.size());
  final String[] expectedWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String expected : expectedWords) {
    assertTrue(snowballWords.contains(expected));
  }
}

Source File: TestCommonGramsFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that the common-grams factory loads its common words from a single file, from a
 * comma-separated list of files, and from a snowball-format file.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file.
  CommonGramsFilterFactory singleFile = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet singleFileWords = singleFile.getCommonWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2,
      singleFileWords.size() == 2);
  assertTrue(singleFile.isIgnoreCase() + " does not equal: " + true,
      singleFile.isIgnoreCase() == true);

  // Two word files in one comma-separated argument.
  CommonGramsFilterFactory twoFiles = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  CharArraySet twoFileWords = twoFiles.getCommonWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4,
      twoFileWords.size() == 4);
  assertTrue(twoFiles.isIgnoreCase() + " does not equal: " + true,
      twoFiles.isIgnoreCase() == true);

  // Snowball-format word file.
  CommonGramsFilterFactory snowball = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  CharArraySet snowballWords = snowball.getCommonWords();
  assertEquals(8, snowballWords.size());
  final String[] expectedWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String expected : expectedWords) {
    assertTrue(snowballWords.contains(expected));
  }
}

Source File: SnowballPorterFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Resolves the Snowball stemmer class for the configured language and loads the optional
 * protected-words set.
 *
 * <p>The stemmer class name is {@code org.tartarus.snowball.ext.<language>Stemmer}; an
 * instance is created through the loader and its class is remembered.
 *
 * @param loader instantiates the stemmer and opens the word files
 * @throws IOException if the protected-words files cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";
  final SnowballStemmer prototype = loader.newInstance(stemmerClassName, SnowballStemmer.class);
  stemClass = prototype.getClass();

  if (wordFiles != null) {
    protectedWords = getWordSet(loader, wordFiles, false);
  }
}

Source File: TypeTokenFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Loads the token types listed in the configured stop-type files.
 *
 * <p>When the file list is empty nothing is changed. Otherwise a fresh set is created and
 * filled with the lines of every listed file.
 *
 * @param loader opens the stop-type files
 * @throws IOException if a file cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final List<String> typeFiles = splitFileNames(stopTypesFiles);
  if (typeFiles.isEmpty()) {
    return;
  }

  stopTypes = new HashSet<>();
  for (String typeFile : typeFiles) {
    stopTypes.addAll(getLines(loader, typeFile.trim()));
  }
}

Source File: WordDelimiterGraphFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Loads the optional protected-words set and the optional character type table.
 *
 * <p>For the type table, the lines of every listed types file are collected and handed to
 * {@code parseTypes}.
 *
 * @param loader opens the word and types files
 * @throws IOException if a file cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (wordFiles != null) {
    protectedWords = getWordSet(loader, wordFiles, false);
  }

  if (types == null) {
    return;
  }
  final List<String> typeRules = new ArrayList<>();
  for (String typesFile : splitFileNames(types)) {
    typeRules.addAll(getLines(loader, typesFile.trim()));
  }
  typeTable = parseTypes(typeRules);
}

Source File: ConditionalTokenFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Passes the resource loader on to every inner filter factory that is
 * {@code ResourceLoaderAware}, then runs this factory's own {@code doInform}.
 * Nothing happens, including {@code doInform}, when there are no inner filters.
 *
 * @param loader the resource loader to propagate
 * @throws IOException if an inner factory or {@code doInform} fails
 */
@Override
public final void inform(ResourceLoader loader) throws IOException {
  if (innerFilters != null) {
    for (TokenFilterFactory inner : innerFilters) {
      if (inner instanceof ResourceLoaderAware) {
        final ResourceLoaderAware aware = (ResourceLoaderAware) inner;
        aware.inform(loader);
      }
    }
    doInform(loader);
  }
}

org.apache.lucene.analysis.util.ResourceLoader Java Examples