org.apache.lucene.analysis.util.ResourceLoader Java Examples
The following examples show how to use
org.apache.lucene.analysis.util.ResourceLoader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SynonymFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link SynonymMap} from the configured synonym files, using the
 * {@link SynonymMap.Parser} implementation named by {@code cname}.
 * The parser is created reflectively through its
 * {@code (boolean, boolean, Analyzer)} constructor; any failure to do so is
 * rethrown as a {@link RuntimeException}.
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
    throws IOException, ParseException {
  // Strict UTF-8 decoding: malformed or unmappable input is reported rather than replaced.
  CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  for (String fileName : splitFileNames(synonyms)) {
    // the decoder is shared between files, so clear its state before each one
    utf8Decoder.reset();
    try (final Reader fileReader = new InputStreamReader(loader.openResource(fileName), utf8Decoder)) {
      synonymParser.parse(fileReader);
    }
  }
  return synonymParser.build();
}
Example #2
Source File: PhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void inform(ResourceLoader loader) throws IOException { clazz = registry.get(name.toUpperCase(Locale.ROOT)); if( clazz == null ) { clazz = resolveEncoder(name, loader); } if (maxCodeLength != null) { try { setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class); } catch (Exception e) { throw new IllegalArgumentException("Encoder " + name + " / " + clazz + " does not support " + MAX_CODE_LENGTH, e); } } getEncoder();//trigger initialization for potential problems to be thrown now }
Example #3
Source File: KoreanTokenizerFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Loads the user dictionary from {@code userDictionaryPath}, or clears it when no
 * path is configured. The file is decoded with {@code userDictionaryEncoding}
 * (falling back to {@code IOUtils.UTF_8}); malformed or unmappable input is reported.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath == null) {
    userDictionary = null;
    return;
  }

  try (InputStream dictionaryStream = loader.openResource(userDictionaryPath)) {
    String charsetName = userDictionaryEncoding;
    if (charsetName == null) {
      charsetName = IOUtils.UTF_8;
    }
    CharsetDecoder strictDecoder = Charset.forName(charsetName).newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    // the underlying stream is closed by the enclosing try-with-resources
    userDictionary = UserDictionary.open(new InputStreamReader(dictionaryStream, strictDecoder));
  }
}
Example #4
Source File: HyphenationCompoundWordTokenFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void inform(ResourceLoader loader) throws IOException { InputStream stream = null; try { if (dictFile != null) // the dictionary can be empty. dictionary = getWordSet(loader, dictFile, false); // TODO: Broken, because we cannot resolve real system id // ResourceLoader should also supply method like ClassLoader to get resource URL stream = loader.openResource(hypFile); final InputSource is = new InputSource(stream); is.setEncoding(encoding); // if it's null let xml parser decide is.setSystemId(hypFile); hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); } finally { IOUtils.closeWhileHandlingException(stream); } }
Example #5
Source File: ICUTokenizerFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void inform(ResourceLoader loader) throws IOException { assert tailored != null : "init must be called first!"; if (tailored.isEmpty()) { config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords); } else { final BreakIterator breakers[] = new BreakIterator[1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)]; for (Map.Entry<Integer,String> entry : tailored.entrySet()) { int code = entry.getKey(); String resourcePath = entry.getValue(); breakers[code] = parseRules(resourcePath, loader); } config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) { @Override public RuleBasedBreakIterator getBreakIterator(int script) { if (breakers[script] != null) { return (RuleBasedBreakIterator) breakers[script].clone(); } else { return super.getBreakIterator(script); } } // TODO: we could also allow codes->types mapping }; } }
Example #6
Source File: SafeXMLParsing.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Parses a config file obtained from the given ResourceLoader into a DOM document.
 * The parser is non-validating and namespace aware, with secure processing
 * requested and XInclude switched on; entity resolution goes through a
 * {@code SystemIdResolver} built on the same loader, and parse errors are
 * reported to {@code log}.
 *
 * @throws SolrException if the XML parser does not support XInclude or cannot be configured
 */
public static Document parseConfigXML(Logger log, ResourceLoader loader, String file)
    throws SAXException, IOException {
  try (InputStream configStream = loader.openResource(file)) {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setNamespaceAware(true);
    trySetDOMFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
    try {
      factory.setXIncludeAware(true);
    } catch (UnsupportedOperationException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "XML parser doesn't support XInclude option", e);
    }

    final DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setEntityResolver(new SystemIdResolver(loader));
    builder.setErrorHandler(new XMLErrorLogger(log));

    final String systemId = SystemIdResolver.createSystemIdFromResourceName(file);
    return builder.parse(configStream, systemId);
  } catch (ParserConfigurationException pce) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "XML parser cannot be configured", pce);
  }
}
Example #7
Source File: TestKeepFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks that the KeepWord factory loads its word set from one file and from a
 * comma-separated list of two files.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // a single word file
  KeepWordFilterFactory singleFileFactory = (KeepWordFilterFactory) tokenFilterFactory(
      "KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  CharArraySet keepWords = singleFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepWords != null);
  assertTrue("words Size: " + keepWords.size() + " is not: " + 2, keepWords.size() == 2);

  // two word files given as one comma-separated argument
  KeepWordFilterFactory twoFileFactory = (KeepWordFilterFactory) tokenFilterFactory(
      "KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  keepWords = twoFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepWords != null);
  assertTrue("words Size: " + keepWords.size() + " is not: " + 4, keepWords.size() == 4);
}
Example #8
Source File: MappingCharFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void inform(ResourceLoader loader) throws IOException { if (mapping != null) { List<String> wlist = null; List<String> files = splitFileNames(mapping); wlist = new ArrayList<>(); for (String file : files) { List<String> lines = getLines(loader, file.trim()); wlist.addAll(lines); } final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); parseRules(wlist, builder); normMap = builder.build(); if (normMap.map == null) { // if the inner FST is null, it means it accepts nothing (e.g. the file is empty) // so just set the whole map to null normMap = null; } } }
Example #9
Source File: DefaultQuerqyDismaxQParserTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * Creates a test RewriterFactory whose rewriter replaces the user query with a
 * {@code MatchAllQuery} and adds a filter query parsed from {@code "a"}.
 * The factory reports no generable terms.
 */
@Override
public RewriterFactory createFactory(final String rewriterId, NamedList<?> args,
                                     ResourceLoader resourceLoader) {
  return new RewriterFactory(rewriterId) {

    @Override
    public QueryRewriter createRewriter(ExpandedQuery input,
                                        SearchEngineRequestAdapter searchEngineRequestAdapter) {
      return expandedQuery -> {
        expandedQuery.setUserQuery(new MatchAllQuery());
        expandedQuery.addFilterQuery(WhiteSpaceQuerqyParser.parseString("a"));
        return expandedQuery;
      };
    }

    @Override
    public Set<Term> getGenerableTerms() {
      return Collections.emptySet();
    }
  };
}
Example #10
Source File: AbstractQuerqyDismaxQParserPluginTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * With only a parser {@code class} configured (and no {@code factory}), the plugin
 * must produce a SimpleQuerqyQParserFactory wrapping that parser class.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfOnlyTheParserClassIsConfigured() throws Exception {

  // stub the configuration: parser.class is set, parser.factory is absent
  NamedList<NamedList<String>> args = mock(NamedList.class);
  when(args.get("parser")).thenReturn(parserConfig);
  when(parserConfig.get("factory")).thenReturn(null);
  when(parserConfig.get("class")).thenReturn("querqy.parser.WhiteSpaceQuerqyParser");

  ResourceLoader classpathLoader = new ClasspathResourceLoader(getClass().getClassLoader());

  final SolrQuerqyParserFactory loadedFactory = plugin.loadSolrQuerqyParserFactory(classpathLoader, args);

  assertNotNull(loadedFactory);
  assertTrue(loadedFactory instanceof SimpleQuerqyQParserFactory);
  SimpleQuerqyQParserFactory simpleFactory = (SimpleQuerqyQParserFactory) loadedFactory;
  assertEquals(WhiteSpaceQuerqyParser.class, simpleFactory.querqyParserClass);
}
Example #11
Source File: EnglishLemmatisationFilterFactory.java From jate with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Creates the lemmatiser from a resource directory located under the Solr config
 * directory. Does nothing when no lemmatiser resource directory is configured.
 *
 * @param loader the resource loader; it is cast to {@code SolrResourceLoader} to
 *        obtain the config directory
 * @throws IllegalArgumentException if initialisation fails for any reason; the
 *         message carries the full stack trace text and the original exception is
 *         attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (lemmatiserResourceDir == null) {
    return;
  }

  try {
    String path = ((SolrResourceLoader) loader).getConfigDir();
    // make sure the config directory ends with a separator before appending
    if (!path.endsWith(File.separator)) {
      path = path + File.separator;
    }
    lemmatiser = new EngLemmatiser(path + lemmatiserResourceDir, false, false);
  } catch (Exception e) {
    StringBuilder sb = new StringBuilder("Initiating ");
    sb.append(this.getClass().getName()).append(" failed due to:\n");
    sb.append(ExceptionUtils.getFullStackTrace(e));
    // keep the original exception as the cause so callers can inspect it programmatically
    throw new IllegalArgumentException(sb.toString(), e);
  }
}
Example #12
Source File: OpenNLPTokenizerFactory.java From jate with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Loads the OpenNLP models and the optional paragraph chunker.
 * <ul>
 *   <li>The sentence model is optional and only loaded when a file is configured.</li>
 *   <li>The tokenizer model is mandatory.</li>
 *   <li>The paragraph chunker class, if configured, is instantiated reflectively.</li>
 * </ul>
 * Each model stream is closed as soon as its model has been constructed.
 *
 * @param loader the resource loader used to open the model files
 * @throws IOException if the tokenizer model file is not configured, a model cannot
 *         be read, or the paragraph chunker cannot be instantiated
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (sentenceModelFile != null) {
    // NOTE(review): assumes the model constructor reads the stream fully before returning - confirm
    try (java.io.InputStream sentenceStream = loader.openResource(sentenceModelFile)) {
      sentenceOp = new SentenceDetectorME(new SentenceModel(sentenceStream));
    }
  }

  if (tokenizerModelFile == null) {
    throw new IOException("Parameter 'tokenizerModle' is required, but is invalid:" + tokenizerModelFile);
  }
  try (java.io.InputStream tokenizerStream = loader.openResource(tokenizerModelFile)) {
    tokenizerOp = new TokenizerME(new TokenizerModel(tokenizerStream));
  }

  if (parChunkingClass != null) {
    try {
      Class<?> chunkerClass = Class.forName(parChunkingClass);
      Object chunker = chunkerClass.newInstance();
      paragraphChunker = (ParagraphChunker) chunker;
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
}
Example #13
Source File: MtasTokenizerFactory.java From mtas with Apache License 2.0 | 6 votes |
/** * Instantiates a new mtas tokenizer factory. * * @param args the args * @param resourceLoader the resource loader * @throws IOException Signals that an I/O exception has occurred. */ public MtasTokenizerFactory(Map<String, String> args, ResourceLoader resourceLoader) throws IOException { super(args); configFileArgument = get(args, ARGUMENT_CONFIGFILE); configArgument = get(args, ARGUMENT_CONFIG); analyzerArgument = get(args, ARGUMENT_PARSER); analyzerArgumentParserArgs = get(args, ARGUMENT_PARSER_ARGS); defaultArgument = get(args, ARGUMENT_DEFAULT); int numberOfArgs = 0; numberOfArgs = (configFileArgument==null)?numberOfArgs:numberOfArgs+1; numberOfArgs = (configArgument==null)?numberOfArgs:numberOfArgs+1; numberOfArgs = (analyzerArgument==null)?numberOfArgs:numberOfArgs+1; if (numberOfArgs>1) { throw new IOException(this.getClass().getName() + " can't have multiple of " + ARGUMENT_CONFIGFILE + ", " + ARGUMENT_CONFIG+" AND "+ARGUMENT_PARSER); } else if (configArgument == null && defaultArgument != null) { throw new IOException(this.getClass().getName() + " can't have " + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG); } else if (numberOfArgs==0) { throw new IOException(this.getClass().getName() + " should have " + ARGUMENT_CONFIGFILE + " or " + ARGUMENT_CONFIG+" or "+ARGUMENT_PARSER); } init(resourceLoader); }
Example #14
Source File: SynonymGraphFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 * <p>
 * The parser is created reflectively through its
 * {@code (boolean, boolean, Analyzer)} constructor and fed every configured
 * synonym file in turn. Files are decoded as strict UTF-8: malformed or
 * unmappable input is reported instead of being replaced. Each file's reader is
 * closed once it has been parsed, also when parsing fails.
 *
 * @param loader   resource loader used to find the parser class and open the files
 * @param cname    name of the {@link SynonymMap.Parser} implementation
 * @param dedup    passed as the first constructor argument of the parser
 * @param analyzer passed as the third constructor argument of the parser
 * @return the synonym map built by the parser
 * @throws IOException      if a synonym file cannot be opened or read
 * @throws ParseException   if a synonym file cannot be parsed
 * @throws RuntimeException if the parser cannot be instantiated
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
    throws IOException, ParseException {
  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  SynonymMap.Parser parser;
  Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
  try {
    parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  List<String> files = splitFileNames(synonyms);
  for (String file : files) {
    // the decoder is reused across files, so reset its state first
    decoder.reset();
    // close the reader (and the underlying resource stream) after each file
    try (final InputStreamReader isr = new InputStreamReader(loader.openResource(file), decoder)) {
      parser.parse(isr);
    }
  }
  return parser.build();
}
Example #15
Source File: StemmerOverrideFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds the stemmer override dictionary from the configured dictionary files.
 * Every line is split at the first tab into an input and an output part.
 * Nothing is built when no files are configured or the file list is empty.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (dictionaryFiles == null) {
    return;
  }
  final List<String> fileNames = splitFileNames(dictionaryFiles);
  if (fileNames.isEmpty()) {
    return;
  }

  final StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(ignoreCase);
  for (String fileName : fileNames) {
    for (String line : getLines(loader, fileName.trim())) {
      // limit 2: everything after the first tab belongs to the second part
      final String[] parts = line.split("\t", 2);
      overrides.add(parts[0], parts[1]);
    }
  }
  dictionary = overrides.build();
}
Example #16
Source File: MtasConfiguration.java From mtas with Apache License 2.0 | 6 votes |
/**
 * Read mtas tokenizer configurations.
 * <p>
 * Reads the named configurations from {@code configFile} and, for each of them,
 * opens and parses the tokenizer configuration file it refers to. The stream of
 * each tokenizer configuration file is closed once it has been read.
 *
 * @param resourceLoader
 *          the resource loader
 * @param configFile
 *          the config file
 * @return a map from configuration name to the parsed tokenizer configuration
 * @throws IOException
 *           if there are no configurations, a configuration lacks the tokenizer
 *           configuration file entry, or a resource cannot be read
 */
public static HashMap<String, MtasConfiguration> readMtasTokenizerConfigurations(
    ResourceLoader resourceLoader, String configFile) throws IOException {
  HashMap<String, HashMap<String, String>> configs = readConfigurations(
      resourceLoader, configFile, MtasTokenizerFactory.class.getName());
  if (configs == null) {
    throw new IOException("no configurations");
  }

  HashMap<String, MtasConfiguration> result = new HashMap<>();
  for (Entry<String, HashMap<String, String>> entry : configs.entrySet()) {
    HashMap<String, String> config = entry.getValue();
    if (!config.containsKey(TOKENIZER_CONFIGURATION_FILE)) {
      throw new IOException("configuration " + entry.getKey() + " has no "
          + TOKENIZER_CONFIGURATION_FILE);
    }
    // NOTE(review): assumes readConfiguration consumes the stream before returning - confirm
    try (java.io.InputStream configStream = resourceLoader
        .openResource(config.get(TOKENIZER_CONFIGURATION_FILE))) {
      result.put(entry.getKey(), readConfiguration(configStream));
    }
  }
  return result;
}
Example #17
Source File: ReplaceRewriterFactory.java From querqy with Apache License 2.0 | 5 votes |
@Override public RewriterFactory createFactory(String id, NamedList<?> args, ResourceLoader resourceLoader) throws IOException { final String rulesResourceName = (String) args.get("rules"); if (rulesResourceName == null) { throw new IllegalArgumentException("Property 'rules' not configured"); } final InputStreamReader reader = new InputStreamReader(resourceLoader.openResource(rulesResourceName), StandardCharsets.UTF_8); final Boolean ignoreCase = args.getBooleanArg("ignoreCase"); final String inputDelimiter = (String) args.get("inputDelimiter"); // querqy parser for queries that are part of the instructions in the rules String rulesQuerqyParser = (String) args.get("querqyParser"); QuerqyParserFactory querqyParser = null; if (rulesQuerqyParser != null) { rulesQuerqyParser = rulesQuerqyParser.trim(); if (rulesQuerqyParser.length() > 0) { querqyParser = resourceLoader.newInstance(rulesQuerqyParser, QuerqyParserFactory.class); } } if (querqyParser == null) { querqyParser = new WhiteSpaceQuerqyParserFactory(); } return new querqy.rewrite.contrib.ReplaceRewriterFactory(id, reader, ignoreCase != null ? ignoreCase : DEFAULT_IGNORE_CASE, inputDelimiter != null ? inputDelimiter : DEFAULT_INPUT_DELIMITER, querqyParser.createParser()); }
Example #18
Source File: CollationField.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Read custom rules from a file, and create a RuleBasedCollator * The file cannot support comments, as # might be in the rules! */ private Collator createFromRules(String fileName, ResourceLoader loader) { InputStream input = null; try { input = loader.openResource(fileName); String rules = IOUtils.toString(input, "UTF-8"); return new RuleBasedCollator(rules); } catch (IOException | ParseException e) { // io error or invalid rules throw new RuntimeException(e); } finally { IOUtils.closeQuietly(input); } }
Example #19
Source File: MMSegTokenizerFactory.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Loads the dictionary named by the {@code dicPath} argument through the given
 * loader and logs the URI of the dictionary path.
 */
@Override
public void inform(ResourceLoader loader) {
  final String configuredPath = getOriginalArgs().get("dicPath");
  dic = Utils.getDict(configuredPath, loader);

  logger.info("dic load... in={}", dic.getDicPath().toURI());
}
Example #20
Source File: TestSystemIdResolver.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Exercises entity resolving with unsafe resource loading switched on, using
 * system ids that point outside the {@code collection1} instance directory.
 */
public void testUnsafeResolving() throws Exception {
  System.setProperty("solr.allow.unsafe.resourceloading", "true");

  // parent directory of solr/collection1; the loader itself is rooted at collection1
  final Path testHome = SolrTestCaseJ4.getFile("solr/collection1").getParentFile().toPath();
  final ResourceLoader loader =
      new SolrResourceLoader(testHome.resolve("collection1"), this.getClass().getClassLoader());
  final SystemIdResolver resolver = new SystemIdResolver(loader);

  final String schemaSystemId =
      SystemIdResolver.createSystemIdFromResourceName(testHome + "/crazy-path-to-schema.xml");
  final String configSystemId =
      SystemIdResolver.createSystemIdFromResourceName(testHome + "/crazy-path-to-config.xml");

  assertEntityResolving(resolver, schemaSystemId, configSystemId, "crazy-path-to-schema.xml");
}
Example #21
Source File: OpenNLPRegexChunkerFactory.java From jate with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Runs the superclass initialisation and then loads the chunking patterns from the
 * configured pattern file, if any.
 *
 * @param loader the resource loader used to read the pattern file
 * @throws IOException if the superclass initialisation fails
 * @throws IllegalArgumentException if the pattern file cannot be read; the message
 *         carries the full stack trace text and the original {@link IOException}
 *         is attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  super.inform(loader);
  if (patternFile == null) {
    return;
  }

  try {
    List<String> lines = getLines(loader, patternFile.trim());
    initPatterns(lines, patterns);
  } catch (IOException ioe) {
    StringBuilder sb = new StringBuilder("Initiating ");
    sb.append(this.getClass().getName()).append(" failed due to patterns. Details:\n");
    sb.append(ExceptionUtils.getFullStackTrace(ioe));
    // keep the original exception as the cause so callers can inspect it programmatically
    throw new IllegalArgumentException(sb.toString(), ioe);
  }
}
Example #22
Source File: NumberUnitRewriterFactory.java From querqy with Apache License 2.0 | 5 votes |
@Override public RewriterFactory createFactory(final String id, final NamedList<?> args, final ResourceLoader resourceLoader) throws IOException { final Object obj = args.get(KEY_CONFIG_FILE); if (!(obj instanceof String)) { throw new IllegalArgumentException("Property 'config' not or not properly configured"); } final String rulesResourceName = (String) obj; // resource InputStream will be closed by Jackson Json Parser final NumberUnitConfigObject numberUnitConfigObject = JSON_DEFAULT_OBJECT_MAPPER.readValue( resourceLoader.openResource(rulesResourceName), NumberUnitConfigObject.class); final int scale = getOrDefaultInt(numberUnitConfigObject::getScaleForLinearFunctions, DEFAULT_SCALE_FOR_LINEAR_FUNCTIONS); final List<NumberUnitDefinition> numberUnitDefinitions = parseConfig(numberUnitConfigObject); numberUnitDefinitions.stream() .filter(this::numberUnitDefinitionHasDuplicateUnitDefinition) .findFirst() .ifPresent(numberUnitDefinition -> { throw new IllegalArgumentException("Units must only defined once per NumberUnitDefinition");}); return new querqy.rewrite.contrib.NumberUnitRewriterFactory(id, numberUnitDefinitions, new NumberUnitQueryCreatorSolr(scale)); }
Example #23
Source File: ManagedSynonymGraphFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Called once, during core initialization, to initialize any analysis components * that depend on the data managed by this resource. It is important that the * analysis component is only initialized once during core initialization so that * text analysis is consistent, especially in a distributed environment, as we * don't want one server applying a different set of stop words than other servers. */ @SuppressWarnings("unchecked") @Override public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) throws SolrException { NamedList<Object> args = (NamedList<Object>)initArgs; args.add("synonyms", getResourceId()); args.add("expand", "false"); args.add("format", "solr"); Map<String,String> filtArgs = new HashMap<>(); for (Map.Entry<String,?> entry : args) { filtArgs.put(entry.getKey(), entry.getValue().toString()); } // create the actual filter factory that pulls the synonym mappings // from synonymMappings using a custom parser implementation delegate = new SynonymGraphFilterFactory(filtArgs) { @Override protected SynonymMap loadSynonyms (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException { ManagedSynonymParser parser = new ManagedSynonymParser((SynonymManager)res, dedup, analyzer); // null is safe here because there's no actual parsing done against a input Reader parser.parse(null); return parser.build(); } }; try { delegate.inform(res.getResourceLoader()); } catch (IOException e) { throw new SolrException(ErrorCode.SERVER_ERROR, e); } }
Example #24
Source File: MtasCharFilterFactory.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Creates a char filter factory from the given arguments and initialises it.
 * Either the type or the config argument must be given, but not both; prefix and
 * postfix require the type argument, and default requires the config argument.
 *
 * @param args the factory arguments
 * @param resourceLoader the resource loader passed on to {@code init}
 * @throws IOException if the argument combination is invalid
 */
public MtasCharFilterFactory(Map<String, String> args, ResourceLoader resourceLoader) throws IOException {
  super(args);
  typeArgument = get(args, ARGUMENT_TYPE);
  prefixArgument = get(args, ARGUMENT_PREFIX);
  postfixArgument = get(args, ARGUMENT_POSTFIX);
  configArgument = get(args, ARGUMENT_CONFIG);
  defaultArgument = get(args, ARGUMENT_DEFAULT);

  // the checks run in this order; the first violated rule determines the message
  if (typeArgument != null && configArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have both "
        + ARGUMENT_TYPE + " and " + ARGUMENT_CONFIG);
  }
  if (typeArgument == null && prefixArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_PREFIX + " without " + ARGUMENT_TYPE);
  }
  if (typeArgument == null && postfixArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_POSTFIX + " without " + ARGUMENT_TYPE);
  }
  if (configArgument == null && defaultArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG);
  }
  if (typeArgument == null && configArgument == null) {
    throw new IOException(this.getClass().getName() + " should have "
        + ARGUMENT_TYPE + " or " + ARGUMENT_CONFIG);
  }

  init(resourceLoader);
}
Example #25
Source File: TestCommonGramsQueryFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that the CommonGramsQuery factory loads its common words from one file,
 * from two comma-separated files, and from a snowball-format file.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // a single word file
  CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet commonWords = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 2, commonWords.size() == 2);
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);

  // two word files given as one comma-separated argument
  factory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  commonWords = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 4, commonWords.size() == 4);
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);

  // snowball-format word file
  factory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  commonWords = factory.getCommonWords();
  assertEquals(8, commonWords.size());
  final String[] expectedWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String expected : expectedWords) {
    assertTrue(commonWords.contains(expected));
  }
}
Example #26
Source File: TestCommonGramsFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that the CommonGrams factory loads its common words from one file, from
 * two comma-separated files, and from a snowball-format file.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // a single word file
  CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet commonWords = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 2, commonWords.size() == 2);
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);

  // two word files given as one comma-separated argument
  factory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  commonWords = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 4, commonWords.size() == 4);
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);

  // snowball-format word file
  factory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  commonWords = factory.getCommonWords();
  assertEquals(8, commonWords.size());
  final String[] expectedWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String expected : expectedWords) {
    assertTrue(commonWords.contains(expected));
  }
}
Example #27
Source File: SnowballPorterFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Resolves the Snowball stemmer class for the configured language and loads the
 * protected-words set when word files are configured.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";
  // an instance is created only to obtain its concrete class
  final SnowballStemmer stemmerInstance = loader.newInstance(stemmerClassName, SnowballStemmer.class);
  stemClass = stemmerInstance.getClass();

  if (wordFiles == null) {
    return;
  }
  protectedWords = getWordSet(loader, wordFiles, false);
}
Example #28
Source File: TypeTokenFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Collects the token types listed in the configured stop-types files into
 * {@code stopTypes}. The set is left untouched when no files are listed.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final List<String> fileNames = splitFileNames(stopTypesFiles);
  if (fileNames.isEmpty()) {
    return;
  }

  stopTypes = new HashSet<>();
  for (String fileName : fileNames) {
    stopTypes.addAll(getLines(loader, fileName.trim()));
  }
}
Example #29
Source File: WordDelimiterGraphFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Loads the protected-words set (when word files are configured) and builds the
 * type table from the lines of all configured types files (when types are configured).
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (wordFiles != null) {
    protectedWords = getWordSet(loader, wordFiles, false);
  }

  if (types == null) {
    return;
  }
  final List<String> typeFileNames = splitFileNames(types);
  final List<String> typeLines = new ArrayList<>();
  for (String fileName : typeFileNames) {
    typeLines.addAll(getLines(loader, fileName.trim()));
  }
  typeTable = parseTypes(typeLines);
}
Example #30
Source File: ConditionalTokenFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Passes the loader on to every inner filter factory that is
 * {@code ResourceLoaderAware}, then calls {@code doInform}.
 * Nothing happens, not even {@code doInform}, when there are no inner filters set.
 */
@Override
public final void inform(ResourceLoader loader) throws IOException {
  if (innerFilters == null) {
    return;
  }

  for (TokenFilterFactory inner : innerFilters) {
    if (!(inner instanceof ResourceLoaderAware)) {
      continue;
    }
    ((ResourceLoaderAware) inner).inform(loader);
  }

  doInform(loader);
}