Java Code Examples for org.apache.lucene.analysis.BaseTokenStreamTestCase#assertTokenStreamContents()
The following examples show how to use
org.apache.lucene.analysis.BaseTokenStreamTestCase#assertTokenStreamContents() .
You can vote up the examples you find useful or vote down those you don't,
and follow the links above each example to the original project or source file. You may also check out the related API usage on the sidebar.
Example 1
Source File: TestUAX29URLEmailTokenizer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Verifies that UAX29URLEmailTokenizer copes with a document that begins
 * with a large run of whitespace (4094 spaces) and still emits the
 * trailing tokens.
 */
public void testHugeDoc() throws IOException {
  char[] padding = new char[4094];
  Arrays.fill(padding, ' ');
  String input = new StringBuilder().append(padding).append("testing 1234").toString();
  UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory());
  tokenizer.setReader(new StringReader(input));
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
Example 2
Source File: TestStandardAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Verifies that StandardTokenizer copes with a document that begins with a
 * large run of whitespace (4094 spaces) and still emits the trailing tokens.
 */
public void testHugeDoc() throws IOException {
  char[] padding = new char[4094];
  Arrays.fill(padding, ' ');
  String input = new StringBuilder().append(padding).append("testing 1234").toString();
  StandardTokenizer tokenizer = new StandardTokenizer();
  tokenizer.setReader(new StringReader(input));
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
Example 3
Source File: ProtectedTermFilterFactoryTest.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testBasic() throws Exception { String text = "Wuthering FooBar distant goldeN ABC compote"; Map<String,String> args = new HashMap<>(); args.put("ignoreCase", "true"); args.put("protected", "protected-1.txt,protected-2.txt"); // Protected: foobar, jaxfopbuz, golden, compote args.put("wrappedFilters", "lowercase"); ResourceLoader loader = new SolrResourceLoader(TEST_PATH().resolve("collection1")); ProtectedTermFilterFactory factory = new ProtectedTermFilterFactory(args); factory.inform(loader); TokenStream ts = factory.create(whitespaceMockTokenizer(text)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "wuthering", "FooBar", "distant", "goldeN", "abc", "compote" }); }
Example 4
Source File: SimplePhoneticAnalysisTests.java From crate with Apache License 2.0 | 5 votes |
/**
 * Verifies the Beider-Morse phonetic filter with no language hint: a single
 * input token ("ABADIAS") expands into the full set of phonetic variants
 * across all candidate languages.
 */
public void testPhoneticTokenFilterBeiderMorseNoLanguage() throws IOException {
  TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilter");

  Tokenizer tokenizer = new WhitespaceTokenizer();
  tokenizer.setReader(new StringReader("ABADIAS"));

  String[] expected = {
      "abYdias", "abYdios", "abadia", "abadiaS", "abadias", "abadio", "abadioS",
      "abadias", "abodia", "abodiaS", "abodias", "abodio", "abodioS", "abodios",
      "avadias", "avadios", "avodias", "avodios", "obadia", "obadiaS", "obadias",
      "obadio", "obadioS", "obadios", "obodia", "obodiaS", "obodias", "obodioS"
  };
  BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected);
}
Example 5
Source File: SimplePhoneticAnalysisTests.java From crate with Apache License 2.0 | 5 votes |
/**
 * Verifies the Beider-Morse phonetic filter configured for French: the
 * input token ("Rimbault") expands into the French-specific set of
 * phonetic variants.
 */
public void testPhoneticTokenFilterBeiderMorseWithLanguage() throws IOException {
  TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilterfrench");

  Tokenizer tokenizer = new WhitespaceTokenizer();
  tokenizer.setReader(new StringReader("Rimbault"));

  String[] expected = {
      "rimbD", "rimbDlt", "rimba", "rimbalt", "rimbo", "rimbolt", "rimbu", "rimbult",
      "rmbD", "rmbDlt", "rmba", "rmbalt", "rmbo", "rmbolt", "rmbu", "rmbult"
  };
  BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected);
}
Example 6
Source File: SimplePhoneticAnalysisTests.java From crate with Apache License 2.0 | 5 votes |
/**
 * Verifies the "daitch_mokotoff" token filter: it must be a
 * DaitchMokotoffSoundexFilter and must produce the expected Daitch-Mokotoff
 * soundex codes for "chauptman".
 *
 * <p>NOTE(review): the method name misspells "Mokotoff" as "Motokoff"; it is
 * kept unchanged so the test's public name stays stable.
 */
public void testPhoneticTokenFilterDaitchMotokoff() throws IOException {
  TokenFilterFactory filterFactory = analysis.tokenFilter.get("daitch_mokotoff");
  Tokenizer tokenizer = new WhitespaceTokenizer();
  tokenizer.setReader(new StringReader("chauptman"));
  String[] expected = new String[] { "473660", "573660" };
  // Create the filter exactly once: the original called create(tokenizer)
  // twice, wrapping the same tokenizer in two separate filter instances
  // (the first used only for the instanceOf check).
  TokenStream stream = filterFactory.create(tokenizer);
  assertThat(stream, instanceOf(DaitchMokotoffSoundexFilter.class));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream, expected);
}
Example 7
Source File: TestEmptyTokenStream.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Consuming an EmptyTokenStream must yield exactly zero tokens. */
public void testConsume2() throws IOException {
  BaseTokenStreamTestCase.assertTokenStreamContents(new EmptyTokenStream(), new String[] {});
}
Example 8
Source File: TestWordDelimiterFilterFactory.java From lucene-solr with Apache License 2.0 | 4 votes |
@Test public void testCustomTypes() throws Exception { String testText = "I borrowed $5,400.00 at 25% interest-rate"; ResourceLoader loader = new SolrResourceLoader(TEST_PATH().resolve("collection1")); Map<String,String> args = new HashMap<>(); args.put("luceneMatchVersion", Version.LATEST.toString()); args.put("generateWordParts", "1"); args.put("generateNumberParts", "1"); args.put("catenateWords", "1"); args.put("catenateNumbers", "1"); args.put("catenateAll", "0"); args.put("splitOnCaseChange", "1"); /* default behavior */ WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(args); factoryDefault.inform(loader); TokenStream ts = factoryDefault.create(whitespaceMockTokenizer(testText)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "5", "540000", "400", "00", "at", "25", "interest", "interestrate", "rate" }); ts = factoryDefault.create(whitespaceMockTokenizer("foo\u200Dbar")); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo", "foobar", "bar" }); /* custom behavior */ args = new HashMap<>(); // use a custom type mapping args.put("luceneMatchVersion", Version.LATEST.toString()); args.put("generateWordParts", "1"); args.put("generateNumberParts", "1"); args.put("catenateWords", "1"); args.put("catenateNumbers", "1"); args.put("catenateAll", "0"); args.put("splitOnCaseChange", "1"); args.put("types", "wdftypes.txt"); WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args); factoryCustom.inform(loader); ts = factoryCustom.create(whitespaceMockTokenizer(testText)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "interestrate", "rate" }); /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */ ts = factoryCustom.create(whitespaceMockTokenizer("foo\u200Dbar")); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo\u200Dbar" }); }