Java Code Examples for org.apache.lucene.util.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY
The following examples show how to use
org.apache.lucene.util.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY.
The examples are extracted from open source projects; the originating project and license are noted above each example.
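For context, DEFAULT_ATTRIBUTE_FACTORY is the shared factory Lucene uses to create attribute instances when no factory is supplied explicitly; passing it to a Tokenizer constructor is equivalent to using the no-arg form. Below is a minimal, self-contained sketch of the pattern the examples share (the class name DefaultFactoryDemo and the sample text are illustrative, not taken from any of the projects below):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;

public class DefaultFactoryDemo {
    public static void main(String[] args) throws IOException {
        // Pass the shared default factory explicitly; new StandardTokenizer()
        // would fall back to the same instance.
        StandardTokenizer tokenizer = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
        tokenizer.setReader(new StringReader("hello attribute factory"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(term.toString()); // prints each token on its own line
        }
        tokenizer.end();
        tokenizer.close();
    }
}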
Example 1
Source File: QueryParserImpl.java From AdSearch_Endpoints with Apache License 2.0
@Override
public List<String> parseQuery(String queryStr) {
    // Tokenize queryStr and remove stop words (no stemming is applied here).
    List<String> tokens = new ArrayList<String>();
    AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
    Tokenizer tokenizer = new StandardTokenizer(factory);
    tokenizer.setReader(new StringReader(queryStr));
    CharArraySet stopWords = EnglishAnalyzer.getDefaultStopSet();
    TokenStream tokenStream = new StopFilter(tokenizer, stopWords);
    // The filter shares the tokenizer's attribute source, so the attribute
    // can be obtained from either end of the chain.
    CharTermAttribute charTermAttribute = tokenizer.addAttribute(CharTermAttribute.class);
    try {
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            tokens.add(charTermAttribute.toString());
        }
        tokenStream.end();
        tokenStream.close(); // closing the filter also closes the wrapped tokenizer
    } catch (IOException e) {
        e.printStackTrace();
    }
    return tokens;
}
Example 2
Source File: SegmentationIcuTokenizerTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
public void testHugeTerm2() throws Exception {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < 40960; i++) {
        sb.append('a');
    }
    String input = sb.toString();
    IcuTokenizer tokenizer = new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
            new DefaultIcuTokenizerConfig(false, true));
    tokenizer.setReader(new StringReader(input));
    char[] token = new char[4096];
    Arrays.fill(token, 'a');
    String expectedToken = new String(token);
    String[] expected = {
            expectedToken, expectedToken, expectedToken, expectedToken, expectedToken,
            expectedToken, expectedToken, expectedToken, expectedToken, expectedToken
    };
    assertTokenStreamContents(tokenizer, expected);
}
Example 3
Source File: BaseTokenStreamTestCase.java From lucene-solr with Apache License 2.0
/** Returns a random AttributeFactory impl */
public static AttributeFactory newAttributeFactory(Random random) {
    switch (random.nextInt(3)) {
        case 0:
            return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
        case 1:
            return Token.TOKEN_ATTRIBUTE_FACTORY;
        case 2:
            return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
        default:
            throw new AssertionError("Please fix the Random.nextInt() call above");
    }
}
Example 4
Source File: Test2BTerms.java From lucene-solr with Apache License 2.0
public MyTokenStream(Random random, int tokensPerDoc) {
    super(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
    this.tokensPerDoc = tokensPerDoc;
    addAttribute(TermToBytesRefAttribute.class);
    bytes.length = TOKEN_LEN;
    this.random = random;
    nextSave = TestUtil.nextInt(random, 500000, 1000000);
}
Example 5
Source File: CJKBigramFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
@Before
public void up() {
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
                    new DefaultIcuTokenizerConfig(false, true));
            TokenStream result = new CJKBigramFilter(source);
            return new TokenStreamComponents(source,
                    new StopFilter(result, CharArraySet.EMPTY_SET));
        }
    };
}
Example 6
Source File: MyanmarSyllableTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
private static Analyzer createAnalyzer() {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
                    new DefaultIcuTokenizerConfig(false, false));
            return new TokenStreamComponents(tokenizer);
        }
    };
}
Example 7
Source File: IcuTokenizerCJKTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
private static Analyzer create() {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
                    new DefaultIcuTokenizerConfig(true, true)));
        }
    };
}
Example 8
Source File: SegmentationIcuTokenizerTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
private static Analyzer createAnalyzer() {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
                    new DefaultIcuTokenizerConfig(false, true));
            TokenFilter filter = new IcuNormalizerFilter(tokenizer,
                    Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));
            return new TokenStreamComponents(tokenizer, filter);
        }
    };
}
Example 9
Source File: SegmentationIcuTokenizerTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
public void testHugeDoc() throws Exception {
    StringBuilder sb = new StringBuilder();
    char[] whitespace = new char[4094];
    Arrays.fill(whitespace, ' ');
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
    IcuTokenizer tokenizer = new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
            new DefaultIcuTokenizerConfig(false, true));
    tokenizer.setReader(new StringReader(input));
    assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
Example 10
Source File: OpenKoreanTextTokenizer.java From elasticsearch-analysis-openkoreantext with Apache License 2.0
public OpenKoreanTextTokenizer() {
    super(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
}
Example 11
Source File: PreAnalyzedField.java From lucene-solr with Apache License 2.0
public PreAnalyzedTokenizer(PreAnalyzedParser parser) {
    // We don't pack attributes: this tokenizer is used for (de)serialization
    // and we don't want the bloat.
    super(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
    this.parser = parser;
}
Example 12
Source File: TestConcatenatingTokenStream.java From lucene-solr with Apache License 2.0
public void testInconsistentAttributeFactories() throws IOException {
    final MockTokenizer first = new MockTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
            MockTokenizer.WHITESPACE, true);
    final MockTokenizer second = new MockTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY,
            MockTokenizer.WHITESPACE, true);
    expectThrows(IllegalArgumentException.class, () -> new ConcatenatingTokenStream(first, second));
}
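ConcatenatingTokenStream exposes a single set of attribute instances across all of its inputs, so it requires every input to have been built with the same AttributeFactory; mixing DEFAULT_ATTRIBUTE_FACTORY with TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY therefore fails fast with the IllegalArgumentException this test expects.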
Example 13
Source File: LegacyNumericTokenStream.java From lucene-solr with Apache License 2.0
/**
 * Creates a token stream for numeric values using the default <code>precisionStep</code>
 * {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream
 * is not yet initialized; before using it, set a value with one of the
 * set<em>???</em>Value() methods.
 */
public LegacyNumericTokenStream() {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, LegacyNumericUtils.PRECISION_STEP_DEFAULT);
}
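As a brief usage sketch (the value 42 and the variable name are illustrative, not from this source file), initializing the stream before it is consumed looks like:

LegacyNumericTokenStream stream = new LegacyNumericTokenStream(); // precisionStep defaults to 16
stream.setIntValue(42); // must be called before the stream is reset and consumed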
Example 14
Source File: LegacyNumericTokenStream.java From lucene-solr with Apache License 2.0
/**
 * Creates a token stream for numeric values with the specified <code>precisionStep</code>.
 * The stream is not yet initialized; before using it, set a value with one of the
 * set<em>???</em>Value() methods.
 */
public LegacyNumericTokenStream(final int precisionStep) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep);
}