org.apache.lucene.analysis.CharacterUtils Java Examples
The following examples show how to use
org.apache.lucene.analysis.CharacterUtils.
You can vote up the examples you find useful or vote down the ones you don't,
and follow the links above each example to the original project or source file. You may also check out the related API usage on the sidebar.
Example #1
Source File: NGramTokenizer.java From lucene-solr with Apache License 2.0 | 5 votes |
private void init(int minGram, int maxGram, boolean edgesOnly) { if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); } if (minGram > maxGram) { throw new IllegalArgumentException("minGram must not be greater than maxGram"); } this.minGram = minGram; this.maxGram = maxGram; this.edgesOnly = edgesOnly; charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader buffer = new int[charBuffer.getBuffer().length]; // Make the term att large enough termAtt.resizeBuffer(2 * maxGram); }
Example #2
Source File: UpperCaseFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override
public final boolean incrementToken() throws IOException {
  // End of the upstream token stream: propagate the signal.
  if (!input.incrementToken()) {
    return false;
  }
  // Upper-case the current term text in place.
  CharacterUtils.toUpperCase(termAtt.buffer(), 0, termAtt.length());
  return true;
}
Example #3
Source File: TestConditionalTokenFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override
public final boolean incrementToken() throws IOException {
  // End of the upstream token stream: propagate the signal.
  if (!input.incrementToken()) {
    return false;
  }
  // Lower-case the current term text in place.
  CharacterUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length());
  return true;
}
Example #4
Source File: CharBufferReader.java From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 | 4 votes |
public CharBufferReader(Reader input, int bufferSize) {
  // Remember the requested capacity so reset(Reader) can re-allocate later.
  this.bufferSize = bufferSize;
  this.input = input;
  // Allocate the reusable fill buffer up front.
  charBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
Example #5
Source File: CharBufferReader.java From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 | 4 votes |
private boolean readToBuffer() throws IOException {
  // Pull the next chunk from the underlying reader into the shared buffer.
  CharacterUtils.fill(charBuffer, input);
  readCursor = charBuffer.getOffset();
  // The fill succeeded iff at least one char is readable past the offset.
  final boolean hasData = charBuffer.getLength() > charBuffer.getOffset();
  return hasData;
}
Example #6
Source File: CharBufferReader.java From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 | 4 votes |
public void reset(Reader input) {
  // Swap in the new source and rewind the read position.
  this.input = input;
  readCursor = 0;
  // Drop any previously buffered chars by allocating a fresh buffer.
  charBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
Example #7
Source File: ICUNormalizer2CharFilter.java From lucene-solr with Apache License 2.0 | 4 votes |
ICUNormalizer2CharFilter(Reader in, Normalizer2 normalizer, int bufferSize) {
  super(in);
  // Fail fast on a null normalizer instead of during a later read().
  this.normalizer = Objects.requireNonNull(normalizer);
  this.tmpBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
Example #8
Source File: NameTokenizer.java From HongsCORE with MIT License | 4 votes |
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();
    char[] buf = termAttr.buffer();
    int bgn, end, len, chr, cnt, bgx;
    while (true) {
        // Check whether the current buffer is exhausted; if so, refill from input.
        if (bufferIndex >= bufferShift) {
            CharacterUtils.fill(buffer , input);
            // Accumulate how many chars we have consumed so far so that
            // bufferIndex (local to this buffer) maps to an absolute offset.
            offsetShift += bufferShift ;
            bufferShift = buffer.getLength();
            bufferIndex = 0;
            if (bufferShift == 0) {
                // Nothing left to read: record the final offset and end the stream.
                endset = correctOffset(offsetShift);
                offset = 0 ;
                return false;
            }
        }
        // Absolute start of this token, compensating for chars already appended
        // to the current term (offset).
        bgn = bufferIndex + offsetShift - offset;
        // Read one full code point (may span a surrogate pair).
        chr = Character.codePointAt(buffer.getBuffer(), bufferIndex);
        cnt = Character.charCount(chr);
        bufferIndex += cnt;
        // NOTE(review): filterToken presumably normalizes/maps the code point,
        // with 0x0 meaning "drop this char and restart the term" — confirm
        // against its definition elsewhere in the class.
        chr = filterToken(chr);
        if (chr == 0x0) {
            buf = termAttr.buffer();
            offset = 0;
            continue;
        }
        // Append the (possibly mapped) code point to the term buffer.
        len = Character.toChars(chr, buf, offset);
        end = bgn + len;
        termAttr.setLength(len + offset);
        // Report corrected start/end offsets for this token.
        bgx = correctOffset(bgn);
        endset = correctOffset(end);
        ofstAttr.setOffset(bgx , endset);
        offset += cnt;
        return true;
    }
}