org.apache.lucene.analysis.CharacterUtils Java Examples
The following examples show how to use
org.apache.lucene.analysis.CharacterUtils.
You can vote up the examples you find useful or vote down the ones you don't,
and follow the links above each example to the original project or source file. You may also check out the related API usage on the sidebar.
Example #1
Source File: NGramTokenizer.java From lucene-solr with Apache License 2.0 | 5 votes |
private void init(int minGram, int maxGram, boolean edgesOnly) { if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); } if (minGram > maxGram) { throw new IllegalArgumentException("minGram must not be greater than maxGram"); } this.minGram = minGram; this.maxGram = maxGram; this.edgesOnly = edgesOnly; charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader buffer = new int[charBuffer.getBuffer().length]; // Make the term att large enough termAtt.resizeBuffer(2 * maxGram); }
Example #2
Source File: UpperCaseFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override
public final boolean incrementToken() throws IOException {
  // End of the upstream token stream: propagate the signal.
  if (!input.incrementToken()) {
    return false;
  }
  // Upper-case the current term text in place.
  CharacterUtils.toUpperCase(termAtt.buffer(), 0, termAtt.length());
  return true;
}
Example #3
Source File: TestConditionalTokenFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override
public final boolean incrementToken() throws IOException {
  // End of the upstream token stream: propagate the signal.
  if (!input.incrementToken()) {
    return false;
  }
  // Lower-case the current term text in place.
  CharacterUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length());
  return true;
}
Example #4
Source File: CharBufferReader.java From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 | 4 votes |
public CharBufferReader(Reader input, int bufferSize) {
  // Remember the requested capacity so reset(Reader) can re-allocate later.
  this.bufferSize = bufferSize;
  this.input = input;
  // Allocate the reusable fill buffer up front.
  charBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
Example #5
Source File: CharBufferReader.java From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 | 4 votes |
private boolean readToBuffer() throws IOException {
  // Pull the next chunk from the underlying reader into the shared buffer.
  CharacterUtils.fill(charBuffer, input);
  readCursor = charBuffer.getOffset();
  // The fill succeeded iff at least one char is readable past the offset.
  final boolean hasData = charBuffer.getLength() > charBuffer.getOffset();
  return hasData;
}
Example #6
Source File: CharBufferReader.java From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 | 4 votes |
public void reset(Reader input) {
  // Swap in the new source and rewind the read position.
  this.input = input;
  readCursor = 0;
  // Drop any previously buffered chars by allocating a fresh buffer.
  charBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
Example #7
Source File: ICUNormalizer2CharFilter.java From lucene-solr with Apache License 2.0 | 4 votes |
ICUNormalizer2CharFilter(Reader in, Normalizer2 normalizer, int bufferSize) {
  super(in);
  // Fail fast on a null normalizer instead of during a later read().
  this.normalizer = Objects.requireNonNull(normalizer);
  this.tmpBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
Example #8
Source File: NameTokenizer.java From HongsCORE with MIT License | 4 votes |
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();
    char[] buf = termAttr.buffer();
    int bgn, end, len, chr, cnt, bgx;
    while (true) {
        // Check whether the current buffer is exhausted; if so, refill from input.
        if (bufferIndex >= bufferShift) {
            CharacterUtils.fill(buffer , input);
            // Accumulate how many chars we have consumed so far so that
            // bufferIndex (local to this buffer) maps to an absolute offset.
            offsetShift += bufferShift ;
            bufferShift = buffer.getLength();
            bufferIndex = 0;
            if (bufferShift == 0) {
                // Nothing left to read: record the final offset and end the stream.
                endset = correctOffset(offsetShift);
                offset = 0 ;
                return false;
            }
        }
        // Absolute start of this token, compensating for chars already appended
        // to the current term (offset).
        bgn = bufferIndex + offsetShift - offset;
        // Read one full code point (may span a surrogate pair).
        chr = Character.codePointAt(buffer.getBuffer(), bufferIndex);
        cnt = Character.charCount(chr);
        bufferIndex += cnt;
        // NOTE(review): filterToken presumably normalizes/maps the code point,
        // with 0x0 meaning "drop this char and restart the term" — confirm
        // against its definition elsewhere in the class.
        chr = filterToken(chr);
        if (chr == 0x0) {
            buf = termAttr.buffer();
            offset = 0;
            continue;
        }
        // Append the (possibly mapped) code point to the term buffer.
        len = Character.toChars(chr, buf, offset);
        end = bgn + len;
        termAttr.setLength(len + offset);
        // Report corrected start/end offsets for this token.
        bgx = correctOffset(bgn);
        endset = correctOffset(end);
        ofstAttr.setOffset(bgx , endset);
        offset += cnt;
        return true;
    }
}