Java Code Examples for org.apache.lucene.analysis.tokenattributes.OffsetAttribute#setOffset()
The following examples show how to use
org.apache.lucene.analysis.tokenattributes.OffsetAttribute#setOffset().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AutoPhrasingTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Emits the given characters as the current token.
 *
 * <p>When {@code replaceWhitespaceWith} is set, the characters are first passed through
 * {@code replaceWhiteSpace}. The resulting characters are written to the term attribute, the
 * offset attribute is rewritten so that the span ends at the current end offset and is
 * {@code token.length} characters long, the position increment is set to the pre-incremented
 * {@code positionIncr}, and the token is remembered in {@code lastEmitted}.
 *
 * @param tokenChars characters of the token to emit
 */
private void emit(char[] tokenChars) {
    char[] token = tokenChars;
    if (replaceWhitespaceWith != null) {
        token = replaceWhiteSpace(token);
    }

    CharTermAttribute termAttr = getTermAttribute();
    if (termAttr != null) {
        // copyBuffer replaces the whole term in one step; no intermediate StringBuilder needed.
        termAttr.copyBuffer(token, 0, token.length);
    }

    OffsetAttribute offAttr = getOffsetAttribute();
    // Only move the start offset when the end offset leaves room for the full token length.
    if (offAttr != null && offAttr.endOffset() >= token.length) {
        int start = offAttr.endOffset() - token.length;
        offAttr.setOffset(start, offAttr.endOffset());
    }

    PositionIncrementAttribute pia = getPositionIncrementAttribute();
    if (pia != null) {
        pia.setPositionIncrement(++positionIncr);
    }

    lastEmitted = token;
}
Example 2
Source File: AutoPhrasingTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Emits a token and then, when the token carries a usable span, overrides the offsets with it.
 *
 * <p>The span is applied only if {@code startPos} is non-negative and {@code endPos} is strictly
 * greater than {@code startPos}.
 *
 * @param token token whose {@code tok} characters and start/end positions are emitted
 */
private void emit(Token token) {
    emit(token.tok);

    OffsetAttribute offsets = getOffsetAttribute();
    if (offsets == null) {
        return;
    }

    boolean hasValidSpan = token.startPos >= 0 && token.endPos > token.startPos;
    if (hasValidSpan) {
        offsets.setOffset(token.startPos, token.endPos);
    }
}
Example 3
Source File: BaseTermVectorsFormatTestCase.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Copies this attribute's {@code start} and {@code end} values into the target.
 *
 * @param target destination attribute; it is cast to {@code OffsetAttribute}
 */
@Override
public void copyTo(AttributeImpl target) {
    ((OffsetAttribute) target).setOffset(start, end);
}
Example 4
Source File: SimplePreAnalyzedParser.java From lucene-solr with Apache License 2.0 | 4 votes |
private static AttributeSource.State createState(AttributeSource a, Tok state, int tokenEnd) { a.clearAttributes(); CharTermAttribute termAtt = a.addAttribute(CharTermAttribute.class); char[] tokChars = state.token.toString().toCharArray(); termAtt.copyBuffer(tokChars, 0, tokChars.length); int tokenStart = tokenEnd - state.token.length(); for (Entry<String, String> e : state.attr.entrySet()) { String k = e.getKey(); if (k.equals("i")) { // position increment int incr = Integer.parseInt(e.getValue()); PositionIncrementAttribute posIncr = a.addAttribute(PositionIncrementAttribute.class); posIncr.setPositionIncrement(incr); } else if (k.equals("s")) { tokenStart = Integer.parseInt(e.getValue()); } else if (k.equals("e")) { tokenEnd = Integer.parseInt(e.getValue()); } else if (k.equals("y")) { TypeAttribute type = a.addAttribute(TypeAttribute.class); type.setType(e.getValue()); } else if (k.equals("f")) { FlagsAttribute flags = a.addAttribute(FlagsAttribute.class); int f = Integer.parseInt(e.getValue(), 16); flags.setFlags(f); } else if (k.equals("p")) { PayloadAttribute p = a.addAttribute(PayloadAttribute.class); byte[] data = hexToBytes(e.getValue()); if (data != null && data.length > 0) { p.setPayload(new BytesRef(data)); } } else { // unknown attribute } } // handle offset attr OffsetAttribute offset = a.addAttribute(OffsetAttribute.class); offset.setOffset(tokenStart, tokenEnd); State resState = a.captureState(); a.clearAttributes(); return resState; }
Example 5
Source File: MtasPreAnalyzedParser.java From mtas with Apache License 2.0 | 4 votes |
@Override public ParseResult parse(Reader reader, AttributeSource parent) throws IOException { ParseResult res = new ParseResult(); // get MtasUpdateRequestProcessorResult StringBuilder sb = new StringBuilder(); char[] buf = new char[128]; int cnt; while ((cnt = reader.read(buf)) > 0) { sb.append(buf, 0, cnt); } Iterator<MtasUpdateRequestProcessorResultItem> iterator; try ( MtasUpdateRequestProcessorResultReader result = new MtasUpdateRequestProcessorResultReader( sb.toString());) { iterator = result.getIterator(); if (iterator != null && iterator.hasNext()) { res.str = result.getStoredStringValue(); res.bin = result.getStoredBinValue(); } else { res.str = null; res.bin = null; result.close(); return res; } parent.clearAttributes(); while (iterator.hasNext()) { MtasUpdateRequestProcessorResultItem item = iterator.next(); if (item.tokenTerm != null) { CharTermAttribute catt = parent.addAttribute(CharTermAttribute.class); catt.append(item.tokenTerm); } if (item.tokenFlags != null) { FlagsAttribute flags = parent.addAttribute(FlagsAttribute.class); flags.setFlags(item.tokenFlags); } if (item.tokenPosIncr != null) { PositionIncrementAttribute patt = parent .addAttribute(PositionIncrementAttribute.class); patt.setPositionIncrement(item.tokenPosIncr); } if (item.tokenPayload != null) { PayloadAttribute p = parent.addAttribute(PayloadAttribute.class); p.setPayload(new BytesRef(item.tokenPayload)); } if (item.tokenOffsetStart != null && item.tokenOffsetEnd != null) { OffsetAttribute offset = parent.addAttribute(OffsetAttribute.class); offset.setOffset(item.tokenOffsetStart, item.tokenOffsetEnd); } // capture state and add to result State state = parent.captureState(); res.states.add(state.clone()); // reset for reuse parent.clearAttributes(); } } catch (IOException e) { // ignore log.debug(e); } return res; }