org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl Java Examples
The following examples show how to use
org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl.
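Before the examples, here is a minimal, hypothetical sketch (not drawn from any of the projects below) of how a PackedTokenAttributeImpl is populated and read back. The class packs the term text, offsets, type, and position increment into a single attribute implementation; all of the methods used here also appear in the examples that follow.

import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;

public class PackedTokenAttributeSketch {
    public static void main(String[] args) {
        PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.setEmpty().append("example");   // term text
        token.setOffset(0, 7);                // start/end character offsets
        token.setType("word");                // token type
        token.setPositionIncrement(1);        // position relative to the previous token

        // Read the packed values back out.
        System.out.println(new String(token.buffer(), 0, token.length())
                + " [" + token.startOffset() + "," + token.endOffset() + ") "
                + token.type() + " +" + token.getPositionIncrement());
    }
}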
Example #1
Source File: MLTokenDuplicator.java From SearchServices with GNU Lesser General Public License v3.0 | 6 votes |
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();
    PackedTokenAttributeImpl next = next();
    if (next == null) {
        return false;
    }
    termAtt.copyBuffer(next.buffer(), 0, next.length());
    offsetAtt.setOffset(next.startOffset(), next.endOffset());
    typeAtt.setType(next.type());
    posIncAtt.setPositionIncrement(next.getPositionIncrement());
    return true;
}
Example #2
Source File: StandardnumberTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
@Override
public final boolean incrementToken() throws IOException {
    if (!tokens.isEmpty()) {
        if (current == null) {
            throw new IllegalArgumentException("current is null");
        }
        PackedTokenAttributeImpl token = tokens.removeFirst();
        restoreState(current);
        termAtt.setEmpty().append(token);
        posIncAtt.setPositionIncrement(0);
        return true;
    }
    if (input.incrementToken()) {
        detect();
        if (!tokens.isEmpty()) {
            current = captureState();
        }
        return true;
    } else {
        return false;
    }
}
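Example #2 (and Example #5 below) use the same capture/restore pattern: extra tokens produced from the current term are queued and then emitted at the same position with a position increment of 0. The following is a minimal, hypothetical sketch of that pattern, assuming a made-up VariantEmittingFilter that emits an upper-cased copy of each term; it is not part of any project listed here.

import java.io.IOException;
import java.util.LinkedList;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeSource;

final class VariantEmittingFilter extends TokenFilter {
    private final LinkedList<PackedTokenAttributeImpl> tokens = new LinkedList<>();
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
    private AttributeSource.State current;

    VariantEmittingFilter(TokenStream input) {
        super(input);
    }

    @Override
    public final boolean incrementToken() throws IOException {
        if (!tokens.isEmpty()) {
            PackedTokenAttributeImpl token = tokens.removeFirst();
            restoreState(current);                  // reuse offsets/type of the original token
            termAtt.setEmpty().append(token);       // swap in the variant text
            posIncAtt.setPositionIncrement(0);      // same position as the original
            return true;
        }
        if (input.incrementToken()) {
            // Hypothetical variant: an upper-cased copy of the current term.
            PackedTokenAttributeImpl variant = new PackedTokenAttributeImpl();
            variant.append(new String(termAtt.buffer(), 0, termAtt.length()).toUpperCase());
            tokens.add(variant);
            current = captureState();
            return true;
        }
        return false;
    }

    @Override
    public void reset() throws IOException {
        super.reset();
        tokens.clear();
        current = null;
    }
}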
Example #3
Source File: PathTokenFilter.java From SearchServices with GNU Lesser General Public License v3.0 | 6 votes |
public PackedTokenAttributeImpl next() throws IOException {
    PackedTokenAttributeImpl nextToken;
    if (it == null) {
        buildTokenListAndIterator();
    }
    if (it.hasNext()) {
        nextToken = it.next();
    } else {
        nextToken = null;
    }
    return nextToken;
}
Example #4
Source File: PathTokenFilter.java From SearchServices with GNU Lesser General Public License v3.0 | 6 votes |
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();
    PackedTokenAttributeImpl next = next();
    if (next == null) {
        return false;
    }
    termAtt.copyBuffer(next.buffer(), 0, next.length());
    offsetAtt.setOffset(correctOffset(next.startOffset()), correctOffset(next.endOffset()));
    typeAtt.setType(next.type());
    posIncAtt.setPositionIncrement(next.getPositionIncrement());
    return true;
}
Example #5
Source File: SymbolnameTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
@Override
public final boolean incrementToken() throws IOException {
    if (!tokens.isEmpty()) {
        if (current == null) {
            throw new IllegalArgumentException("current is null");
        }
        PackedTokenAttributeImpl token = tokens.removeFirst();
        restoreState(current);
        termAtt.setEmpty().append(token);
        posIncAtt.setPositionIncrement(0);
        return true;
    }
    if (input.incrementToken()) {
        process();
        if (!tokens.isEmpty()) {
            current = captureState();
        }
        return true;
    } else {
        return false;
    }
}
Example #6
Source File: AnalyzerTest.java From mmseg4j-solr with Apache License 2.0 | 6 votes |
public static void printlnToken(String txt, Analyzer analyzer) throws IOException {
    System.out.println("---------" + txt.length() + "\n" + txt);
    TokenStream ts = analyzer.tokenStream("text", new StringReader(txt));
    /*// Lucene 2.9 and earlier
    for (Token t = new Token(); (t = ts.next(t)) != null;) {
        System.out.println(t);
    }*/
    /*while (ts.incrementToken()) {
        TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
        OffsetAttribute offsetAtt = (OffsetAttribute) ts.getAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
        System.out.println("(" + termAtt.term() + "," + offsetAtt.startOffset() + ","
                + offsetAtt.endOffset() + ",type=" + typeAtt.type() + ")");
    }*/
    ts.reset();
    for (PackedTokenAttributeImpl t = new PackedTokenAttributeImpl(); (t = TokenUtils.nextToken(ts, t)) != null;) {
        System.out.println(t);
    }
    ts.close();
}
Example #7
Source File: MLTokenDuplicator.java From SearchServices with GNU Lesser General Public License v3.0 | 6 votes |
private PackedTokenAttributeImpl next() throws IOException {
    PackedTokenAttributeImpl t = null;
    if (it == null) {
        it = buildIterator();
    }
    if (it == null) {
        return null;
    }
    if (it.hasNext()) {
        t = it.next();
        return t;
    } else {
        it = null;
        t = this.next();
        return t;
    }
}
Example #8
Source File: CutLetterDigitFilter.java From mmseg4j-solr with Apache License 2.0 | 5 votes |
public CutLetterDigitFilter(TokenStream input) {
    super(input);

    reusableToken = new PackedTokenAttributeImpl();
    termAtt = addAttribute(CharTermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
}
Example #9
Source File: BaseformTokenFilter.java From elasticsearch-analysis-baseform with Apache License 2.0 | 5 votes |
protected void baseform() throws CharacterCodingException {
    CharSequence term = new String(termAtt.buffer(), 0, termAtt.length());
    CharSequence s = dictionary.lookup(term);
    if (s != null && s.length() > 0) {
        PackedTokenAttributeImpl impl = new PackedTokenAttributeImpl();
        impl.append(s);
        tokens.add(impl);
    }
}
Example #10
Source File: StandardnumberTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
private void detect() throws CharacterCodingException {
    CharSequence term = new String(termAtt.buffer(), 0, termAtt.length());
    Collection<CharSequence> variants = service.lookup(settings, term);
    for (CharSequence ch : variants) {
        if (ch != null) {
            PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.append(ch);
            tokens.add(token);
        }
    }
}
Example #11
Source File: SymbolnameTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
protected void process() throws CharacterCodingException {
    String term = new String(termAtt.buffer(), 0, termAtt.length());
    for (CharSequence charSequence : process(term)) {
        if (charSequence != null) {
            PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.append(charSequence);
            tokens.add(token);
        }
    }
}
Example #12
Source File: CutLetterDigitFilter.java From mmseg4j-solr with Apache License 2.0 | 5 votes |
public final boolean incrementToken() throws IOException {
    clearAttributes();
    PackedTokenAttributeImpl token = nextToken(reusableToken);
    if (token != null) {
        termAtt.copyBuffer(token.buffer(), 0, token.length());
        offsetAtt.setOffset(token.startOffset(), token.endOffset());
        typeAtt.setType(token.type());
        return true;
    } else {
        return false;
    }
}
Example #13
Source File: CutLetterDigitFilter.java From mmseg4j-solr with Apache License 2.0 | 5 votes |
private void addToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength, byte type) {
    PackedTokenAttributeImpl token = TokenUtils.subToken(oriToken, termBufferOffset, termBufferLength);

    if (type == Character.DECIMAL_DIGIT_NUMBER) {
        token.setType(Word.TYPE_DIGIT);
    } else {
        token.setType(Word.TYPE_LETTER);
    }
    tokenQueue.offer(token);
}
Example #14
Source File: Solr4QueryParserTest.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
@Test
public void testFlatQueryShouldBeGeneratedFromSequentiallyShiftedTokens() throws Exception {
    // prepare test data
    LinkedList<PackedTokenAttributeImpl> tokenSequenceWithRepeatedGroup = new LinkedList<PackedTokenAttributeImpl>();
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(0, 4), 0, 4));
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(5, 6), 5, 6));
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(6, 10), 6, 10));
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(10, 11), 10, 11));

    assertTrue("All tokens in test data must be sequentially shifted",
            parser.isAllTokensSequentiallyShifted(tokenSequenceWithRepeatedGroup));
    assertTrue(parser.getEnablePositionIncrements());

    LinkedList<LinkedList<PackedTokenAttributeImpl>> fixedTokenSequences = new LinkedList<LinkedList<PackedTokenAttributeImpl>>();
    fixedTokenSequences.add(tokenSequenceWithRepeatedGroup);

    // call method to test
    SpanQuery q = parser.generateSpanOrQuery(TEST_FIELD, fixedTokenSequences);

    // check results
    assertNotNull(q);
    assertTrue(q instanceof SpanNearQuery);
    SpanNearQuery spanNearQuery = (SpanNearQuery) q;
    assertEquals("Slop between term must be 0", 0, spanNearQuery.getSlop());
    assertTrue("Terms must be in order", spanNearQuery.isInOrder());

    SpanQuery[] termClauses = spanNearQuery.getClauses();
    assertEquals("Flat query must be generated (Query: " + q + ")",
            tokenSequenceWithRepeatedGroup.size(), termClauses.length);
    for (int i = 0; i < termClauses.length; i++) {
        assertTrue(termClauses[i] instanceof SpanTermQuery);
        assertEquals("All tokens must become spanQuery terms",
                tokenSequenceWithRepeatedGroup.get(i).toString(),
                ((SpanTermQuery) termClauses[i]).getTerm().text());
    }
}
Example #15
Source File: Solr4QueryParserTest.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
private PackedTokenAttributeImpl getTokenAttribute(String text, int startOffset, int endOffset) {
    PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
    token.setEmpty().append(text);
    token.setOffset(startOffset, endOffset);
    return token;
}
Example #16
Source File: MLTokenDuplicator.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
public Iterator<PackedTokenAttributeImpl> buildIterator(PackedTokenAttributeImpl token) {
    if (token == null) {
        return null;
    }

    ArrayList<PackedTokenAttributeImpl> tokens = new ArrayList<PackedTokenAttributeImpl>(prefixes.size());
    for (String prefix : prefixes) {
        PackedTokenAttributeImpl newToken = new PackedTokenAttributeImpl();
        newToken.setEmpty().append(prefix + termText(token));
        newToken.setOffset(token.startOffset(), token.endOffset());
        newToken.setType(token.type());
        if (tokens.size() == 0) {
            newToken.setPositionIncrement(token.getPositionIncrement());
        } else {
            newToken.setPositionIncrement(0);
        }
        tokens.add(newToken);
    }
    return tokens.iterator();
}
Example #17
Source File: MLTokenDuplicator.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
private Iterator<PackedTokenAttributeImpl> buildIterator() throws IOException {
    // TODO: use incrementToken() somehow?
    if (!done && source.incrementToken()) {
        CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = null;
        if (source.hasAttribute(TypeAttribute.class)) {
            typeAtt = source.getAttribute(TypeAttribute.class);
        }
        PositionIncrementAttribute posIncAtt = null;
        if (source.hasAttribute(PositionIncrementAttribute.class)) {
            posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
        }
        PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.setEmpty().append(new String(cta.buffer()), 0, cta.length());
        token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
        if (typeAtt != null) {
            token.setType(typeAtt.type());
        }
        if (posIncAtt != null) {
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
        }
        return buildIterator(token);
    } else {
        done = true;
        return buildIterator(null);
    }
}
Example #18
Source File: CutLetterDigitFilter.java From mmseg4j-solr with Apache License 2.0 | 4 votes |
private PackedTokenAttributeImpl nextToken(PackedTokenAttributeImpl reusableToken) throws IOException {
    assert reusableToken != null;

    // Drain tokens left over from the previous call first.
    PackedTokenAttributeImpl nextToken = tokenQueue.poll();
    if (nextToken != null) {
        return nextToken;
    }

    nextToken = TokenUtils.nextToken(input, reusableToken);

    if (nextToken != null
            && (Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type())
                || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))) {
        final char[] buffer = nextToken.buffer();
        final int length = nextToken.length();
        byte lastType = (byte) Character.getType(buffer[0]); // character class of the previous char
        int termBufferOffset = 0;
        int termBufferLength = 0;

        for (int i = 0; i < length; i++) {
            byte type = (byte) Character.getType(buffer[i]);
            if (type <= Character.MODIFIER_LETTER) {
                type = Character.LOWERCASE_LETTER;
            }
            if (type != lastType) { // character class changed from the previous char
                addToken(nextToken, termBufferOffset, termBufferLength, lastType);
                termBufferOffset += termBufferLength;
                termBufferLength = 0;
                lastType = type;
            }
            termBufferLength++;
        }
        if (termBufferLength > 0) { // flush the final run
            addToken(nextToken, termBufferOffset, termBufferLength, lastType);
        }
        nextToken = tokenQueue.poll();
    }
    return nextToken;
}
Example #19
Source File: PathTokenFilter.java From SearchServices with GNU Lesser General Public License v3.0 | 4 votes |
private String termText(PackedTokenAttributeImpl token) {
    return new String(token.buffer(), 0, token.length());
}
Example #20
Source File: MLTokenDuplicator.java From SearchServices with GNU Lesser General Public License v3.0 | 4 votes |
private String termText(PackedTokenAttributeImpl token) {
    return new String(token.buffer(), 0, token.length());
}