org.apache.lucene.analysis.tokenattributes.PayloadAttribute Java Examples
The following examples show how to use
org.apache.lucene.analysis.tokenattributes.PayloadAttribute.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestConcatenatingTokenStream.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Verifies that ConcatenatingTokenStream exposes the union of the attributes
 * registered on its inputs: one source declares PayloadAttribute, the other
 * FlagsAttribute, and the concatenated stream must report both, while still
 * producing the tokens of both sources with correctly shifted offsets.
 */
public void testInconsistentAttributes() throws IOException {
  AttributeFactory attributeFactory = newAttributeFactory();

  final MockTokenizer firstSource = new MockTokenizer(attributeFactory, MockTokenizer.WHITESPACE, false);
  firstSource.setReader(new StringReader("first words "));
  firstSource.addAttribute(PayloadAttribute.class);

  final MockTokenizer secondSource = new MockTokenizer(attributeFactory, MockTokenizer.WHITESPACE, false);
  secondSource.setReader(new StringReader("second words"));
  secondSource.addAttribute(FlagsAttribute.class);

  TokenStream concatenated = new ConcatenatingTokenStream(firstSource, secondSource);
  assertTrue(concatenated.hasAttribute(FlagsAttribute.class));
  assertTrue(concatenated.hasAttribute(PayloadAttribute.class));

  assertTokenStreamContents(concatenated,
      new String[] { "first", "words", "second", "words" },
      new int[] { 0, 6, 12, 19 },
      new int[] { 5, 11, 18, 24 });
}
Example #2
Source File: DelimitedPayloadTokenFilterTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Consumes the next token from {@code stream} and asserts both its term text
 * and its payload bytes.
 *
 * @param expected  expected term text of the next token
 * @param stream    stream to advance (exactly one token is consumed)
 * @param expectPay expected payload bytes, or null if the token must carry no payload
 */
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
  CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
  PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payloadAtt.getPayload();
  if (payload != null) {
    // FIX: fail with a clear assertion instead of an NPE when a payload is
    // present but none was expected (the original dereferenced expectPay.length
    // while building the failure message before any null check).
    assertNotNull("token has payload " + payload + " but none was expected", expectPay);
    assertTrue(payload.length + " does not equal: " + expectPay.length,
        payload.length == expectPay.length);
    for (int i = 0; i < expectPay.length; i++) {
      assertTrue(expectPay[i] + " does not equal: " + payload.bytes[i + payload.offset],
          expectPay[i] == payload.bytes[i + payload.offset]);
    }
  } else {
    assertTrue("expectPay is not null and it should be", expectPay == null);
  }
}
Example #3
Source File: DelimitedPayloadTokenFilterTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks DelimitedPayloadTokenFilter with an IntegerEncoder: a term carrying a
 * "|n" suffix gets n as an encoded int payload; bare terms get no payload.
 */
public void testIntEncoding() throws Exception {
  String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
  DelimitedPayloadTokenFilter filter =
      new DelimitedPayloadTokenFilter(whitespaceMockTokenizer(test), '|', new IntegerEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
  filter.reset();

  // Expected (term, payload) pairs in stream order; null means "no payload".
  String[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
  byte[][] expectedPayloads = {
      null,
      PayloadHelper.encodeInt(1),
      PayloadHelper.encodeInt(2),
      PayloadHelper.encodeInt(3),
      null,
      null,
      null,
      PayloadHelper.encodeInt(5),
      PayloadHelper.encodeInt(99),
      PayloadHelper.encodeInt(83)
  };
  for (int i = 0; i < expectedTerms.length; i++) {
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayloads[i]);
  }

  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
Example #4
Source File: DelimitedPayloadTokenFilterTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks DelimitedPayloadTokenFilter with a FloatEncoder: a term carrying a
 * "|x.y" suffix gets that value as an encoded float payload; bare terms get none.
 */
public void testFloatEncoding() throws Exception {
  String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
  DelimitedPayloadTokenFilter filter =
      new DelimitedPayloadTokenFilter(whitespaceMockTokenizer(test), '|', new FloatEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
  filter.reset();

  // Expected (term, payload) pairs in stream order; null means "no payload".
  String[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
  byte[][] expectedPayloads = {
      null,
      PayloadHelper.encodeFloat(1.0f),
      PayloadHelper.encodeFloat(2.0f),
      PayloadHelper.encodeFloat(3.5f),
      PayloadHelper.encodeFloat(0.5f),
      null,
      null,
      PayloadHelper.encodeFloat(5.0f),
      PayloadHelper.encodeFloat(99.3f),
      PayloadHelper.encodeFloat(83.7f)
  };
  for (int i = 0; i < expectedTerms.length; i++) {
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayloads[i]);
  }

  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
Example #5
Source File: DelimitedPayloadTokenFilterTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks DelimitedPayloadTokenFilter with the default '|' delimiter and an
 * IdentityEncoder: the text after the delimiter (a POS tag here) becomes the
 * token's payload verbatim (UTF-8 bytes); undelimited terms get no payload.
 */
public void testPayloads() throws Exception {
  String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(
      whitespaceMockTokenizer(test),
      DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
  filter.reset();

  // Expected (term, POS-tag payload) pairs in stream order; null means "no payload".
  String[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
  String[] expectedTags = { null, "JJ", "JJ", "NN", "VB", null, null, "JJ", "JJ", "NN" };
  for (int i = 0; i < expectedTerms.length; i++) {
    byte[] pay = expectedTags[i] == null ? null : expectedTags[i].getBytes(StandardCharsets.UTF_8);
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, pay);
  }

  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
Example #6
Source File: TestDelimitedPayloadTokenFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Factory test: a DelimitedPayload filter configured with a '*' delimiter and
 * the float encoder must attach a 0.1f payload to every token of the input.
 */
public void testDelim() throws Exception {
  Reader input = new StringReader("the*0.1 quick*0.1 red*0.1");
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer) ts).setReader(input);
  ts = tokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").create(ts);
  ts.reset();
  while (ts.incrementToken()) {
    PayloadAttribute payloadAttr = ts.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAttr);
    byte[] bytes = payloadAttr.getPayload().bytes;
    assertNotNull(bytes);
    assertEquals(0.1f, PayloadHelper.decodeFloat(bytes), 0.0f);
  }
  ts.end();
  ts.close();
}
Example #7
Source File: TestDelimitedPayloadTokenFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Factory test: with only the "float" encoder configured, the default '|'
 * delimiter applies and every token of the input carries a 0.1f payload.
 */
public void testEncoder() throws Exception {
  Reader input = new StringReader("the|0.1 quick|0.1 red|0.1");
  TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer) ts).setReader(input);
  ts = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(ts);
  ts.reset();
  while (ts.incrementToken()) {
    PayloadAttribute payloadAttr = ts.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAttr);
    byte[] bytes = payloadAttr.getPayload().bytes;
    assertNotNull(bytes);
    assertEquals(0.1f, PayloadHelper.decodeFloat(bytes), 0.0f);
  }
  ts.end();
  ts.close();
}
Example #8
Source File: TestNGramFilters.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Test EdgeNGramFilterFactory on tokens with payloads: every n-gram produced
 * from "test|0.1" must still carry the original 0.1f float payload.
 */
public void testEdgeNGramFilterPayload() throws Exception {
  Reader input = new StringReader("test|0.1");
  TokenStream ts = whitespaceMockTokenizer(input);
  ts = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(ts);
  ts = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(ts);
  ts.reset();
  while (ts.incrementToken()) {
    PayloadAttribute payloadAttr = ts.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAttr);
    BytesRef payload = payloadAttr.getPayload();
    assertNotNull(payload);
    assertEquals(0.1f, PayloadHelper.decodeFloat(payload.bytes), 0.0f);
  }
  ts.end();
  ts.close();
}
Example #9
Source File: TestNGramFilters.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Test NGramFilterFactory on tokens with payloads: every n-gram produced
 * from "test|0.1" must still carry the original 0.1f float payload.
 */
public void testNGramFilterPayload() throws Exception {
  Reader input = new StringReader("test|0.1");
  TokenStream ts = whitespaceMockTokenizer(input);
  ts = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(ts);
  ts = tokenFilterFactory("NGram", "minGramSize", "1", "maxGramSize", "2").create(ts);
  ts.reset();
  while (ts.incrementToken()) {
    PayloadAttribute payloadAttr = ts.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAttr);
    BytesRef payload = payloadAttr.getPayload();
    assertNotNull(payload);
    assertEquals(0.1f, PayloadHelper.decodeFloat(payload.bytes), 0.0f);
  }
  ts.end();
  ts.close();
}
Example #10
Source File: TestSnowball.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs SnowballFilter ("English") over a fixed TestTokenStream and checks that
 * the non-term attributes (offset, type, position increment, flags, payload)
 * pass through unchanged while the term text is stemmed to "accent".
 */
public void testFilterTokens() throws Exception {
  SnowballFilter stemmer = new SnowballFilter(new TestTokenStream(), "English");
  CharTermAttribute term = stemmer.getAttribute(CharTermAttribute.class);
  OffsetAttribute offset = stemmer.getAttribute(OffsetAttribute.class);
  TypeAttribute type = stemmer.getAttribute(TypeAttribute.class);
  PayloadAttribute payload = stemmer.getAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posInc = stemmer.getAttribute(PositionIncrementAttribute.class);
  FlagsAttribute flags = stemmer.getAttribute(FlagsAttribute.class);

  stemmer.incrementToken();

  assertEquals("accent", term.toString());
  assertEquals(2, offset.startOffset());
  assertEquals(7, offset.endOffset());
  assertEquals("wrd", type.type());
  assertEquals(3, posInc.getPositionIncrement());
  assertEquals(77, flags.getFlags());
  assertEquals(new BytesRef(new byte[] { 0, 1, 2, 3 }), payload.getPayload());
}
Example #11
Source File: SpellingQueryConverter.java From lucene-solr with Apache License 2.0 | 6 votes |
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException { TokenStream stream = analyzer.tokenStream("", text); // TODO: support custom attributes CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); stream.reset(); while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset()); token.setFlags(flagsAttValue); //overwriting any flags already set... token.setType(typeAtt.type()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } stream.end(); stream.close(); }
Example #12
Source File: PayloadTokenizer.java From clue with Apache License 2.0 | 5 votes |
/**
 * Tokenizer over a comma-separated string: lower-cases the input, splits it on
 * commas, and registers term, payload, position-increment and offset attributes
 * up front. A 4-byte payload buffer is allocated for reuse across tokens.
 *
 * @param text the input to tokenize; also used to presize the term buffer
 * @throws IOException if setting the reader fails
 */
public PayloadTokenizer(String text) throws IOException {
  setReader(new StringReader(text));
  this.tokens = text.toLowerCase().split(",");
  termAttr = addAttribute(CharTermAttribute.class);
  termAttr.resizeBuffer(text.length()); // maximum size necessary is the size of the input
  payloadAttr = addAttribute(PayloadAttribute.class);
  // 4-byte payload buffer, presumably filled per-token in incrementToken() — TODO confirm
  payload = new BytesRef(new byte[4]);
  positionAttr = addAttribute(PositionIncrementAttribute.class);
  offsetAttr = addAttribute(OffsetAttribute.class);
}
Example #13
Source File: TestAnalyzers.java From lucene-solr with Apache License 2.0 | 5 votes |
void verifyPayload(TokenStream ts) throws IOException { PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class); ts.reset(); for(byte b=1;;b++) { boolean hasNext = ts.incrementToken(); if (!hasNext) break; // System.out.println("id="+System.identityHashCode(nextToken) + " " + t); // System.out.println("payload=" + (int)nextToken.getPayload().toByteArray()[0]); assertEquals(b, payloadAtt.getPayload().bytes[0]); } }
Example #14
Source File: MockPayloadAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Filter over {@code input} for the given field: resets the position and token
 * counters and caches the position-increment, payload and term attribute
 * instances it will read/write during filtering.
 */
public MockPayloadFilter(TokenStream input, String fieldName) {
  super(input);
  this.fieldName = fieldName;
  pos = 0; // running position counter
  i = 0;   // running token index
  posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
  payloadAttr = input.addAttribute(PayloadAttribute.class);
  termAttr = input.addAttribute(CharTermAttribute.class);
}
Example #15
Source File: TestPayloads.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Token stream whose term text and payload are the same random bytes drawn
 * from a shared {@code ByteArrayPool}.
 */
PoolingPayloadTokenStream(ByteArrayPool pool) {
  this.pool = pool;
  payload = pool.get();          // borrow a byte[] from the pool
  generateRandomData(payload);   // fill it with random content
  // the term text mirrors the payload bytes, decoded with the utf8 charset
  term = new String(payload, 0, payload.length, utf8);
  first = true;                  // presumably marks "token not yet emitted" — confirm in incrementToken()
  payloadAtt = addAttribute(PayloadAttribute.class);
  termAtt = addAttribute(CharTermAttribute.class);
}
Example #16
Source File: FieldInvertState.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Sets attributeSource to a new instance.
 * Attribute accessors are re-cached only when the source actually changed.
 * Note the asymmetry: term and payload attributes are *looked up* with
 * getAttribute (they stay null/absent if the source never registered them),
 * while term-frequency, position-increment and offset attributes are
 * force-registered with addAttribute.
 */
void setAttributeSource(AttributeSource attributeSource) {
  if (this.attributeSource != attributeSource) {
    this.attributeSource = attributeSource;
    termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
    termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
    posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
    payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
  }
}
Example #17
Source File: TestPayloads.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Filter over {@code in} that records the field name and per-field payload
 * configuration, and caches the payload and term attributes it will update.
 *
 * @param in          wrapped stream
 * @param fieldName   field currently being analyzed
 * @param fieldToData per-field payload configuration map
 */
public PayloadFilter(TokenStream in, String fieldName, Map<String,PayloadData> fieldToData) {
  super(in);
  this.fieldToData = fieldToData;
  this.fieldName = fieldName;
  payloadAtt = addAttribute(PayloadAttribute.class);
  termAttribute = addAttribute(CharTermAttribute.class);
}
Example #18
Source File: DelimitedPayloadTokenFilterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Consumes the next token and asserts its term text and payload, using
 * attribute instances supplied by the caller (avoids re-lookup per call).
 *
 * @param expected  expected term text of the next token
 * @param stream    stream to advance (exactly one token is consumed)
 * @param termAtt   the stream's term attribute
 * @param payAtt    the stream's payload attribute
 * @param expectPay expected payload bytes, or null if the token must carry no payload
 */
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payAtt.getPayload();
  if (payload != null) {
    // FIX: fail with a clear assertion instead of an NPE when a payload is
    // present but none was expected (the original dereferenced expectPay.length
    // while building the failure message before any null check).
    assertNotNull("token has payload " + payload + " but none was expected", expectPay);
    assertTrue(payload.length + " does not equal: " + expectPay.length,
        payload.length == expectPay.length);
    for (int i = 0; i < expectPay.length; i++) {
      assertTrue(expectPay[i] + " does not equal: " + payload.bytes[i + payload.offset],
          expectPay[i] == payload.bytes[i + payload.offset]);
    }
  } else {
    assertTrue("expectPay is not null and it should be", expectPay == null);
  }
}
Example #19
Source File: TestPayloadSpanUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Test filter set-up: seeds the term sets used during filtering — "xx"/"one"
 * go into {@code entities}, "nopayload"/"np" into {@code nopayload}
 * (presumably terms that must stay payload-free; confirm in incrementToken) —
 * and caches the attributes the filter mutates.
 */
public PayloadFilter(TokenStream input) {
  super(input);
  pos = 0; // running position counter
  entities.add("xx");
  entities.add("one");
  nopayload.add("nopayload");
  nopayload.add("np");
  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
}
Example #20
Source File: SimpleQueryConverter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Whitespace-tokenizes the query and converts every emitted token into a
 * spellchecker Token, copying term text, offsets, flags, payload, position
 * increment and type. The analyzer and stream are closed via
 * try-with-resources; IOException is wrapped as unchecked because the
 * interface does not declare it.
 */
@Override
public Collection<Token> convert(String origQuery) {
  Collection<Token> result = new HashSet<>();
  try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
       TokenStream ts = analyzer.tokenStream("", origQuery)) {
    // TODO: support custom attributes
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      Token tok = new Token();
      tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      tok.setFlags(flagsAtt.getFlags());
      tok.setPayload(payloadAtt.getPayload());
      tok.setPositionIncrement(posIncAtt.getPositionIncrement());
      tok.setType(typeAtt.type());
      result.add(tok);
    }
    ts.end();
    return result;
  } catch (IOException e) {
    // analysis of an in-memory string should not fail; surface as unchecked
    throw new RuntimeException(e);
  }
}
Example #21
Source File: TestPayloadSpans.java From lucene-solr with Apache License 2.0 | 5 votes |
public PayloadFilter(TokenStream input) { super(input); pos = 0; entities.add("xx"); entities.add("one"); nopayload.add("nopayload"); nopayload.add("np"); termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); payloadAtt = addAttribute(PayloadAttribute.class); }
Example #22
Source File: SimplePreAnalyzedParser.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override public String toFormattedString(Field f) throws IOException { StringBuilder sb = new StringBuilder(); sb.append(VERSION + " "); if (f.fieldType().stored()) { String s = f.stringValue(); if (s != null) { // encode the equals sign s = s.replaceAll("=", "\\="); sb.append('='); sb.append(s); sb.append('='); } } TokenStream ts = f.tokenStreamValue(); if (ts != null) { StringBuilder tok = new StringBuilder(); boolean next = false; while (ts.incrementToken()) { if (next) { sb.append(' '); } else { next = true; } tok.setLength(0); Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator(); String cTerm = null; String tTerm = null; while (it.hasNext()) { Class<? extends Attribute> cl = it.next(); Attribute att = ts.getAttribute(cl); if (att == null) { continue; } if (cl.isAssignableFrom(CharTermAttribute.class)) { CharTermAttribute catt = (CharTermAttribute)att; cTerm = escape(catt.buffer(), catt.length()); } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) { TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att; char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray(); tTerm = escape(tTermChars, tTermChars.length); } else { if (tok.length() > 0) tok.append(','); if (cl.isAssignableFrom(FlagsAttribute.class)) { tok.append("f=").append(Integer.toHexString(((FlagsAttribute) att).getFlags())); } else if (cl.isAssignableFrom(OffsetAttribute.class)) { tok.append("s=").append(((OffsetAttribute) att).startOffset()).append(",e=").append(((OffsetAttribute) att).endOffset()); } else if (cl.isAssignableFrom(PayloadAttribute.class)) { BytesRef p = ((PayloadAttribute)att).getPayload(); if (p != null && p.length > 0) { tok.append("p=").append(bytesToHex(p.bytes, p.offset, p.length)); } else if (tok.length() > 0) { tok.setLength(tok.length() - 1); // remove the last comma } } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) { tok.append("i=").append(((PositionIncrementAttribute) att).getPositionIncrement()); } else if 
(cl.isAssignableFrom(TypeAttribute.class)) { tok.append("y=").append(escape(((TypeAttribute) att).type())); } else { tok.append(cl.getName()).append('=').append(escape(att.toString())); } } } String term = null; if (cTerm != null) { term = cTerm; } else { term = tTerm; } if (term != null && term.length() > 0) { if (tok.length() > 0) { tok.insert(0, term + ","); } else { tok.insert(0, term); } } sb.append(tok); } } return sb.toString(); }
Example #23
Source File: SimplePreAnalyzedParser.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Builds a captured AttributeSource.State for one parsed token: copies the
 * term text, applies each parsed key=value attribute
 * (i=position increment, s=start offset, e=end offset, y=type,
 * f=flags in hex, p=payload in hex), then sets the offsets and captures the
 * state, clearing the source before and after for reuse.
 *
 * @param a        attribute source to populate (cleared before and after)
 * @param state    parsed token (text plus attribute key/value map)
 * @param tokenEnd default end offset; start defaults to end minus token length
 * @return the captured state representing this token
 */
private static AttributeSource.State createState(AttributeSource a, Tok state, int tokenEnd) {
  a.clearAttributes();
  CharTermAttribute termAtt = a.addAttribute(CharTermAttribute.class);
  char[] tokChars = state.token.toString().toCharArray();
  termAtt.copyBuffer(tokChars, 0, tokChars.length);
  int tokenStart = tokenEnd - state.token.length();
  for (Entry<String, String> e : state.attr.entrySet()) {
    String k = e.getKey();
    if (k.equals("i")) {
      // position increment
      int incr = Integer.parseInt(e.getValue());
      PositionIncrementAttribute posIncr = a.addAttribute(PositionIncrementAttribute.class);
      posIncr.setPositionIncrement(incr);
    } else if (k.equals("s")) {
      // explicit start offset overrides the computed default
      tokenStart = Integer.parseInt(e.getValue());
    } else if (k.equals("e")) {
      // explicit end offset overrides the tokenEnd argument
      tokenEnd = Integer.parseInt(e.getValue());
    } else if (k.equals("y")) {
      TypeAttribute type = a.addAttribute(TypeAttribute.class);
      type.setType(e.getValue());
    } else if (k.equals("f")) {
      FlagsAttribute flags = a.addAttribute(FlagsAttribute.class);
      int f = Integer.parseInt(e.getValue(), 16); // flags are serialized as hex
      flags.setFlags(f);
    } else if (k.equals("p")) {
      PayloadAttribute p = a.addAttribute(PayloadAttribute.class);
      byte[] data = hexToBytes(e.getValue()); // payload is serialized as hex
      if (data != null && data.length > 0) {
        p.setPayload(new BytesRef(data));
      }
    } else {
      // unknown attribute
    }
  }
  // handle offset attr
  OffsetAttribute offset = a.addAttribute(OffsetAttribute.class);
  offset.setOffset(tokenStart, tokenEnd);
  State resState = a.captureState();
  a.clearAttributes();
  return resState;
}
Example #24
Source File: JsonPreAnalyzedParser.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Serializes a pre-analyzed field to the JSON format: a map holding the format
 * version, optional stored string and Base64 binary values, and a list of
 * per-token maps whose keys encode flags (hex), offsets, payload (Base64),
 * position increment, type and the term text.
 *
 * @param f field whose stored value and/or token stream is serialized
 * @return JSON string representation
 */
@Override
public String toFormattedString(Field f) throws IOException {
  Map<String,Object> map = new LinkedHashMap<>();
  map.put(VERSION_KEY, VERSION);
  if (f.fieldType().stored()) {
    String stringValue = f.stringValue();
    if (stringValue != null) {
      map.put(STRING_KEY, stringValue);
    }
    BytesRef binaryValue = f.binaryValue();
    if (binaryValue != null) {
      // binary stored value is Base64-encoded to stay valid JSON text
      map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    List<Map<String,Object>> tokens = new LinkedList<>();
    while (ts.incrementToken()) {
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      String cTerm = null; // term text taken from CharTermAttribute
      String tTerm = null; // term text recovered from TermToBytesRefAttribute
      Map<String,Object> tok = new TreeMap<>();
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          CharTermAttribute catt = (CharTermAttribute)att;
          cTerm = new String(catt.buffer(), 0, catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          tTerm = tatt.getBytesRef().utf8ToString();
        } else {
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute)att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.put(OFFSET_START_KEY, ((OffsetAttribute)att).startOffset());
            tok.put(OFFSET_END_KEY, ((OffsetAttribute)att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            if (p != null && p.length > 0) {
              // payload bytes are Base64-encoded; empty payloads are omitted
              tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.put(POSINCR_KEY, ((PositionIncrementAttribute)att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.put(TYPE_KEY, ((TypeAttribute)att).type());
          } else {
            // unknown attribute: stored under its class name
            tok.put(cl.getName(), att.toString());
          }
        }
      }
      // prefer the char-term form of the term text over the bytes-derived one
      String term = null;
      if (cTerm != null) {
        term = cTerm;
      } else {
        term = tTerm;
      }
      if (term != null && term.length() > 0) {
        tok.put(TOKEN_KEY, term);
      }
      tokens.add(tok);
    }
    map.put(TOKENS_KEY, tokens);
  }
  return JSONUtil.toJSON(map, -1);
}
Example #25
Source File: Token.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Reports this token's attributes to the reflector: the inherited attributes
 * first, then the flags and payload values this class adds.
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  super.reflectWith(reflector);
  reflector.reflect(FlagsAttribute.class, "flags", flags);
  reflector.reflect(PayloadAttribute.class, "payload", payload);
}
Example #26
Source File: Token.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Copies this token's state into {@code target}: inherited state first, then
 * flags, then a deep copy of the payload so source and target do not share
 * the underlying byte array.
 */
@Override
public void copyTo(AttributeImpl target) {
  super.copyTo(target);
  ((FlagsAttribute) target).setFlags(flags);
  ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
}
Example #27
Source File: MtasPreAnalyzedParser.java From mtas with Apache License 2.0 | 4 votes |
/**
 * Reads the whole serialized MtasUpdateRequestProcessorResult from
 * {@code reader}, then replays each result item as a captured attribute state
 * (term, flags, position increment, payload, offsets) on {@code parent}.
 * Returns early with null stored values when the result holds no items.
 * NOTE(review): the trailing IOException from the result reader is logged at
 * debug level and swallowed — the partially filled result is still returned.
 */
@Override
public ParseResult parse(Reader reader, AttributeSource parent)
    throws IOException {
  ParseResult res = new ParseResult();
  // get MtasUpdateRequestProcessorResult
  StringBuilder sb = new StringBuilder();
  char[] buf = new char[128];
  int cnt;
  // slurp the reader's full contents into sb
  while ((cnt = reader.read(buf)) > 0) {
    sb.append(buf, 0, cnt);
  }
  Iterator<MtasUpdateRequestProcessorResultItem> iterator;
  try (
      MtasUpdateRequestProcessorResultReader result = new MtasUpdateRequestProcessorResultReader(
          sb.toString());) {
    iterator = result.getIterator();
    if (iterator != null && iterator.hasNext()) {
      res.str = result.getStoredStringValue();
      res.bin = result.getStoredBinValue();
    } else {
      // no items: return an empty result (close() here is redundant with
      // try-with-resources but harmless)
      res.str = null;
      res.bin = null;
      result.close();
      return res;
    }
    parent.clearAttributes();
    while (iterator.hasNext()) {
      MtasUpdateRequestProcessorResultItem item = iterator.next();
      // each attribute is only registered/filled when the item provides it
      if (item.tokenTerm != null) {
        CharTermAttribute catt = parent.addAttribute(CharTermAttribute.class);
        catt.append(item.tokenTerm);
      }
      if (item.tokenFlags != null) {
        FlagsAttribute flags = parent.addAttribute(FlagsAttribute.class);
        flags.setFlags(item.tokenFlags);
      }
      if (item.tokenPosIncr != null) {
        PositionIncrementAttribute patt = parent
            .addAttribute(PositionIncrementAttribute.class);
        patt.setPositionIncrement(item.tokenPosIncr);
      }
      if (item.tokenPayload != null) {
        PayloadAttribute p = parent.addAttribute(PayloadAttribute.class);
        p.setPayload(new BytesRef(item.tokenPayload));
      }
      if (item.tokenOffsetStart != null && item.tokenOffsetEnd != null) {
        OffsetAttribute offset = parent.addAttribute(OffsetAttribute.class);
        offset.setOffset(item.tokenOffsetStart, item.tokenOffsetEnd);
      }
      // capture state and add to result
      State state = parent.captureState();
      res.states.add(state.clone());
      // reset for reuse
      parent.clearAttributes();
    }
  } catch (IOException e) {
    // ignore
    log.debug(e);
  }
  return res;
}
Example #28
Source File: OpenNLPTokenizer.java From jate with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Serializes the MWE metadata and stores it as this token's payload.
 */
public void addPayloadAttribute(PayloadAttribute attribute, MWEMetadata ctx) {
  attribute.setPayload(new BytesRef(MWEMetadata.serialize(ctx)));
}
Example #29
Source File: MWEFilter.java From jate with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Attaches the serialized form of {@code ctx} to the token as its payload.
 */
public void addPayloadAttribute(PayloadAttribute attribute, MWEMetadata ctx) {
  String serialized = MWEMetadata.serialize(ctx);
  attribute.setPayload(new BytesRef(serialized));
}
Example #30
Source File: BaseTermVectorsFormatTestCase.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Random token stream of {@code len} tokens drawn from the given sample terms.
 * Precomputes, per token: term text/bytes, position increments (first is >= 1),
 * monotonically derived positions, random start/end offsets, and payloads
 * (occasionally one shared payload for all tokens). Also builds lookup maps
 * from position and start offset to token indices, plus term frequencies,
 * then registers the attributes the stream will populate.
 */
public RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes) {
  terms = new String[len];
  termBytes = new BytesRef[len];
  positionsIncrements = new int[len];
  positions = new int[len];
  startOffsets = new int[len];
  endOffsets = new int[len];
  payloads = new BytesRef[len];
  for (int i = 0; i < len; ++i) {
    final int o = random().nextInt(sampleTerms.length); // pick a random sample term
    terms[i] = sampleTerms[o];
    termBytes[i] = sampleTermBytes[o];
    // first increment must be >= 1 so the first position is valid
    positionsIncrements[i] = TestUtil.nextInt(random(), i == 0 ? 1 : 0, 10);
    if (i == 0) {
      startOffsets[i] = TestUtil.nextInt(random(), 0, 1 << 16);
    } else {
      // offsets never go backwards; rarely take a large jump
      startOffsets[i] = startOffsets[i-1] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20);
    }
    endOffsets[i] = startOffsets[i] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
  }
  // derive absolute positions from the increments
  for (int i = 0; i < len; ++i) {
    if (i == 0) {
      positions[i] = positionsIncrements[i] - 1;
    } else {
      positions[i] = positions[i - 1] + positionsIncrements[i];
    }
  }
  if (rarely()) {
    // occasionally every token shares one payload instance
    Arrays.fill(payloads, randomPayload());
  } else {
    for (int i = 0; i < len; ++i) {
      payloads[i] = randomPayload();
    }
  }
  // index token positions and start offsets for later lookups
  positionToTerms = new HashMap<>(len);
  startOffsetToTerms = new HashMap<>(len);
  for (int i = 0; i < len; ++i) {
    if (!positionToTerms.containsKey(positions[i])) {
      positionToTerms.put(positions[i], new HashSet<Integer>(1));
    }
    positionToTerms.get(positions[i]).add(i);
    if (!startOffsetToTerms.containsKey(startOffsets[i])) {
      startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1));
    }
    startOffsetToTerms.get(startOffsets[i]).add(i);
  }
  // term -> occurrence count
  freqs = new HashMap<>();
  for (String term : terms) {
    if (freqs.containsKey(term)) {
      freqs.put(term, freqs.get(term) + 1);
    } else {
      freqs.put(term, 1);
    }
  }
  addAttributeImpl(new PermissiveOffsetAttributeImpl());
  termAtt = addAttribute(CharTermAttribute.class);
  piAtt = addAttribute(PositionIncrementAttribute.class);
  oAtt = addAttribute(OffsetAttribute.class);
  pAtt = addAttribute(PayloadAttribute.class);
}