org.apache.lucene.util.Attribute Java Examples
The following examples show how to use
org.apache.lucene.util.Attribute.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConcatenatingTokenStream.java From lucene-solr with Apache License 2.0 | 6 votes |
private static AttributeSource combineSources(TokenStream... sources) { AttributeSource base = sources[0].cloneAttributes(); try { for (int i = 1; i < sources.length; i++) { Iterator<Class<? extends Attribute>> it = sources[i].getAttributeClassesIterator(); while (it.hasNext()) { base.addAttribute(it.next()); } // check attributes can be captured sources[i].copyTo(base); } return base; } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Attempted to concatenate TokenStreams with different attribute types", e); } }
Example #2
Source File: NumericTokenizer.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Make this tokenizer get attributes from the delegate token stream.
 *
 * @param source the attribute source every attribute request is forwarded to
 * @return a factory whose instances are obtained via {@code source.addAttribute(...)}
 */
private static final AttributeFactory delegatingAttributeFactory(final AttributeSource source) {
  return new AttributeFactory() {
    @Override
    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
      // Ask the delegate for the attribute, then hand it back as its implementation type.
      final Attribute fromDelegate = source.addAttribute(attClass);
      return (AttributeImpl) fromDelegate;
    }
  };
}
Example #3
Source File: NumericTokenizer.java From Elasticsearch with Apache License 2.0 | 5 votes |
protected NumericTokenizer(NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException { super(delegatingAttributeFactory(numericTokenStream)); this.numericTokenStream = numericTokenStream; // Add attributes from the numeric token stream, this works fine because the attribute factory delegates to numericTokenStream for (Iterator<Class<? extends Attribute>> it = numericTokenStream.getAttributeClassesIterator(); it.hasNext();) { addAttribute(it.next()); } this.extra = extra; this.buffer = buffer; started = true; }
Example #4
Source File: TransportAnalyzeAction.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Collects the reflected key/value pairs of a stream's "other" attributes.
 *
 * <p>Pairs belonging to {@code CharTermAttribute}, {@code PositionIncrementAttribute},
 * {@code OffsetAttribute} and {@code TypeAttribute} are skipped. A remaining pair is kept
 * when {@code includeAttributes} is null or empty, or when it contains the key lowercased
 * with {@code Locale.ROOT}. {@code BytesRef} values are stored as their {@code toString()}.
 *
 * @param stream            current TokenStream
 * @param includeAttributes filtering attributes
 * @return {@code Map<key, value>} sorted by key
 */
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
  final Map<String, Object> extendedAttributes = new TreeMap<>();

  stream.reflectWith(new AttributeReflector() {
    @Override
    public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
      final boolean reportedElsewhere = CharTermAttribute.class.isAssignableFrom(attClass)
          || PositionIncrementAttribute.class.isAssignableFrom(attClass)
          || OffsetAttribute.class.isAssignableFrom(attClass)
          || TypeAttribute.class.isAssignableFrom(attClass);
      if (reportedElsewhere) {
        return;
      }

      final boolean selected = includeAttributes == null
          || includeAttributes.isEmpty()
          || includeAttributes.contains(key.toLowerCase(Locale.ROOT));
      if (!selected) {
        return;
      }

      final Object stored = (value instanceof BytesRef) ? ((BytesRef) value).toString() : value;
      extendedAttributes.put(key, stored);
    }
  });

  return extendedAttributes;
}
Example #5
Source File: Test2BTerms.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates the attribute implementation for the requested class.
 *
 * @param attClass the requested attribute class
 * @return a {@code MyTermAttributeImpl} when {@code TermToBytesRefAttribute} itself is
 *         requested, otherwise whatever the delegate factory creates
 * @throws IllegalArgumentException if {@code CharTermAttribute} (or a subtype) is requested
 */
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
  if (attClass == TermToBytesRefAttribute.class) {
    return new MyTermAttributeImpl();
  } else if (CharTermAttribute.class.isAssignableFrom(attClass)) {
    throw new IllegalArgumentException("no");
  } else {
    return delegate.createAttributeInstance(attClass);
  }
}
Example #6
Source File: OpenNLPPOSTaggerFilter.java From jate with GNU Lesser General Public License v3.0 | 5 votes |
@Override public boolean incrementToken() throws IOException { //clearAttributes(); if (first) { //gather all tokens from doc String[] words = walkTokens(); if (words.length == 0) { return false; } //tagging posTags = createTags(words); first = false; tokenIdx = 0; } if (tokenIdx == tokenAttrs.size()) { resetParams(); return false; } AttributeSource as = tokenAttrs.get(tokenIdx); Iterator<? extends Class<? extends Attribute>> it = as.getAttributeClassesIterator(); while (it.hasNext()) { Class<? extends Attribute> attrClass = it.next(); if (!hasAttribute(attrClass)) { addAttribute(attrClass); } } as.copyTo(this); MWEMetadata metadata = exitingPayload.getPayload() == null ? new MWEMetadata() : MWEMetadata.deserialize(exitingPayload.getPayload().utf8ToString()); metadata.addMetaData(MWEMetadataType.POS, posTags[tokenIdx]); exitingPayload.setPayload(new BytesRef(MWEMetadata.serialize(metadata))); tokenIdx++; return true; }
Example #7
Source File: TransportExtendedAnalyzeAction.java From elasticsearch-extended-analyze with Apache License 2.0 | 5 votes |
/**
 * Collects the reflected key/value pairs of a stream's "other" attributes, grouped by
 * attribute class name.
 *
 * <p>Pairs belonging to {@code CharTermAttribute}, {@code PositionIncrementAttribute},
 * {@code OffsetAttribute} and {@code TypeAttribute} are skipped. A remaining pair is kept
 * when {@code includeAttributes} is null or empty, or when it contains the lowercased
 * simple name of the attribute class. {@code BytesRef} values are stored as their
 * {@code toString()}.
 *
 * <p>All pairs reflected for the same attribute class are accumulated in one inner map.
 * Previously, with {@code shortAttrName == true}, the inner map was looked up under the
 * full class name but stored under the short name, so every reflected key replaced the
 * map built for the previous key and only the last pair per attribute survived.
 *
 * @param stream            current TokenStream
 * @param includeAttributes filtering attributes
 * @param shortAttrName     if true, the outer key is the class name without its package
 *                          (the part after the last {@code '.'}); otherwise the full name
 * @return nested object: {@code Map<attrClass, Map<key, value>>}
 */
private Map<String, Map<String, Object>> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes, final boolean shortAttrName) {
  final Map<String, Map<String, Object>> extendedAttributes = new TreeMap<>();

  stream.reflectWith(new AttributeReflector() {
    @Override
    public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
      if (CharTermAttribute.class.isAssignableFrom(attClass)
          || PositionIncrementAttribute.class.isAssignableFrom(attClass)
          || OffsetAttribute.class.isAssignableFrom(attClass)
          || TypeAttribute.class.isAssignableFrom(attClass)) {
        return;
      }

      // NOTE(review): toLowerCase() uses the default locale; left unchanged because the
      // way callers lowercase includeAttributes is not visible here — confirm both agree.
      final boolean selected = includeAttributes == null
          || includeAttributes.isEmpty()
          || includeAttributes.contains(attClass.getSimpleName().toLowerCase());
      if (!selected) {
        return;
      }

      // Compute the outer key once so that lookup and insertion always agree.
      final String className = attClass.getName();
      final String groupKey = shortAttrName
          ? className.substring(className.lastIndexOf(".") + 1)
          : className;

      Map<String, Object> currentAttributes = extendedAttributes.get(groupKey);
      if (currentAttributes == null) {
        currentAttributes = new HashMap<>();
        extendedAttributes.put(groupKey, currentAttributes);
      }

      if (value instanceof BytesRef) {
        final BytesRef p = (BytesRef) value;
        value = p.toString();
      }
      currentAttributes.put(key, value);
    }
  });

  return extendedAttributes;
}
Example #8
Source File: LegacyNumericTokenStream.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates the attribute implementation for the requested class via the delegate factory,
 * rejecting {@code CharTermAttribute}.
 *
 * @param attClass the requested attribute class
 * @return the instance created by the delegate factory
 * @throws IllegalArgumentException if {@code CharTermAttribute} (or a subtype) is requested
 */
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
  if (!CharTermAttribute.class.isAssignableFrom(attClass)) {
    return delegate.createAttributeInstance(attClass);
  }
  throw new IllegalArgumentException("LegacyNumericTokenStream does not support CharTermAttribute.");
}
Example #9
Source File: JsonPreAnalyzedParser.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Serializes a field to JSON.
 *
 * <p>The resulting map always holds the version. For stored fields it also holds the
 * string value and the Base64-encoded binary value when they are non-null. When the
 * field has a token stream, every token becomes a sorted map of its attributes:
 * flags (hex), start/end offsets, Base64 payload (only if non-empty), position
 * increment, type, and {@code className -> toString()} for any other attribute. The
 * token text is taken from the char term if one was seen, otherwise from the
 * bytes term, and is only written when non-empty.
 *
 * @param f the field to serialize
 * @return the JSON text produced by {@code JSONUtil.toJSON(map, -1)}
 * @throws IOException declared by the method signature
 */
@Override
public String toFormattedString(Field f) throws IOException {
  Map<String,Object> map = new LinkedHashMap<>();
  map.put(VERSION_KEY, VERSION);

  if (f.fieldType().stored()) {
    String stored = f.stringValue();
    if (stored != null) {
      map.put(STRING_KEY, stored);
    }
    BytesRef binary = f.binaryValue();
    if (binary != null) {
      map.put(BINARY_KEY, Base64.byteArrayToBase64(binary.bytes, binary.offset, binary.length));
    }
  }

  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    List<Map<String,Object>> tokens = new LinkedList<>();
    while (ts.incrementToken()) {
      String charTerm = null;
      String bytesTerm = null;
      Map<String,Object> tok = new TreeMap<>();

      for (Iterator<Class<? extends Attribute>> classes = ts.getAttributeClassesIterator(); classes.hasNext();) {
        Class<? extends Attribute> cl = classes.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          CharTermAttribute chars = (CharTermAttribute) att;
          charTerm = new String(chars.buffer(), 0, chars.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          TermToBytesRefAttribute bytes = (TermToBytesRefAttribute) att;
          bytesTerm = bytes.getBytesRef().utf8ToString();
        } else if (cl.isAssignableFrom(FlagsAttribute.class)) {
          tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
        } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
          OffsetAttribute offsets = (OffsetAttribute) att;
          tok.put(OFFSET_START_KEY, offsets.startOffset());
          tok.put(OFFSET_END_KEY, offsets.endOffset());
        } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
          BytesRef payload = ((PayloadAttribute) att).getPayload();
          if (payload != null && payload.length > 0) {
            tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
          }
        } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
          tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
        } else if (cl.isAssignableFrom(TypeAttribute.class)) {
          tok.put(TYPE_KEY, ((TypeAttribute) att).type());
        } else {
          tok.put(cl.getName(), att.toString());
        }
      }

      // The char term wins over the bytes term when both were seen.
      String term = (charTerm != null) ? charTerm : bytesTerm;
      if (term != null && !term.isEmpty()) {
        tok.put(TOKEN_KEY, term);
      }
      tokens.add(tok);
    }
    map.put(TOKENS_KEY, tokens);
  }

  return JSONUtil.toJSON(map, -1);
}
Example #10
Source File: SimplePreAnalyzedParser.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override public String toFormattedString(Field f) throws IOException { StringBuilder sb = new StringBuilder(); sb.append(VERSION + " "); if (f.fieldType().stored()) { String s = f.stringValue(); if (s != null) { // encode the equals sign s = s.replaceAll("=", "\\="); sb.append('='); sb.append(s); sb.append('='); } } TokenStream ts = f.tokenStreamValue(); if (ts != null) { StringBuilder tok = new StringBuilder(); boolean next = false; while (ts.incrementToken()) { if (next) { sb.append(' '); } else { next = true; } tok.setLength(0); Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator(); String cTerm = null; String tTerm = null; while (it.hasNext()) { Class<? extends Attribute> cl = it.next(); Attribute att = ts.getAttribute(cl); if (att == null) { continue; } if (cl.isAssignableFrom(CharTermAttribute.class)) { CharTermAttribute catt = (CharTermAttribute)att; cTerm = escape(catt.buffer(), catt.length()); } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) { TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att; char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray(); tTerm = escape(tTermChars, tTermChars.length); } else { if (tok.length() > 0) tok.append(','); if (cl.isAssignableFrom(FlagsAttribute.class)) { tok.append("f=").append(Integer.toHexString(((FlagsAttribute) att).getFlags())); } else if (cl.isAssignableFrom(OffsetAttribute.class)) { tok.append("s=").append(((OffsetAttribute) att).startOffset()).append(",e=").append(((OffsetAttribute) att).endOffset()); } else if (cl.isAssignableFrom(PayloadAttribute.class)) { BytesRef p = ((PayloadAttribute)att).getPayload(); if (p != null && p.length > 0) { tok.append("p=").append(bytesToHex(p.bytes, p.offset, p.length)); } else if (tok.length() > 0) { tok.setLength(tok.length() - 1); // remove the last comma } } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) { tok.append("i=").append(((PositionIncrementAttribute) att).getPositionIncrement()); } else if 
(cl.isAssignableFrom(TypeAttribute.class)) { tok.append("y=").append(escape(((TypeAttribute) att).type())); } else { tok.append(cl.getName()).append('=').append(escape(att.toString())); } } } String term = null; if (cTerm != null) { term = cTerm; } else { term = tTerm; } if (term != null && term.length() > 0) { if (tok.length() > 0) { tok.insert(0, term + ","); } else { tok.insert(0, term); } } sb.append(tok); } } return sb.toString(); }
Example #11
Source File: ITokenizer.java From datawave with Apache License 2.0 | 2 votes |
/**
 * Reports whether this tokenizer has the given attribute available.
 *
 * @param clazz
 *            the attribute class
 * @return true if the tokenizer has that attribute available
 */
boolean hasAttribute(Class<? extends Attribute> clazz);
Example #12
Source File: ITokenizer.java From datawave with Apache License 2.0 | 2 votes |
/**
 * Returns the Attribute for the specified class.
 *
 * @param <A>
 *            the attribute type, fixed by the type of {@code clazz}
 * @param clazz
 *            the attribute class
 * @return the Attribute for the specified class
 */
<A extends Attribute> A getAttribute(Class<A> clazz);