Java Code Examples for org.apache.lucene.analysis.tokenattributes.CharTermAttribute#buffer()
The following examples show how to use
org.apache.lucene.analysis.tokenattributes.CharTermAttribute#buffer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MinHashFilterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Consumes the given token stream and returns the text of every token it produces.
 *
 * <p>The stream is reset before it is consumed and is always closed before this method returns,
 * including when consumption fails part-way through.
 *
 * @param ts the token stream to consume; it is closed by this method
 * @return the token texts, in the order the stream produced them
 * @throws IOException if the stream fails while being reset, advanced, ended or closed
 */
private ArrayList<String> getTokens(TokenStream ts) throws IOException {
  ArrayList<String> tokens = new ArrayList<>();
  // try-with-resources guarantees close() even if reset()/incrementToken()/end() throws;
  // the original only closed the stream on the success path.
  try (TokenStream stream = ts) {
    stream.reset();
    while (stream.incrementToken()) {
      CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
      // Only the first length() chars of the buffer belong to the current term.
      tokens.add(new String(termAttribute.buffer(), 0, termAttribute.length()));
    }
    stream.end();
  }
  return tokens;
}
Example 2
Source File: AutoPhrasingTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Advances {@code input} by one token and returns a fresh copy of that token's term characters.
 *
 * @return a newly allocated array holding exactly the term's characters, or {@code null} when
 *     {@code input} has no further token or {@code getTermAttribute()} returns {@code null}
 * @throws IOException if advancing {@code input} fails
 */
private char[] nextToken() throws IOException {
  if (!input.incrementToken()) {
    return null;
  }

  CharTermAttribute term = getTermAttribute();
  if (term == null) {
    return null;
  }

  // Copy only the first length() chars out of the attribute's buffer.
  char[] source = term.buffer();
  char[] copy = new char[term.length()];
  System.arraycopy(source, 0, copy, 0, term.length());
  return copy;
}
Example 3
Source File: SolrInformationServer.java From SearchServices with GNU Lesser General Public License v3.0 | 4 votes |
private void addContentPropertyToDocUsingAlfrescoRepository( SolrInputDocument doc, QName propertyQName, long dbId, String locale) throws AuthenticationException, IOException { long start = System.nanoTime(); // Expensive call to be done with ContentTracker try (GetTextContentResponse response = repositoryClient.getTextContent(dbId, propertyQName, null)) { addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.SpecializedFieldType.TRANSFORMATION_STATUS, response); addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.SpecializedFieldType.TRANSFORMATION_EXCEPTION, response); addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.SpecializedFieldType.TRANSFORMATION_TIME, response); final String textContent = textContentFrom(response); if (fingerprintHasBeenEnabledOnThisInstance && !textContent.isBlank()) { Analyzer analyzer = core.getLatestSchema().getFieldType("min_hash").getIndexAnalyzer(); TokenStream ts = analyzer.tokenStream("dummy_field", textContent); CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { StringBuilder tokenBuff = new StringBuilder(); char[] buff = termAttribute.buffer(); for (int i = 0; i < termAttribute.length(); i++) { tokenBuff.append(Integer.toHexString(buff[i])); } doc.addField(FINGERPRINT_FIELD, tokenBuff.toString()); } ts.end(); ts.close(); } this.getTrackerStats().addDocTransformationTime(System.nanoTime() - start); String storedField = dataModel.getStoredContentField(propertyQName); doc.setField(storedField, "\u0000" + languageFrom(locale) + "\u0000" + textContent); dataModel.getIndexedFieldNamesForProperty(propertyQName) .getFields() .forEach(field -> addFieldIfNotSet(doc, field.getField())); } }
Example 4
Source File: ICUTransformFilter.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Points this object at the given term attribute and records the attribute's current character
 * buffer and term length.
 *
 * @param token the term attribute to adopt; its buffer is referenced, not copied
 */
void setText(final CharTermAttribute token) {
  this.token = token;

  final char[] termChars = token.buffer();
  this.buffer = termChars;

  final int termLength = token.length();
  this.length = termLength;
}
Example 5
Source File: JsonPreAnalyzedParser.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Renders a field as a JSON string containing a version marker, the field's stored value (if
 * the field type is stored) and one entry per token of its token stream (if it has one).
 *
 * <p>For each token, the term text is taken from the {@code CharTermAttribute} when present and
 * otherwise from the {@code TermToBytesRefAttribute}; flags, offsets, payload, position
 * increment and type are written under their dedicated keys, and any other attribute is written
 * under its class name using {@code toString()}.
 *
 * @param f the field to serialize
 * @return the JSON produced by {@code JSONUtil.toJSON} for the assembled map
 * @throws IOException if advancing the token stream fails
 */
@Override
public String toFormattedString(Field f) throws IOException {
  final Map<String,Object> json = new LinkedHashMap<>();
  json.put(VERSION_KEY, VERSION);

  if (f.fieldType().stored()) {
    final String text = f.stringValue();
    if (text != null) {
      json.put(STRING_KEY, text);
    }
    final BytesRef binary = f.binaryValue();
    if (binary != null) {
      json.put(BINARY_KEY, Base64.byteArrayToBase64(binary.bytes, binary.offset, binary.length));
    }
  }

  final TokenStream stream = f.tokenStreamValue();
  if (stream != null) {
    // NOTE(review): the stream is consumed without reset()/end()/close() being called here;
    // confirm that callers hand over a stream that is already prepared for consumption.
    final List<Map<String,Object>> tokenList = new LinkedList<>();
    while (stream.incrementToken()) {
      final Iterator<Class<? extends Attribute>> classes = stream.getAttributeClassesIterator();
      String charTerm = null;
      String bytesTerm = null;
      // TreeMap keeps the keys of each token entry sorted.
      final Map<String,Object> entry = new TreeMap<>();

      while (classes.hasNext()) {
        final Class<? extends Attribute> type = classes.next();
        final Attribute attribute = stream.getAttribute(type);
        if (attribute == null) {
          continue;
        }

        if (type.isAssignableFrom(CharTermAttribute.class)) {
          final CharTermAttribute chars = (CharTermAttribute) attribute;
          // Only the first length() chars of the buffer belong to the current term.
          charTerm = new String(chars.buffer(), 0, chars.length());
        } else if (type.isAssignableFrom(TermToBytesRefAttribute.class)) {
          final TermToBytesRefAttribute bytes = (TermToBytesRefAttribute) attribute;
          bytesTerm = bytes.getBytesRef().utf8ToString();
        } else if (type.isAssignableFrom(FlagsAttribute.class)) {
          entry.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) attribute).getFlags()));
        } else if (type.isAssignableFrom(OffsetAttribute.class)) {
          final OffsetAttribute offsets = (OffsetAttribute) attribute;
          entry.put(OFFSET_START_KEY, offsets.startOffset());
          entry.put(OFFSET_END_KEY, offsets.endOffset());
        } else if (type.isAssignableFrom(PayloadAttribute.class)) {
          final BytesRef payload = ((PayloadAttribute) attribute).getPayload();
          if (payload != null && payload.length > 0) {
            entry.put(PAYLOAD_KEY, Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
          }
        } else if (type.isAssignableFrom(PositionIncrementAttribute.class)) {
          entry.put(POSINCR_KEY, ((PositionIncrementAttribute) attribute).getPositionIncrement());
        } else if (type.isAssignableFrom(TypeAttribute.class)) {
          entry.put(TYPE_KEY, ((TypeAttribute) attribute).type());
        } else {
          // Unknown attribute: fall back to its class name and string form.
          entry.put(type.getName(), attribute.toString());
        }
      }

      // The char-based term wins over the bytes-based one when both are present.
      final String term = (charTerm != null) ? charTerm : bytesTerm;
      if (term != null && !term.isEmpty()) {
        entry.put(TOKEN_KEY, term);
      }
      tokenList.add(entry);
    }
    json.put(TOKENS_KEY, tokenList);
  }

  return JSONUtil.toJSON(json, -1);
}
Example 6
Source File: IcuTransformTokenFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Adopts the given term attribute as the current text source, remembering the attribute itself,
 * the character buffer it currently exposes, and its current term length.
 *
 * @param token the term attribute to read from; its buffer is stored by reference
 */
void setText(final CharTermAttribute token) {
  this.token = token;

  final char[] currentBuffer = token.buffer();
  final int currentLength = token.length();

  this.buffer = currentBuffer;
  this.length = currentLength;
}