Java Code Examples for org.wltea.analyzer.core.IKSegmenter#next()
The following examples show how to use
org.wltea.analyzer.core.IKSegmenter#next().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IKAnalyzer.java From hugegraph with Apache License 2.0 | 6 votes |
@Override public Set<String> segment(String text) { Set<String> result = InsertionOrderUtil.newSet(); IKSegmenter ik = new IKSegmenter(new StringReader(text), this.smartSegMode); try { Lexeme word = null; while ((word = ik.next()) != null) { result.add(word.getLexemeText()); } } catch (Exception e) { throw new HugeException("IKAnalyzer segment text '%s' failed", e, text); } return result; }
Example 2
Source File: TokenizerAnalyzerUtils.java From JewelCrawler with GNU General Public License v3.0 | 6 votes |
public static String getAnalyzerResult(String input) { StringReader sr=new StringReader(input); IKSegmenter ik=new IKSegmenter(sr, true);//true is use smart Lexeme lex=null; List<String> stopWordsList = getStopWordsList(); StringBuilder stringBuilder = new StringBuilder(); try { while((lex=ik.next())!=null){ if(stopWordsList.contains(lex.getLexemeText())) { continue; } stringBuilder.append(lex.getLexemeText() + Constants.BLANKSPACE); } } catch (IOException e) { e.printStackTrace(); System.out.println("failed to parse input content"); } return stringBuilder.toString(); }
Example 3
Source File: ChineseTokenizer.java From RDMP1 with GNU General Public License v2.0 | 6 votes |
/** * * @Title: segStr * @Description: 返回LinkedHashMap的分词 * @param @param content * @param @return * @return Map<String,Integer> * @throws */ public static Map<String, Long> segStr(String content){ // 分词 Reader input = new StringReader(content); // 智能分词关闭(对分词的精度影响很大) IKSegmenter iks = new IKSegmenter(input, true); Lexeme lexeme = null; Map<String, Long> words = new LinkedHashMap<String, Long>(); try { while ((lexeme = iks.next()) != null) { if (words.containsKey(lexeme.getLexemeText())) { words.put(lexeme.getLexemeText(), words.get(lexeme.getLexemeText()) + 1); } else { words.put(lexeme.getLexemeText(), 1L); } } }catch(IOException e) { e.printStackTrace(); } return words; }
Example 4
Source File: StrUtils.java From Lottery with GNU General Public License v2.0 | 6 votes |
/** * * @param keyword 源词汇 * @param smart 是否智能分词 * @return 分词词组(,拼接) */ public static String getKeywords(String keyword, boolean smart) { StringReader reader = new StringReader(keyword); IKSegmenter iks = new IKSegmenter(reader, smart); StringBuilder buffer = new StringBuilder(); try { Lexeme lexeme; while ((lexeme = iks.next()) != null) { buffer.append(lexeme.getLexemeText()).append(','); } } catch (IOException e) { } //去除最后一个, if (buffer.length() > 0) { buffer.setLength(buffer.length() - 1); } return buffer.toString(); }
Example 5
Source File: SWMCQueryBuilder.java From IKAnalyzer with Apache License 2.0 | 5 votes |
/** * 分词切分,并返回结链表 * @param keywords * @return */ private static List<Lexeme> doAnalyze(String keywords){ List<Lexeme> lexemes = new ArrayList<Lexeme>(); IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , true); try{ Lexeme l = null; while( (l = ikSeg.next()) != null){ lexemes.add(l); } }catch(IOException e){ e.printStackTrace(); } return lexemes; }
Example 6
Source File: SWMCQueryBuilder.java From ik-analyzer with GNU General Public License v3.0 | 5 votes |
/** * 分词切分,并返回结链表 * * @param keywords * * @return */ private static List<Lexeme> doAnalyze(String keywords) { List<Lexeme> lexemes = new ArrayList<Lexeme>(); IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords), true); try { Lexeme l; while ((l = ikSeg.next()) != null) { lexemes.add(l); } } catch (IOException e) { LOG.error("io error.", e); } return lexemes; }