Java Code Examples for com.hankcs.hanlp.corpus.io.IOUtil#newInputStream()
The following examples show how to use
com.hankcs.hanlp.corpus.io.IOUtil#newInputStream().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: Nlputil.java From dk-fitting with Apache License 2.0 | 6 votes |
/** * 添加词库 * * @param filePath 新的词库文件,每个词使用回车换行分隔 * @param encoding 编码 * @return 空—完成,其它—错误信息 */ public static String addCK(String filePath, String encoding) { if (filePath == null || encoding == null) return String.format("参数错误:addCK(%s, %s)", filePath, encoding); try { BufferedReader br = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(filePath), encoding)); String line; synchronized (lockCustomDictionary) { while ((line = br.readLine()) != null) { CustomDictionary.insert(line); } } br.close(); } catch (Exception e) { System.out.println(e); return TextUtility.exceptionToString(e); } return "添加成功"; }
Example 2
Source File: DKNLPBase.java From dk-fitting with Apache License 2.0 | 6 votes |
/** * 添加词库 * * @param filePath 新的词库文件,每个词使用回车换行分隔 * @param encoding 编码 * @return 空—完成,其它—错误信息 */ public static String addCK(String filePath, String encoding) { if (filePath == null || encoding == null) return String.format("参数错误:addCK(%s, %s)", filePath, encoding); try { BufferedReader br = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(filePath), encoding)); String line; synchronized (lockCustomDictionary) { while ((line = br.readLine()) != null) { CustomDictionary.insert(line); } } br.close(); } catch (Exception e) { return TextUtility.exceptionToString(e); } return null; }
Example 3
Source File: CustomDictionaryUtility.java From elasticsearch-analysis-hanlp with Apache License 2.0 | 4 votes |
/** * 加载用户词典(追加) * * @param path 词典路径 * @param defaultNature 默认词性 * @param customNatureCollector 收集用户词性 * @return */ private static boolean load(String path, Nature defaultNature, TreeMap<String, CoreDictionary.Attribute> map, LinkedHashSet<Nature> customNatureCollector) { try { String splitter = "\\s"; if (path.endsWith(".csv")) { splitter = ","; } BufferedReader br = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(path), "UTF-8")); String line; boolean firstLine = true; while ((line = br.readLine()) != null) { if (firstLine) { line = IOUtil.removeUTF8BOM(line); firstLine = false; } String[] param = line.split(splitter); // 排除空行 if (param[0].length() == 0) { continue; } // 正规化 if (HanLP.Config.Normalization) { param[0] = CharTable.convert(param[0]); } int natureCount = (param.length - 1) / 2; CoreDictionary.Attribute attribute; if (natureCount == 0) { attribute = new CoreDictionary.Attribute(defaultNature); } else { attribute = new CoreDictionary.Attribute(natureCount); for (int i = 0; i < natureCount; ++i) { attribute.nature[i] = LexiconUtility.convertStringToNature(param[1 + 2 * i], customNatureCollector); attribute.frequency[i] = Integer.parseInt(param[2 + 2 * i]); attribute.totalFrequency += attribute.frequency[i]; } } map.put(param[0], attribute); } br.close(); } catch (Exception e) { logger.error("hanlp custom dictionary [{}] read failed!", path, e); return false; } return true; }