org.ansj.domain.Result Java Examples

Source File: WordSegmenter.java From SnowGraph with Apache License 2.0

6 votes

/**
 * Segments the text of a Word document and appends the tokens that survive
 * filtering to {@code wordsCN}.
 *
 * <p>A token is dropped when it is null or empty, when its first character is
 * an ASCII letter or digit, or when it equals one of {@code stopTokens}.
 * If the parsed document is not a {@code WordDocumentInfo}, a message is
 * printed to standard output and nothing is collected.
 *
 * @param filePath path of the file to parse
 */
private static void tokenizeDocxFile(String filePath) {
    File file = new File(filePath);
    DocumentInfo doc = DocumentParser.parseFileToDocumentInfo(file);
    if (!(doc instanceof WordDocumentInfo)) {
        System.out.println("Not a docx file");
        return;
    }

    String content = ((WordDocumentInfo) doc).getDocStr();
    Result terms = ToAnalysis.parse(content);
    for (int i = 0; i < terms.size(); i++) {
        String word = terms.get(i).getName();
        // An empty token has no first character; charAt(0) would throw
        // StringIndexOutOfBoundsException, so skip such tokens up front.
        if (word == null || word.isEmpty()) {
            continue;
        }

        char firstLetter = word.charAt(0);
        boolean asciiAlphanumeric = (firstLetter >= 'A' && firstLetter <= 'Z')
                || (firstLetter >= 'a' && firstLetter <= 'z')
                || (firstLetter >= '0' && firstLetter <= '9');
        if (asciiAlphanumeric) {
            continue;
        }

        boolean isStopToken = false;
        for (String stopToken : stopTokens) {
            if (word.equals(stopToken)) {
                isStopToken = true;
                break;
            }
        }
        if (!isStopToken) {
            wordsCN.add(word);
        }
    }
}

Source File: AnsjTokenizer.java From jstarcraft-nlp with Apache License 2.0

5 votes

/**
 * Parses the text with {@code analysis}, runs every configured recognition
 * over the parse result, and wraps the result's iterator in an
 * {@code AnsjToken}.
 *
 * @param text text to tokenize
 * @return an {@code AnsjToken} built from the processed result's iterator
 */
@Override
public Iterable<AnsjToken> tokenize(CharSequence text) {
    final String input = text.toString();
    final Result parsed = analysis.parseStr(input);
    for (Recognition step : recognitions) {
        step.recognition(parsed);
    }
    return new AnsjToken(parsed.iterator());
}

Source File: AnsjImpl.java From chinese-segmentation-evaluation with Apache License 2.0

5 votes

/**
 * Segments a sentence with {@code ToAnalysis} and converts each ansj term
 * into this project's {@code Term}, keeping only the term name.
 *
 * @param sentence text to segment
 * @return the converted terms, in the order produced by the parser
 */
@Override
public List<Term> segment(String sentence) {
    final List<Term> converted = new ArrayList<>();
    final Result parsed = ToAnalysis.parse(sentence);
    for (org.ansj.domain.Term ansjTerm : parsed) {
        final String name = ansjTerm.getName();
        converted.add(new Term(name));
    }
    return converted;
}

Source File: DicSegment.java From youkefu with Apache License 2.0

5 votes

/**
 * Segments {@code content} and returns the words whose nature string is in
 * {@code expectedNature}, each formatted as {@code word/nature}.
 *
 * @param content text to segment; blank content yields an empty array
 * @param expectedNature nature strings to keep; null or empty yields an empty array
 * @return the matching {@code word/nature} entries in segmentation order
 */
public static String[] byNature(String content , Set<String> expectedNature){
	if (StringUtils.isBlank(content) || expectedNature == null || expectedNature.isEmpty()) {
		return new String[0];
	}

	List<String> matches = new ArrayList<String>();
	// Result wraps the segmentation output, mainly a List<Term>.
	Result parsed = NlpAnalysis.parse(content,DicLibrary.gets(librarykeyList));
	for (Term term : parsed.getTerms()) {
		String word = term.getName();
		String natureStr = term.getNatureStr();
		if (!expectedNature.contains(natureStr)) {
			continue;
		}
		matches.add(word+"/"+natureStr);
	}
	return matches.toArray(new String[matches.size()]);
}

Source File: WordSegmenter.java From SnowGraph with Apache License 2.0

5 votes

/**
 * Segments the given text with {@code ToAnalysis} and returns the name of
 * every resulting term, in order.
 *
 * @param strToParse text to segment
 * @return the term names produced by the parser
 */
public static ArrayList<String> demo(String strToParse) {
    ArrayList<String> ret = new ArrayList<>();
    Result terms = ToAnalysis.parse(strToParse);
    for (int i = 0; i < terms.size(); i++) {
        // Only the word itself is collected; the nature string was fetched
        // but never used, so that lookup has been dropped.
        ret.add(terms.get(i).getName());
    }
    return ret;
}

Source File: Analysis.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Obtains the segmentation result directly from the reader passed in through
 * the constructor, by collecting terms from {@code next()} until it returns
 * {@code null}.
 *
 * @return a {@code Result} wrapping all collected terms
 * @throws IOException declared by this method; see {@code next()}
 */
public Result parse() throws IOException {
    final List<Term> collected = new ArrayList<>();
    for (Term term = next(); term != null; term = next()) {
        collected.add(term);
    }
    return new Result(collected);
}

Source File: DicRecognition.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Applies the per-forest recognition to {@code result} for every non-null
 * entry of {@code forests}.
 *
 * @param result segmentation result to process
 */
@Override
public void recognition(Result result) {
    for (Forest candidate : forests) {
        if (candidate != null) {
            recognition(result, candidate);
        }
    }
}

Source File: SynonymsRecgnition.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Attaches synonyms to each term of {@code result} for which the synonym
 * forest has a branch with status greater than 1 and a non-null parameter
 * list.
 *
 * @param result segmentation result whose terms are annotated in place
 */
@Override
public void recognition(Result result) {
    for (Term term : result) {
        SmartForest<List<String>> node = synonyms.getBranch(term.getName());
        if (node == null || node.getStatus() <= 1) {
            continue;
        }
        List<String> found = node.getParam();
        if (found == null) {
            continue;
        }
        term.setSynonyms(found);
    }
}

Source File: UserDicNatureRecognition.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Re-assigns the nature of each term from the user dictionaries.
 *
 * <p>The forests are searched from the last to the first; the first one that
 * yields parameters for the term name wins, and its first parameter becomes
 * the term's new nature.
 *
 * @param result segmentation result whose terms are updated in place
 */
@Override
public void recognition(Result result) {
    for (Term term : result) {
        int idx = forests.length;
        while (--idx >= 0) {
            String[] params = getParams(forests[idx], term.getName());
            if (params == null) {
                continue;
            }
            term.setNature(new Nature(params[0]));
            break;
        }
    }
}

Source File: StopRecognition.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Removes from the result's term list every term for which {@code filter}
 * returns true.
 *
 * @param result segmentation result whose term list is pruned in place
 */
@Override
public void recognition(Result result) {
    List<Term> terms = result.getTerms();
    for (Iterator<Term> cursor = terms.iterator(); cursor.hasNext();) {
        if (filter(cursor.next())) {
            cursor.remove();
        }
    }
}

Source File: ChineseTokenizer.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Segments the input with {@code NlpAnalysis} and prepares an iterator over
 * the resulting terms.
 *
 * @param toTokenize text to segment
 */
public ChineseTokenizer(String toTokenize) {
    this.tokenList = NlpAnalysis.parse(toTokenize).getTerms();
    this.tokenIter = this.tokenList.iterator();
}

Source File: DicAnalysis.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Parses {@code str} with a fresh {@code DicAnalysis} configured with the
 * given forests.
 *
 * @param str text to segment
 * @param forests forests passed to {@code setForests}
 * @return the result of {@code parseStr}
 */
public static Result parse(String str, Forest... forests) {
    DicAnalysis analyzer = new DicAnalysis();
    return analyzer.setForests(forests).parseStr(str);
}

Source File: DicAnalysis.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Parses {@code str} with a fresh {@code DicAnalysis}.
 *
 * @param str text to segment
 * @return the result of {@code parseStr}
 */
public static Result parse(String str) {
    DicAnalysis analyzer = new DicAnalysis();
    return analyzer.parseStr(str);
}

Source File: ToAnalysis.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Parses {@code str} with a fresh {@code ToAnalysis} configured with the
 * given forests.
 *
 * @param str text to segment
 * @param forests forests passed to {@code setForests}
 * @return the result of {@code parseStr}
 */
public static Result parse(String str, Forest... forests) {
    ToAnalysis analyzer = new ToAnalysis();
    return analyzer.setForests(forests).parseStr(str);
}

Source File: ToAnalysis.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Parses {@code str} with a fresh {@code ToAnalysis}.
 *
 * @param str text to segment
 * @return the result of {@code parseStr}
 */
public static Result parse(String str) {
    ToAnalysis analyzer = new ToAnalysis();
    return analyzer.parseStr(str);
}

Source File: IndexAnalysis.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Parses {@code str} with a fresh {@code IndexAnalysis} configured with the
 * given forests.
 *
 * @param str text to segment
 * @param forests forests passed to {@code setForests}
 * @return the result of {@code parseStr}
 */
public static Result parse(String str, Forest... forests) {
    IndexAnalysis analyzer = new IndexAnalysis();
    return analyzer.setForests(forests).parseStr(str);
}

Source File: IndexAnalysis.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Parses {@code str} with a fresh {@code IndexAnalysis}.
 *
 * @param str text to segment
 * @return the result of {@code parseStr}
 */
public static Result parse(String str) {
    IndexAnalysis analyzer = new IndexAnalysis();
    return analyzer.parseStr(str);
}

Source File: BaseAnalysis.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Parses {@code str} with a fresh {@code BaseAnalysis}.
 *
 * @param str text to segment
 * @return the result of {@code parseStr}
 */
public static Result parse(String str) {
    BaseAnalysis analyzer = new BaseAnalysis();
    return analyzer.parseStr(str);
}

Source File: TimeRecognition.java From deeplearning4j with Apache License 2.0

4 votes

/**
 * Merges runs of consecutive terms whose concatenated names match the
 * time-expression pattern into a single term.
 *
 * <p>Starting at each position, the names of the following terms are appended
 * one at a time and the accumulated text is tested with {@code matches()}.
 * The last match found during the scan decides how many terms are merged into
 * the first one, which then receives {@code nature} (defined outside this
 * method). Terms with no match are kept unchanged. The result's term list is
 * replaced by the rebuilt list.
 *
 * @param result segmentation result whose term list is rebuilt
 */
@Override
public void recognition(Result result) {
    String name = "";
    // Concatenation of the term names scanned so far from the current start.
    String timeWord = "";
    List<Term> terms = result.getTerms();
    // Terms scanned from the current start position, in order.
    LinkedList<Term> mergeList = new LinkedList<>();
    // Rebuilt term list that replaces the original at the end.
    List<Term> list = new LinkedList<>();

    // NOTE(review): the pattern is recompiled on every call; it does not
    // depend on the input.
    Pattern pattern =
                    Pattern.compile("((\\d|[０１２３４５６７８９]){1,4}年(\\d|[０１２３４５６７８９]){1,2}月(\\d|[０１２３４５６７８９]){1,2}[日|号](上午|下午|中午|晚)?(\\s)*((\\d|[０１２３４５６７８９]){1,2}([点|时|點|時])?((:)?(\\d|[０１２３４５６７８９]){1,2}(分)?((:)?(\\d|[０１２３４５６７８９]){1,2}(秒)?)?)?)?(\\s)*(PM|AM)?|(\\d|[０１２３４５６７８９]){1,2}(月|月份)(\\d|[０１２３４５６７８９]){1,2}([日|号])?(上午|下午|中午|晚)?(\\s)*((\\d|[０１２３４５６７８９]){1,2}([点|时|點|時])?((:)?(\\d|[０１２３４５６７８９]){1,2}(分)?((:)?(\\d|[０１２３４５６７８９]){1,2}(秒)?)?)?)?(\\s)*(PM|AM)?|(\\d|[０１２３４５６７８９]){1,2}日(上午|下午|中午|晚)?(\\s)*((\\d|[０１２３４５６７８９]){1,2}([点|时|點|時])?((:)?(\\d|[０１２３４５６７８９]){1,2}(分)?((:)?(\\d|[０１２３４５６７８９]){1,2}(秒)?)?)?)?(\\s)*(PM|AM)?|(昨天|昨日|昨日上午|昨日下午|昨日晚上|昨天早上|昨天上午|昨天中午|昨天下午|昨晚|昨夜|昨天晚上|今天早上|今天上午|今天下午|今晚|今天晚上|今日上午|今日下午|今日|今天|前天|今年|去年|当日|当日上午|上午|下午|中午|清晨|前晚|早上|凌晨|今晨|近日|日前|不久前)((\\d|[０１２３４５６７８９]){1,2}[点|时|點|時])?((:)?(\\d|[０１２３４５６７８９]){1,2}(分)?((:)?(\\d|[０１２３４５６７８９]){1,2}(秒)?)?)?(\\s)*(PM|AM)?|[\\“|\"](1|2|3|4|5|6|7|8|9|10|11|12)[·|.| |-](\\d|[０１２３４５６７８９]){1,2}[\\”|\"]|星期[一|二|三|四|五|六|天|日]|(\\d|[０１２３４５６７８９]){1,2}[点|时|點|時]((:)?(\\d|[０１２３４５６７８９]){1,2}(分)?((:)?(\\d|[０１２３４５６７８９]){1,2}(秒)?)?)?(\\s)*(PM|AM)?|(\\d|[０１２３４５６７８９]){4}年((\\d|[０１２３４５６７８９]){1,2}月)?|(\\d|[０１２３４５６７８９]){1,2}月|(正|一|二|三|四|五|六|七|八|九|十|十一|十二|腊)月((初|十|二十|三十)[ 一二三四五六七八九十])?(上午|下午|中午|晚)?|((\\d|[０１２３４５６７８９]){4}-(\\d|[０１２３４５６７８９]){2}-(\\d|[０１２３４５６７８９]){2})?(\\s)*(\\d|[０１２３４５６７８９]){2}:(\\d|[０１２３４５６７８９]){2}:(\\d|[０１２３４５６７８９]){2}|(\\d|[０１２３４５６７８９]){4}-(\\d|[０１２３４５６７８９]){2}-(\\d|[０１２３４５６７８９]){2}(\\s)*((\\d|[０１２３４５６７８９]){2}:(\\d|[０１２３４５６７８９]){2}:(\\d|[０１２３４５６７８９]){2})?)",
                                    Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    for (int i = 0; i < terms.size(); i++) {
        boolean isTime = false;
        Term termBase = terms.get(i);
        // Number of terms (including the first) covered by the last match.
        int timeTermsLength = 1;
        int matchLength = 0; // number of terms scanned from this start position
        // Look ahead over the following terms to test whether they form a time
        // expression. The original note said "at most 14 words", but the loop
        // condition caps the scan at 11 terms.
        for (int j = i; j < terms.size() && matchLength < 11; j++) {
            Term term = terms.get(j);
            name = term.getName();
            timeWord += name;
            Matcher matcher = pattern.matcher(timeWord);
            mergeList.add(term);
            if (matcher.matches()) {
                isTime = true;
                timeTermsLength += (j - i);
                // Advance the outer index so the matched terms are not
                // revisited; later matches in this scan extend from here.
                i = j;
            }
            matchLength++;
        }
        if (isTime) {
            // The first scanned term absorbs the rest of the matched run.
            Term ft = mergeList.pollFirst();
            for (int k = 0; k < timeTermsLength - 1; k++) {
                ft.merageWithBlank(mergeList.get(k));
            }
            ft.setNature(nature);
            list.add(ft);
        } else {
            list.add(termBase);
        }
        // Reset the per-start scan state.
        mergeList.clear();
        timeWord = "";

    }
    result.setTerms(list);
}

Source File: TFIDF.java From NewsRecommendSystem with MIT License

4 votes

/**
 * Segments the given text with {@code ToAnalysis}.
 *
 * @param text text to segment
 * @return the parse result
 */
public static Result split(String text)
{
	final Result segmented = ToAnalysis.parse(text);
	return segmented;
}

Source File: DicRecognition.java From deeplearning4j with Apache License 2.0

2 votes

/**
 * Per-forest recognition step invoked for each non-null forest.
 *
 * <p>NOTE(review): the visible body only fetches the term list and does
 * nothing with it, and {@code forest} is unused — the implementation looks
 * incomplete or truncated here; confirm against the full source.
 *
 * @param result segmentation result to process
 * @param forest dictionary forest to apply (unused in the visible body)
 */
private void recognition(Result result, Forest forest) {
    List<Term> terms = result.getTerms();

}

Source File: Analysis.java From deeplearning4j with Apache License 2.0

2 votes

/**
 * Segments a single piece of text and wraps the terms in a {@code Result}.
 *
 * @param temp text to segment
 * @return a {@code Result} built from the output of {@code analysisStr}
 */
public Result parseStr(String temp) {
    final Result wrapped = new Result(analysisStr(temp));
    return wrapped;
}

Source File: Recognition.java From deeplearning4j with Apache License 2.0

votes

/**
 * Applies this recognition step to a segmentation result.
 *
 * <p>Implementations seen alongside this declaration modify the result in
 * place (removing terms, replacing the term list, or updating term
 * attributes); presumably that is the intended contract — confirm.
 *
 * @param result segmentation result to process
 */
public void recognition(Result result);

org.ansj.domain.Result Java Examples