Python Examples of jieba.cut_for_search

Source File: jieba_test.py From annotated_jieba with MIT License

5 votes

def testCutForSearch_NOHMM(self):
        # Run search-mode segmentation with HMM disabled over every sample in
        # test_contents and echo the tokens to stderr.
        for text in test_contents:
            segments = jieba.cut_for_search(text,HMM=False)
            # The cutter must hand back a lazy generator, not a list.
            assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
            tokens = list(segments)
            assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % text
            joined = " , ".join(tokens)
            print(joined, file=sys.stderr)
        # Trailing marker naming the test that just ran.
        print("testCutForSearch_NOHMM", file=sys.stderr)

Source File: similar_doc.py From Information_retrieva_Projectl- with MIT License

5 votes

def calculate(self,doc_id,Top_numbers=10,multiple=10):
        """Score documents against the document identified by ``doc_id``.

        The document's content is fetched from ``self.index``, segmented with
        jieba's search-mode cutter, and filtered: tokens found in
        ``self.punct``, ``self.Letters_and_numbers`` or ``self.stopword`` are
        dropped.  The remaining UTF-8 encoded tokens are passed to
        ``self.FastCos.calculate``.

        Args:
            doc_id: identifier passed to ``self.index.get_data``.
            Top_numbers: forwarded unchanged to ``self.FastCos.calculate``.
            multiple: forwarded unchanged to ``self.FastCos.calculate``.

        Returns:
            Whatever ``self.FastCos.calculate`` returns
            (presumably the most similar documents -- confirm against FastCos).
        """
        # Only the content is used; title and url are ignored here.
        _title, content, _url = self.index.get_data(doc_id)
        word_list = []
        for word in jieba.cut_for_search(content):
            if word in self.punct or word in self.Letters_and_numbers:
                continue
            encoded = word.encode("utf-8")
            # Stop words must be removed when computing document similarity,
            # otherwise it is too slow.  `in` replaces dict.has_key(), which
            # no longer exists on Python 3.
            if encoded not in self.stopword:
                word_list.append(encoded)
        return self.FastCos.calculate(word_list,Top_numbers,multiple)

Source File: main.py From Information_retrieva_Projectl- with MIT License

5 votes

def GET(self):
        """Render the detail page of one news item with its recommendations.

        Reads the ``id`` field of ``web.input()``, loads that document from
        ``id_index`` and asks ``recommand.calculate`` for related documents,
        using the jieba search-mode tokens of the content (minus punctuation,
        letters/digits and stop words).  When ``web.input()`` is empty a
        placeholder page is rendered instead.

        Returns:
            The result of ``render.news`` for the assembled ``news`` dict.
        """
        data=web.input()
        if not data:
            # No request data: render a placeholder with empty recommendations.
            news = dict()
            news['title'] = "No Such News"
            news['content'] = "Oh No!"
            news['url'] = "#"
            news['recommand']=[['','',''] for m in range(config.recommand_numbers)]
            return render.news(news)

        doc_id = int(data.id)
        title, content, url = id_index.get_data(doc_id)
        news = dict()
        news['content'] = content.decode("utf-8")
        news['title'] = title.decode("utf-8")
        news['url'] = url.decode("utf-8")

        # Online method: build the query tokens from the document itself.
        word_list = []
        for word in jieba.cut_for_search(content):
            if word in punct or word in Letters_and_numbers:
                continue
            encoded = word.encode("utf-8")
            # Stop words must be removed when computing document similarity,
            # otherwise it is too slow.  `in` replaces dict.has_key(), which
            # no longer exists on Python 3.
            if encoded not in recommand.stopword:
                word_list.append(encoded)
        topk = recommand.calculate(word_list, config.recommand_numbers, 10)

        recomand = []
        # Online method; the offline alternative iterates recommand.dic[doc_id].
        for i in topk:
            # Never recommend the document to itself.
            if i != doc_id:
                rel_title, rel_content, rel_url = id_index.get_data(i)
                recomand.append([rel_title.decode('utf-8'),rel_content.decode('utf-8'),rel_url.decode('utf-8')])
        news['recommand'] = recomand
        return render.news(news)

Source File: main.py From Information_retrieva_Projectl- with MIT License

5 votes

def GET(self):
        """Render the search page for the ``searchword`` request field.

        The search word is segmented with jieba's search-mode cutter, tokens
        found in ``punct`` or ``Letters_and_numbers`` are dropped, and the
        remaining UTF-8 encoded tokens are passed to ``query.calculate``.
        Each returned id is looked up in ``id_index`` to build a result entry
        with a truncated content snippet.  Afterwards ``word2vec`` is queried
        with the raw search word to collect related topics.

        Returns:
            The result of ``render.index(searchword, news_list, topic)``.
        """
        data=web.input()
        searchword = data.searchword if data else ''
        news_list=list()
        topic=list()
        if searchword:
            word_list = [
                word.encode("utf-8")
                for word in jieba.cut_for_search(searchword)
                if word not in punct and word not in Letters_and_numbers
            ]
            for k in query.calculate(word_list,config.query_return_numbers):
                title, content, url= id_index.get_data(k)
                # Separate name so the request data is not shadowed.
                item = dict()
                item['id'] = k
                item['content'] = content.decode("utf-8")[:config.query_return_snipper_size]
                item['title']=title.decode("utf-8")
                item['url'] = url.decode("utf-8")
                news_list.append(item)
            # The former `del ... title,content,url` was removed: those names
            # are unbound when the query returns no hits, so the del raised
            # UnboundLocalError for every empty result set.

            # Recommend similar topics via word2vec.
            word2vec.cal(searchword.encode('utf-8'))
            # Debug output; the call form prints the same on Python 2 and 3.
            print(word2vec.result.length)
            # NOTE(review): the original comment said a missing word yields
            # length 1, but the code tests for 0 -- confirm which is intended.
            if word2vec.result.length != 0:
                for i in range(config.recommand_topic_numbers):
                    topic.append(word2vec.result.word[i].char)
        return render.index(searchword,news_list,topic)

Source File: test_multithread.py From Malicious_Domain_Whois with GNU General Public License v3.0

5 votes

def run(self):
        """Segment sample sentences in each jieba mode and print the tokens."""
        sample = "我来到北京清华大学"

        # Full mode
        full_tokens = jieba.cut(sample,cut_all=True)
        print("Full Mode:" + "/ ".join(full_tokens))

        # Default mode
        default_tokens = jieba.cut(sample,cut_all=False)
        print("Default Mode:" + "/ ".join(default_tokens))

        # Mode left unspecified
        print(", ".join(jieba.cut("他来到了网易杭研大厦")))

        # Search-engine mode
        search_tokens = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")
        print(", ".join(search_tokens))

Source File: test_cut_for_search.py From Malicious_Domain_Whois with GNU General Public License v3.0

5 votes

def cuttest(test_sent):
    """Print the search-mode tokens of ``test_sent`` on a single line.

    Every token is printed followed by " / "; a newline ends the line.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    # Terminate the line of tokens.
    print("")

Source File: test_cut_for_search.py From Malicious_Domain_Whois with GNU General Public License v3.0

5 votes

def cuttest(test_sent):
    """Segment ``test_sent`` in search mode and echo each token with a slash."""
    separator = "/"
    segments = jieba.cut_for_search(test_sent)
    for segment in segments:
        # Keep the cursor on the same line; tokens are space separated.
        print(segment, separator, end=' ')
    print("")

Source File: jieba_test.py From Malicious_Domain_Whois with GNU General Public License v3.0

5 votes

def testCutForSearch_NOHMM(self):
        # Search-mode segmentation with the HMM switched off: each sample must
        # come back as a generator; its tokens are written to stderr.
        for sample in test_contents:
            generator = jieba.cut_for_search(sample,HMM=False)
            assert isinstance(generator, types.GeneratorType), "Test CutForSearch Generator error"
            words = list(generator)
            assert isinstance(words, list), "Test CutForSearch error on content: %s" % sample
            print(" , ".join(words), file=sys.stderr)
        print("testCutForSearch_NOHMM", file=sys.stderr)

Source File: jieba_test.py From Malicious_Domain_Whois with GNU General Public License v3.0

5 votes

def testCutForSearch(self):
        # Run search-mode segmentation over every sample in test_contents and
        # echo the tokens to stderr.
        for text in test_contents:
            segments = jieba.cut_for_search(text)
            # The cutter must hand back a lazy generator, not a list.
            assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
            tokens = list(segments)
            assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % text
            joined = " , ".join(tokens)
            print(joined, file=sys.stderr)
        # Trailing marker naming the test that just ran.
        print("testCutForSearch", file=sys.stderr)

Source File: test_multithread.py From annotated_jieba with MIT License

5 votes

def run(self):
        """Print the segmentation of fixed sample sentences in several modes."""
        # Full mode
        print("Full Mode:" + "/ ".join(jieba.cut("我来到北京清华大学",cut_all=True)))

        # Default mode
        print("Default Mode:" + "/ ".join(jieba.cut("我来到北京清华大学",cut_all=False)))

        # No mode argument given
        words = jieba.cut("他来到了网易杭研大厦")
        print(", ".join(words))

        # Search-engine mode
        words = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")
        print(", ".join(words))

Source File: test_cut_for_search.py From annotated_jieba with MIT License

5 votes

def cuttest(test_sent):
    """Write the search-mode segmentation of ``test_sent`` to stdout.

    Tokens appear on one line, each followed by " / ".
    """
    for piece in jieba.cut_for_search(test_sent):
        print(piece, "/", sep=' ', end=' ')
    print("")

Source File: test_cut_for_search.py From annotated_jieba with MIT License

5 votes

def cuttest(test_sent):
    """Cut ``test_sent`` with jieba's search mode and print token/slash pairs."""
    token_stream = jieba.cut_for_search(test_sent)
    for tok in token_stream:
        # end=' ' keeps all tokens on the same output line.
        print(tok, "/", end=' ')
    # Finish the line.
    print("")

Source File: jieba_test.py From jieba_fast with MIT License

5 votes

def testCutForSearch(self):
        # Each sample must be segmented lazily (generator); the tokens are
        # then materialised and written to stderr.
        for sample in test_contents:
            generator = jieba.cut_for_search(sample)
            assert isinstance(generator, types.GeneratorType), "Test CutForSearch Generator error"
            words = list(generator)
            assert isinstance(words, list), "Test CutForSearch error on content: %s" % sample
            print(" , ".join(words), file=sys.stderr)
        print("testCutForSearch", file=sys.stderr)

Source File: jieba_test.py From annotated_jieba with MIT License

5 votes

def testCutForSearch(self):
        # Search-mode cut of every entry in test_contents, with type checks
        # before and after the generator is consumed.
        for entry in test_contents:
            lazy_result = jieba.cut_for_search(entry)
            assert isinstance(lazy_result, types.GeneratorType), "Test CutForSearch Generator error"
            materialised = list(lazy_result)
            assert isinstance(materialised, list), "Test CutForSearch error on content: %s" % entry
            line = " , ".join(materialised)
            print(line, file=sys.stderr)
        # Marker line after all samples were processed.
        print("testCutForSearch", file=sys.stderr)

Source File: min.py From min with GNU General Public License v2.0

5 votes

def add_content(self, content, obj_key):
        """
        Add a document to the index.

        The content is segmented with jieba's search-mode cutter, the tokens
        are passed through ``min_nlp.get_weight`` and the result is handed to
        ``self.add_word_index`` together with ``obj_key``.
        """
        weighted_words = min_nlp.get_weight(jieba.cut_for_search(content))
        self.add_word_index(weighted_words, obj_key)

Source File: min.py From min with GNU General Public License v2.0

5 votes

def search(self, keywords, start=0, length=20):
        """
        Search for keywords.

        ``keywords`` is segmented with jieba's search-mode cutter and the
        resulting word list is passed, with ``start`` and ``length``, to
        ``self.search_by_words``; its return value is returned unchanged.
        """
        words = list(jieba.cut_for_search(keywords))
        return self.search_by_words(words, start, length)

Source File: jiebaSegment.py From QAmodel-for-Retrievalchatbot with MIT License

5 votes

def cut_for_search(self,sentence, stopword=True):
        """Segment ``sentence`` in jieba's search mode and return a token list.

        When ``stopword`` is truthy, tokens contained in ``self.stopwords``
        are left out of the result.
        """
        return [
            seg
            for seg in jieba.cut_for_search(sentence)
            if not (stopword and seg in self.stopwords)
        ]

Source File: jiebaSegment.py From Customer-Chatbot with MIT License

5 votes

def cut_for_search(self,sentence, stopword=True):
        """Return the search-mode tokens of ``sentence`` as a list.

        Tokens found in ``self.stopwords`` are skipped unless ``stopword`` is
        falsy, in which case every token is kept.
        """
        kept = []
        for token in jieba.cut_for_search(sentence):
            # De Morgan form of "skip when filtering is on and token is a stop word".
            if not stopword or token not in self.stopwords:
                kept.append(token)
        return kept

Source File: jiebaSegment.py From Customer-Chatbot with MIT License

5 votes

def cut_for_search(self,sentence, stopword=True):
        """Cut ``sentence`` with jieba's search mode, optionally dropping stop words.

        Returns a list of tokens; with a truthy ``stopword`` flag the tokens
        present in ``self.stopwords`` are excluded.
        """
        tokens = jieba.cut_for_search(sentence)
        if not stopword:
            # No filtering requested: keep every token.
            return list(tokens)
        return [tok for tok in tokens if tok not in self.stopwords]

Source File: article.py From public-opinion-analysis with MIT License

5 votes

def cache_raw_seg(self):
        """Store the search-mode tokens of every sentence in Redis sets.

        The Redis host, port and db are read from the ``redis`` section of
        ``./application.yml``.  For the sentence at index ``i`` each token of
        ``raw_sentence`` is added (``SADD``) to the set named
        ``article:<article_id>:raw_seg:<i>``.
        """
        # Use a context manager so the config file handle is closed instead of
        # being leaked until garbage collection.
        with open("./application.yml") as config_file:
            config = yaml.safe_load(config_file)
        redis_conf = config['redis']
        r = redis.StrictRedis(host=redis_conf['host'], port=redis_conf['port'], db=redis_conf['db'])
        for i, sentence in enumerate(self.sentences):
            sentence_seg_id = 'article:' + self.article_id + ':raw_seg:' + str(i)
            for raw_word_seg in jieba.cut_for_search(sentence.raw_sentence):
                r.sadd(sentence_seg_id, raw_word_seg)

Source File: test_multithread.py From jieba_fast with MIT License

5 votes

def run(self):
        """Exercise jieba's cutters on fixed sentences and print the results."""
        sentence = "我来到北京清华大学"

        # Full mode
        tokens = jieba.cut(sentence,cut_all=True)
        full_line = "Full Mode:" + "/ ".join(tokens)
        print(full_line)

        # Default mode
        tokens = jieba.cut(sentence,cut_all=False)
        default_line = "Default Mode:" + "/ ".join(tokens)
        print(default_line)

        # No mode argument given
        tokens = jieba.cut("他来到了网易杭研大厦")
        print(", ".join(tokens))

        # Search-engine mode
        tokens = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")
        print(", ".join(tokens))

Source File: test_cut_for_search.py From jieba_fast with MIT License

5 votes

def cuttest(test_sent):
    """Show how jieba's search mode splits ``test_sent``.

    Prints each token followed by " / " on one line, then a newline.
    """
    for part in jieba.cut_for_search(test_sent):
        print(part, "/", end=' ')
    print("")

Source File: test_cut_for_search.py From jieba_fast with MIT License

5 votes

def cuttest(test_sent):
    """Print every search-mode token of ``test_sent`` followed by a slash."""
    slash = "/"
    words = jieba.cut_for_search(test_sent)
    for current in words:
        # Tokens share one output line, separated by spaces.
        print(current, slash, end=' ')
    # Close the output line.
    print("")

Source File: jieba_test.py From jieba_fast with MIT License

5 votes

def testCutForSearch_NOHMM(self):
        # HMM-free search-mode cut of every entry in test_contents, with type
        # checks before and after the generator is consumed.
        for entry in test_contents:
            lazy_result = jieba.cut_for_search(entry,HMM=False)
            assert isinstance(lazy_result, types.GeneratorType), "Test CutForSearch Generator error"
            materialised = list(lazy_result)
            assert isinstance(materialised, list), "Test CutForSearch error on content: %s" % entry
            line = " , ".join(materialised)
            print(line, file=sys.stderr)
        # Marker line after all samples were processed.
        print("testCutForSearch_NOHMM", file=sys.stderr)

Python jieba.cut_for_search() Examples