Python jieba.cut_for_search() Examples
The following are 24 code examples of jieba.cut_for_search().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module jieba, or try the search function.
Example #1
Source File: jieba_test.py From annotated_jieba with MIT License | 5 votes |
def testCutForSearch_NOHMM(self):
    """Check ``jieba.cut_for_search`` with ``HMM=False`` on every test content.

    For each entry of ``test_contents`` the call must return a generator.
    The generator is then materialised and its tokens are written to
    stderr joined by " , ". A marker line is written to stderr once all
    contents have been processed.
    """
    for content in test_contents:
        segments = jieba.cut_for_search(content, HMM=False)
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        words = list(segments)
        assert isinstance(words, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(words), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)
Example #2
Source File: similar_doc.py From Information_retrieva_Projectl- with MIT License | 5 votes |
def calculate(self, doc_id, Top_numbers=10, multiple=10):
    """Delegate a similarity query for ``doc_id`` to ``self.FastCos``.

    The document's content is fetched from ``self.index`` and segmented
    with ``jieba.cut_for_search``. Tokens found in ``self.punct`` or
    ``self.Letters_and_numbers`` are skipped, as are tokens whose UTF-8
    encoding is a key of ``self.stopword``. The remaining UTF-8 encoded
    tokens are passed, together with ``Top_numbers`` and ``multiple``, to
    ``self.FastCos.calculate``, whose result is returned unchanged.
    """
    _title, content, _url = self.index.get_data(doc_id)
    word_list = []
    for token in jieba.cut_for_search(content):
        if token in self.punct or token in self.Letters_and_numbers:
            continue
        encoded = token.encode("utf-8")
        # Stop words must be removed when computing inter-document
        # similarity, otherwise it is too slow.
        if not self.stopword.has_key(encoded):
            word_list.append(encoded)
    return self.FastCos.calculate(word_list, Top_numbers, multiple)
Example #3
Source File: main.py From Information_retrieva_Projectl- with MIT License | 5 votes |
def GET(self):
    """Render the news page for the requested id with its recommendations.

    When the request carries no input, a placeholder "No Such News" item
    with ``config.recommand_numbers`` empty recommendation rows is
    rendered. Otherwise the item is loaded from ``id_index``, its content
    is segmented with ``jieba.cut_for_search``, and the filtered tokens
    are passed to ``recommand.calculate`` to obtain related document ids.
    """
    data = web.input()
    news = dict()
    if not data:
        news['title'] = "No Such News"
        news['content'] = "Oh No!"
        news['url'] = "#"
        news['recommand'] = [['', '', ''] for m in range(config.recommand_numbers)]
        return render.news(news)

    doc_id = int(data.id)
    title, content, url = id_index.get_data(doc_id)
    news['content'] = content.decode("utf-8")
    news['title'] = title.decode("utf-8")
    news['url'] = url.decode("utf-8")

    # Online method: build the query terms from the document itself.
    word_list = []
    for token in jieba.cut_for_search(content):
        if token in punct or token in Letters_and_numbers:
            continue
        encoded = token.encode("utf-8")
        # Stop words must be removed when computing inter-document
        # similarity, otherwise it is too slow.
        if not recommand.stopword.has_key(encoded):
            word_list.append(encoded)
    topk = recommand.calculate(word_list, config.recommand_numbers, 10)

    # Online method; the offline alternative would iterate over
    # recommand.dic[int(ID)] instead of topk.
    related = []
    for other_id in topk:
        if other_id != doc_id:
            rel_title, rel_content, rel_url = id_index.get_data(other_id)
            related.append([
                rel_title.decode('utf-8'),
                rel_content.decode('utf-8'),
                rel_url.decode('utf-8'),
            ])
    news['recommand'] = related
    return render.news(news)
Example #4
Source File: main.py From Information_retrieva_Projectl- with MIT License | 5 votes |
def GET(self):
    """Render the search page for the submitted ``searchword``.

    The search word is segmented with ``jieba.cut_for_search``. Tokens
    found in ``punct`` or ``Letters_and_numbers`` are dropped and the
    rest are UTF-8 encoded and passed to ``query.calculate``. Each
    returned document id is loaded from ``id_index`` and turned into a
    result dict with the keys ``id``, ``content`` (truncated to
    ``config.query_return_snipper_size`` characters), ``title`` and
    ``url``. ``word2vec`` is then queried for related topics.

    Returns whatever ``render.index(searchword, news_list, topic)``
    returns. With no input or an empty search word both lists are empty.
    """
    request = web.input()
    searchword = request.searchword if request else ''
    news_list = []
    topic = []
    if searchword:
        word_list = [
            word.encode("utf-8")
            for word in jieba.cut_for_search(searchword)
            if word not in punct and word not in Letters_and_numbers
        ]
        # No explicit ``del`` of the loop locals here: when the query
        # returns no hits, title/content/url are never bound and deleting
        # them raised UnboundLocalError.
        for k in query.calculate(word_list, config.query_return_numbers):
            title, content, url = id_index.get_data(k)
            news_list.append({
                'id': k,
                'content': content.decode("utf-8")[:config.query_return_snipper_size],
                'title': title.decode("utf-8"),
                'url': url.decode("utf-8"),
            })

        # Recommend similar topics via word2vec.
        word2vec.cal(searchword.encode('utf-8'))
        # Single-argument parenthesised form prints the same on Python 2
        # and is valid on Python 3.
        print(word2vec.result.length)
        # NOTE(review): the original comment says a missing word yields
        # length 1, yet the check is against 0 -- TODO confirm.
        if word2vec.result.length != 0:
            for i in range(config.recommand_topic_numbers):
                topic.append(word2vec.result.word[i].char)
    return render.index(searchword, news_list, topic)
Example #5
Source File: test_multithread.py From Malicious_Domain_Whois with GNU General Public License v3.0 | 5 votes |
def run(self):
    """Print jieba segmentations of fixed sample sentences in several modes."""
    # Full mode
    print("Full Mode:" + "/ ".join(jieba.cut("我来到北京清华大学", cut_all=True)))

    # Default mode
    print("Default Mode:" + "/ ".join(jieba.cut("我来到北京清华大学", cut_all=False)))

    print(", ".join(jieba.cut("他来到了网易杭研大厦")))

    # Search engine mode
    search_tokens = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
    print(", ".join(search_tokens))
Example #6
Source File: test_cut_for_search.py From Malicious_Domain_Whois with GNU General Public License v3.0 | 5 votes |
def cuttest(test_sent):
    """Print the search-mode segmentation of ``test_sent`` on one line.

    Each token yielded by ``jieba.cut_for_search`` is printed followed by
    " / ". The line is terminated after the last token.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
Example #7
Source File: test_cut_for_search.py From Malicious_Domain_Whois with GNU General Public License v3.0 | 5 votes |
def cuttest(test_sent):
    """Print the search-mode segmentation of ``test_sent`` on one line.

    Each token yielded by ``jieba.cut_for_search`` is printed followed by
    " / ". The line is terminated after the last token.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
Example #8
Source File: jieba_test.py From Malicious_Domain_Whois with GNU General Public License v3.0 | 5 votes |
def testCutForSearch_NOHMM(self):
    """Check ``jieba.cut_for_search`` with ``HMM=False`` on every test content.

    For each entry of ``test_contents`` the call must return a generator.
    The generator is then materialised and its tokens are written to
    stderr joined by " , ". A marker line is written to stderr once all
    contents have been processed.
    """
    for content in test_contents:
        segments = jieba.cut_for_search(content, HMM=False)
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        words = list(segments)
        assert isinstance(words, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(words), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)
Example #9
Source File: jieba_test.py From Malicious_Domain_Whois with GNU General Public License v3.0 | 5 votes |
def testCutForSearch(self):
    """Check ``jieba.cut_for_search`` on every test content.

    For each entry of ``test_contents`` the call must return a generator.
    The generator is then materialised and its tokens are written to
    stderr joined by " , ". A marker line is written to stderr once all
    contents have been processed.
    """
    for content in test_contents:
        segments = jieba.cut_for_search(content)
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        words = list(segments)
        assert isinstance(words, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(words), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
Example #10
Source File: test_multithread.py From annotated_jieba with MIT License | 5 votes |
def run(self):
    """Print jieba segmentations of fixed sample sentences in several modes."""
    # Full mode
    print("Full Mode:" + "/ ".join(jieba.cut("我来到北京清华大学", cut_all=True)))

    # Default mode
    print("Default Mode:" + "/ ".join(jieba.cut("我来到北京清华大学", cut_all=False)))

    print(", ".join(jieba.cut("他来到了网易杭研大厦")))

    # Search engine mode
    search_tokens = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
    print(", ".join(search_tokens))
Example #11
Source File: test_cut_for_search.py From annotated_jieba with MIT License | 5 votes |
def cuttest(test_sent):
    """Print the search-mode segmentation of ``test_sent`` on one line.

    Each token yielded by ``jieba.cut_for_search`` is printed followed by
    " / ". The line is terminated after the last token.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
Example #12
Source File: test_cut_for_search.py From annotated_jieba with MIT License | 5 votes |
def cuttest(test_sent):
    """Print the search-mode segmentation of ``test_sent`` on one line.

    Each token yielded by ``jieba.cut_for_search`` is printed followed by
    " / ". The line is terminated after the last token.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
Example #13
Source File: jieba_test.py From jieba_fast with MIT License | 5 votes |
def testCutForSearch(self):
    """Check ``jieba.cut_for_search`` on every test content.

    For each entry of ``test_contents`` the call must return a generator.
    The generator is then materialised and its tokens are written to
    stderr joined by " , ". A marker line is written to stderr once all
    contents have been processed.
    """
    for content in test_contents:
        segments = jieba.cut_for_search(content)
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        words = list(segments)
        assert isinstance(words, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(words), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
Example #14
Source File: jieba_test.py From annotated_jieba with MIT License | 5 votes |
def testCutForSearch(self):
    """Check ``jieba.cut_for_search`` on every test content.

    For each entry of ``test_contents`` the call must return a generator.
    The generator is then materialised and its tokens are written to
    stderr joined by " , ". A marker line is written to stderr once all
    contents have been processed.
    """
    for content in test_contents:
        segments = jieba.cut_for_search(content)
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        words = list(segments)
        assert isinstance(words, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(words), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
Example #15
Source File: min.py From min with GNU General Public License v2.0 | 5 votes |
def add_content(self, content, obj_key):
    """Add a document to the index.

    ``content`` is segmented with ``jieba.cut_for_search``, the segments
    are passed through ``min_nlp.get_weight``, and the result is handed
    to ``self.add_word_index`` together with ``obj_key``.
    """
    weighted_terms = min_nlp.get_weight(jieba.cut_for_search(content))
    self.add_word_index(weighted_terms, obj_key)
Example #16
Source File: min.py From min with GNU General Public License v2.0 | 5 votes |
def search(self, keywords, start=0, length=20):
    """Search for keywords.

    ``keywords`` is segmented with ``jieba.cut_for_search`` into a list,
    which is forwarded with ``start`` and ``length`` to
    ``self.search_by_words``. That call's result is returned.
    """
    terms = list(jieba.cut_for_search(keywords))
    return self.search_by_words(terms, start, length)
Example #17
Source File: jiebaSegment.py From QAmodel-for-Retrievalchatbot with MIT License | 5 votes |
def cut_for_search(self, sentence, stopword=True):
    """Segment ``sentence`` in search mode and return the tokens as a list.

    When ``stopword`` is truthy, tokens contained in ``self.stopwords``
    are left out. Otherwise every token is kept.
    """
    segments = jieba.cut_for_search(sentence)
    if not stopword:
        return list(segments)
    return [seg for seg in segments if seg not in self.stopwords]
Example #18
Source File: jiebaSegment.py From Customer-Chatbot with MIT License | 5 votes |
def cut_for_search(self, sentence, stopword=True):
    """Segment ``sentence`` in search mode and return the tokens as a list.

    When ``stopword`` is truthy, tokens contained in ``self.stopwords``
    are left out. Otherwise every token is kept.
    """
    segments = jieba.cut_for_search(sentence)
    if not stopword:
        return list(segments)
    return [seg for seg in segments if seg not in self.stopwords]
Example #19
Source File: jiebaSegment.py From Customer-Chatbot with MIT License | 5 votes |
def cut_for_search(self, sentence, stopword=True):
    """Segment ``sentence`` in search mode and return the tokens as a list.

    When ``stopword`` is truthy, tokens contained in ``self.stopwords``
    are left out. Otherwise every token is kept.
    """
    segments = jieba.cut_for_search(sentence)
    if not stopword:
        return list(segments)
    return [seg for seg in segments if seg not in self.stopwords]
Example #20
Source File: article.py From public-opinion-analysis with MIT License | 5 votes |
def cache_raw_seg(self):
    """Store the search-mode segments of every sentence in Redis sets.

    Redis connection settings (``host``, ``port``, ``db``) are read from
    the ``redis`` section of ``./application.yml``. For the sentence at
    index ``i`` of ``self.sentences``, each token produced by
    ``jieba.cut_for_search`` on its ``raw_sentence`` is added to the set
    keyed ``article:<article_id>:raw_seg:<i>``.
    """
    # Use a context manager so the config file handle is closed rather
    # than leaked.
    with open("./application.yml") as config_file:
        config = yaml.safe_load(config_file)
    redis_config = config['redis']
    r = redis.StrictRedis(host=redis_config['host'],
                          port=redis_config['port'],
                          db=redis_config['db'])
    for i, sentence in enumerate(self.sentences):
        sentence_seg_id = 'article:' + self.article_id + ':raw_seg:' + str(i)
        for raw_word_seg in jieba.cut_for_search(sentence.raw_sentence):
            r.sadd(sentence_seg_id, raw_word_seg)
Example #21
Source File: test_multithread.py From jieba_fast with MIT License | 5 votes |
def run(self):
    """Print jieba segmentations of fixed sample sentences in several modes."""
    # Full mode
    print("Full Mode:" + "/ ".join(jieba.cut("我来到北京清华大学", cut_all=True)))

    # Default mode
    print("Default Mode:" + "/ ".join(jieba.cut("我来到北京清华大学", cut_all=False)))

    print(", ".join(jieba.cut("他来到了网易杭研大厦")))

    # Search engine mode
    search_tokens = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
    print(", ".join(search_tokens))
Example #22
Source File: test_cut_for_search.py From jieba_fast with MIT License | 5 votes |
def cuttest(test_sent):
    """Print the search-mode segmentation of ``test_sent`` on one line.

    Each token yielded by ``jieba.cut_for_search`` is printed followed by
    " / ". The line is terminated after the last token.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
Example #23
Source File: test_cut_for_search.py From jieba_fast with MIT License | 5 votes |
def cuttest(test_sent):
    """Print the search-mode segmentation of ``test_sent`` on one line.

    Each token yielded by ``jieba.cut_for_search`` is printed followed by
    " / ". The line is terminated after the last token.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
Example #24
Source File: jieba_test.py From jieba_fast with MIT License | 5 votes |
def testCutForSearch_NOHMM(self):
    """Check ``jieba.cut_for_search`` with ``HMM=False`` on every test content.

    For each entry of ``test_contents`` the call must return a generator.
    The generator is then materialised and its tokens are written to
    stderr joined by " , ". A marker line is written to stderr once all
    contents have been processed.
    """
    for content in test_contents:
        segments = jieba.cut_for_search(content, HMM=False)
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        words = list(segments)
        assert isinstance(words, list), "Test CutForSearch error on content: %s" % content
        print(" , ".join(words), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)