Python wikipedia.search() Examples
The following are 19 code examples of wikipedia.search(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module wikipedia, or try the search function.
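Before the project examples, here is a minimal standalone sketch of the call they all build on. It assumes the wikipedia package is installed (pip install wikipedia); its signature is wikipedia.search(query, results=10, suggestion=False), and the query strings and printed output below are illustrative only.

import wikipedia

# Basic search: returns a list of matching page titles (up to `results`).
titles = wikipedia.search("Python (programming language)", results=5)
print(titles)

# With suggestion=True, the call returns a (results, suggestion) tuple,
# where suggestion is an alternative query string or None.
titles, suggestion = wikipedia.search("pythn", results=5, suggestion=True)
if suggestion:
    print("Did you mean: " + suggestion)

Most of the examples below layer error handling (DisambiguationError, PageError) and language selection (wikipedia.set_lang) on top of this basic call.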
Example #1
Source File: fetch_wiki.py From adam_qas with GNU General Public License v3.0 | 8 votes |
def search_wiki(keywords, number_of_search, wiki_pages):
    suggestion = False
    for word in range(0, len(keywords) - 1):
        # print(keywords[word], ">>")
        result_set = wikipedia.search(keywords[word], number_of_search, suggestion)
        for term in result_set:
            try:
                page = wikipedia.page(term, preload=False)
                page_title = page.title
                # page_summary = page.summary
                page_content = page.content
                wiki_pages[page_title] = page_content
            except wikipedia.exceptions.DisambiguationError as error:
                pass
            except wikipedia.exceptions.PageError as error:
                pass
            # print(error.options)
            # print(page_title, len(page_content), type(page_content))
    return wiki_pages
Example #2
Source File: gen_corpus.py From Living-Audio-Dataset with Apache License 2.0 | 6 votes |
def get_articles(language, no_words, max_no_articles, search, **kwargs):
    """ Retrieve articles from Wikipedia """
    wikipedia.set_rate_limiting(True)  # be polite
    wikipedia.set_lang(language)
    if search is not None:
        titles = wikipedia.search(search, results=max_no_articles)
    else:
        titles = wikipedia.random(pages=max_no_articles)
    articles = []
    current_no_words = 0
    for title in titles:
        print("INFO: loading {}".format(title))
        page = wikipedia.page(title=title)
        content = page.content
        article_no_words = len(content.split())
        current_no_words += article_no_words
        print("INFO: article contains {} words".format(article_no_words))
        articles.append((title, content))
        if current_no_words >= no_words:
            break
    return articles
Example #3
Source File: fetch_tax_info.py From idseq-dag with MIT License | 6 votes |
def fetch_wiki_content(num_threads, taxid2wikidict, taxid2wikicontent, id2namedict):
    ''' Fetch wikipedia content based on taxid2wikidict '''
    threads = []
    semaphore = threading.Semaphore(num_threads)
    mutex = TraceLock("fetch_wiki_content", threading.RLock())
    for taxid, url in taxid2wikidict.items():
        m = re.search(r"curid=(\d+)", url)
        pageid = None
        if m:
            pageid = m[1]
        name = id2namedict.get(taxid)
        if pageid or name:
            semaphore.acquire()
            t = threading.Thread(
                target=PipelineStepFetchTaxInfo.get_wiki_content_for_page,
                args=[taxid, pageid, name, taxid2wikicontent, mutex, semaphore])
            t.start()
            threads.append(t)
    for t in threads:
        t.join()
Example #4
Source File: BuscadorPersonas.py From osint-suite-tools with GNU General Public License v3.0 | 6 votes |
def search_google_(target):
    engine = Google()
    results = engine.search("'" + target + "'")
    for r in results:
        print("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | " + r["text"] + " | " + r["link"])
        try:
            tsd, td, tsu = extract(r["link"])
            domain = td + '.' + tsu
            web = requests.get(r["link"], timeout=3)
            print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
            if 200 <= web.status_code < 300:  # only parse successful (2xx) responses
                if domain not in config.BL_parserPhone:
                    TEXT = er.remove_tags(str(web.text))
                    parser.parserMAIN(TEXT)
        except Exception as e:
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
Example #5
Source File: search.py From W.I.L.L with MIT License | 6 votes |
def main(data):
    '''Start the search'''
    response = {"text": None, "data": {}, "type": "success"}
    query = data["command"]
    log.info("In main search function with query {0}".format(query))
    db = data["db"]
    answer = False
    wolfram_key = tools.load_key("wolfram", db)
    wolfram_response = search_wolfram(query, wolfram_key)
    # If it found an answer, answer will be set to that; if not, it'll still be False
    answer = wolfram_response
    if answer:
        response["text"] = answer
    else:
        response["text"] = search_google(query)
    return response
Example #6
Source File: search.py From W.I.L.L with MIT License | 6 votes |
def is_search(event):
    '''Determine whether it's a search command'''
    command = event["command"]
    if "search" in event["verbs"]:
        return True
    question_words = [
        "what",
        "when",
        "why",
        "how",
        "who",
        "are",
        "is"
    ]
    first_word = command.split(" ")[0].lower()
    log.debug("First word in command is {0}".format(first_word))
    if first_word in question_words:
        return True
    return False
Example #7
Source File: wiki.py From Jarvis with MIT License | 6 votes |
def __call__(self, jarvis, s):
    k = s.split(' ', 1)
    if len(k) == 1:
        jarvis.say(
            "Do you mean:\n1. wiki search <subject>\n2. wiki summary <subject>\n3. wiki content <subject>")
    else:
        data = None
        if k[0] == "search":
            data = self.search(" ".join(k[1:]))
        elif k[0] == "summary":
            data = self.summary(" ".join(k[1:]))
        elif k[0] == "content":
            data = self.content(" ".join(k[1:]))
        else:
            jarvis.say("I don't know what you mean")
            return

        if isinstance(data, list):
            print("\nDid you mean one of these pages?\n")
            for d in range(len(data)):
                print(str(d + 1) + ": " + data[d])
        else:
            print("\n" + data)
Example #8
Source File: gen_corpus.py From Living-Audio-Dataset with Apache License 2.0 | 6 votes |
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--max-no-articles", type=int, default=10,
                        help="maximum number of articles to download")
    parser.add_argument("-w", "--no-words", type=int, default=1000000,
                        help="target number of words")
    parser.add_argument("-s", "--search",
                        help="if specified will use this search term")
    parser.add_argument("language", help="2 letter language code")
    parser.add_argument("output", type=argparse.FileType('w'),
                        help="output file")
    args = parser.parse_args()
    articles = get_articles(**vars(args))
    corpusxml = articles2xml(articles)
    xmlstr = lxml.etree.tostring(corpusxml, pretty_print=True,
                                 xml_declaration=True, encoding='utf-8')
    args.output.write(xmlstr.decode('utf-8'))
Example #9
Source File: temporal_lobe.py From rpi_ai with MIT License | 5 votes |
def playMusic(query):
    # get YouTube list
    pattern = re.compile('([^\s\w]|_)+')
    b_string = re.sub(pattern, '', query)
    phrase = b_string
    pattern = re.compile("\\b(some|play)\\W", re.I)
    query = [pattern.sub("", phrase)]
    query = query[0]
    print query
    url = "https://www.googleapis.com/youtube/v3/search?part=snippet&key=" + keyring.get_password('google', 'api_secret') + "&q=" + urllib.quote_plus(query) + "&type=video"
    response = urllib2.urlopen(url)
    jsonResp = response.read()
    decoded = json.loads(jsonResp)
    # os.system('echo \'' + url + '\' > url.txt')  # for debugging
    url = 'http://youtube.com/watch?v=' + decoded['items'][0]['id']['videoId']
    theSongName = decoded['items'][0]['snippet']['title']
    pattern = re.compile("([^a-zA-Z\d\s:,.']|_)+")
    theSongName = re.sub(pattern, '', theSongName)
    # for x in range(1, len(decoded['items'])):
    #     url = url + ' ' + 'http://youtube.com/watch?v=' + decoded['items'][x]['id']['videoId']
    permission = audio_cortex.getUserPermission("Do you want to hear " + theSongName)
    if permission:
        vlc = 'cvlc --no-video --volume 270 -A alsa,none --alsa-audio-device hw:1' + ' ' + url + ' --play-and-exit &'
        print url
        os.system(vlc)
        print "started music.."
        return "Sure I'll play " + theSongName
    else:
        return "Okay, I will play nothing."

# Look up declarative knowledge with Wolfram
Example #10
Source File: temporal_lobe.py From rpi_ai with MIT License | 5 votes |
def wikipediaLookUp(a_string, num_sentences):
    print a_string
    pattern = re.compile('([^\s\w]|_)+')
    b_string = re.sub(pattern, '', a_string)
    phrase = b_string
    print phrase
    pattern = re.compile("\\b(lot|lots|a|an|who|can|you|what|is|info|somethings|whats|have|i|something|to|know|like|Id|information|about|tell|me)\\W", re.I)
    phrase_noise_removed = [pattern.sub("", phrase)]
    print phrase_noise_removed[0]
    a = wikipedia.search(phrase_noise_removed[0])
    print a[0]
    the_summary = wikipedia.summary(a[0], sentences=num_sentences)
    print the_summary
    return the_summary
Example #11
Source File: wiki.py From Jarvis with MIT License | 5 votes |
def search(self, query, count=10, suggestion=False):
    """Do a Wikipedia search for a query; return a list of up to `count` related items."""
    items = wikipedia.search(query, count, suggestion)
    if isinstance(items, list) and items:
        return items
    return "No articles with that name, try another item."
Example #12
Source File: BuscadorPersonas.py From osint-suite-tools with GNU General Public License v3.0 | 5 votes |
def searchWikipedia(target):
    try:
        wikipedia.set_lang("es")
        d0 = wikipedia.search(target)
        if d0:
            print()
            print("|----[INFO][WIKIPEDIA][>] ")
            print("     |----[INFO][SEARCH][>] ")
            print("          - Resultados encontrados: ")
            for r in d0:
                print("          - " + r)
        else:
            print("|----[INFO][WIKIPEDIA][>] No aparecen resultados en WIKIPEDIA.")
    except:
        print("[!][WARNING][WIKIPEDIA][>] Error en la API...")

    try:
        d1 = wikipedia.page(target)
        linksWIKI = d1.links
        urlWIKI = d1.url
        if d1:
            print("     |----[INFO][TAGS][>] ")
            for l in linksWIKI:
                print("          - " + l)
            print("|----[FUENTES][WIKIPEDIA][>] ")
            print("     - " + urlWIKI)
            config.wikipediaData_list.append(urlWIKI)
        else:
            print("|----[INFO][WIKIPEDIA][>] No aparecen resultados en WIKIPEDIA.")
    except:
        print("[!][WARNING][WIKIPEDIA][>] Error en la API o no aparecen resultados...")

# Functions for searching YouTube
Example #13
Source File: wikipedia.py From pyconjpbot with MIT License | 5 votes |
def wikipedia_page(message, option, query):
    """ Return the result of a Wikipedia search """
    if query == 'help':
        return

    # set language
    lang = 'ja'
    if option:
        _, lang = option.split('-')
        wikipedia.set_lang(lang)

    try:
        # search with query
        results = wikipedia.search(query)
    except:
        botsend(message, '指定された言語 `{}` は存在しません'.format(lang))  # "the language `{}` does not exist"
        return

    # get first result
    if results:
        page = wikipedia.page(results[0])
        attachments = [{
            'fallback': 'Wikipedia: {}'.format(page.title),
            'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
            'text': page.summary,
        }]
        botwebapi(message, attachments)
    else:
        botsend(message, '`{}` に該当するページはありません'.format(query))  # "no page matches `{}`"
Example #14
Source File: BuscadorPersonas.py From osint-suite-tools with GNU General Public License v3.0 | 5 votes |
def search_dogpile_(target):
    engine = Dogpile()
    results = engine.search("'" + target + "'")
    for r in results:
        print("|--[INFO][DOGPILE][RESULTS][>] " + r["title"] + " | " + r["text"] + " | " + r["link"])
        try:
            web = requests.get(r["link"], timeout=3)
            print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
            if 200 <= web.status_code < 300:  # only parse successful (2xx) responses
                TEXT = er.remove_tags(str(web.text))
                parser.parserMAIN(TEXT)
        except Exception as e:
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
Example #15
Source File: views.py From Microsoft-chatbot with MIT License | 5 votes |
def who_is(query, session_id="general"):
    try:
        return wikipedia.summary(query)
    except requests.exceptions.SSLError:
        return "Sorry I could not search online due to SSL error"
    except:
        pass
    for new_query in wikipedia.search(query):
        try:
            return wikipedia.summary(new_query)
        except:
            pass
    return "Sorry I could not find any data related to '%s'" % query
Example #16
Source File: wiki_search.py From NeuralTripleTranslation with Apache License 2.0 | 5 votes |
def get(query):
    return find_candidates(find_json(query))

# with open("testoutput.txt", "w") as text_file:
#     for i in get('video-assisted thoracoscopic'):
#         try:
#             # print get('video-assisted thoracoscopic')
#             # print wikipedia.page(i)
#             # allwiki.append(wikipedia.page(i).content)
#             text_file.write('%s\n\n' % wikipedia.page(i).content.encode('utf8'))
#             # print type(wikipedia.page(i).content.encode('utf8'))
#             # print wikipedia.page(i).content
#         except:
#             print "Unexpected error:", sys.exc_info()[0]
# print get('video-assisted thoracoscopic')

# for keys in world_dict.keys():
#     for term in world_dict.get(keys, keys):
#         if wikipedia.search(term):
#             term = wikipedia.search(term)[0]
#             wikipage = wikipedia.page(term)
#             content = wikipage.content
#             allwiki.append(content)
# # print (wikipedia.page("Georgia_(country)").content)
# # print wikipedia.search('fraction of inspired o2')[0]
# text_file.close()
Example #17
Source File: fetch_tax_info.py From idseq-dag with MIT License | 5 votes |
def get_wiki_content_for_page(taxid, pageid, taxname, taxid2wikicontent, mutex, semaphore, max_attempt=3):
    ''' Fetch wiki content for pageid '''
    for attempt in range(max_attempt):
        try:
            page = None
            if pageid:
                log.write(f"fetching wiki {pageid} for {taxid}")
                page = wikipedia.page(pageid=pageid)
            elif taxname:
                search_results = wikipedia.search(taxname)
                if len(search_results) > 0:
                    wikiname = str(search_results[0])
                    if taxname.lower() == wikiname.lower():
                        page = wikipedia.page(wikiname)
                if not page:
                    # query the page directly
                    try:
                        page = wikipedia.page(taxname.replace(" ", "_"))
                    except:
                        page = None
            if page:
                output = {
                    "pageid": page.pageid,
                    "description": page.content[:1000],
                    "title": page.title,
                    "summary": page.summary
                }
                with mutex:
                    taxid2wikicontent[taxid] = output
            break  # attempt completed; retry only on exceptions
        except:
            log.write(f"having trouble fetching {taxid} wiki {pageid} attempt {attempt}")
    semaphore.release()
Example #18
Source File: fetch_tax_info.py From idseq-dag with MIT License | 5 votes |
def get_taxid_mapping_for_batch(taxids, taxid2wikidict, mutex, semaphore, max_attempt=3):
    ''' Get wiki mapping for a list of taxids '''
    taxid_str = ",".join(taxids)
    log.write(f"fetching batch {taxid_str}")
    parsed = {}  # keep defined even if every attempt fails
    for attempt in range(max_attempt):
        try:
            handle = Entrez.elink(dbfrom="taxonomy", id=taxid_str, cmd="llinks")
            record = Entrez.read(handle)
            handle.close()
            results = record[0]['IdUrlList']['IdUrlSet']
            for result in results:
                taxid = result['Id']
                wikiurl = ""
                for link in result['ObjUrl']:
                    url = str(link['Url'])
                    if re.search('wikipedia.org', url):
                        wikiurl = url
                        break
                parsed[taxid] = wikiurl
            break
        except:
            log.write(f"failed batch attempt {attempt}")
            time.sleep(5)
    semaphore.release()
    with mutex:
        taxid2wikidict.update(parsed)
Example #19
Source File: search.py From W.I.L.L with MIT License | 4 votes |
def search_google(query):
    '''Search google and determine if wikipedia is in it'''
    search_object = google.search(query)
    # Determine if a wikipedia url is in the first few search results
    urls = []
    for i in range(0, 4):
        url = search_object.__next__()
        urls.append(url)
        if "wikipedia.org/wiki" in url:
            wikipedia_search = wikipedia.search(query)[0]
            url = wikipedia.page(wikipedia_search).url
            response = wikipedia.summary(wikipedia_search) + " ({0})".format(url)
            return response
    # If there were no wikipedia pages
    first_url = urls[0]
    try:
        article = Article(first_url)
        article.download()
        article.parse()
        article.nlp()
        article_summary = article.summary
        article_title = article.title
        return "{0}\n{1} - ({2})".format(article_summary, article_title, first_url)
    except Exception as article_exception:
        try:
            log.debug("Got error {0}, {1} while using newspaper, switching to bs4".format(
                str(article_exception), article_exception.args))
            html = requests.get(first_url).text
            # Parse the html using bs4
            soup = BeautifulSoup(html, "html.parser")
            [s.extract() for s in soup(['style', 'script', '[document]', 'head', 'title'])]
            text = soup.getText()
            # break into lines and remove leading and trailing space on each
            lines = (line.strip() for line in text.splitlines())
            # break multi-headlines into a line each
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            # drop blank lines
            soup_text = '\n'.join(chunk for chunk in chunks if " " in chunk)
            response = format(soup_text) + " ({0})".format(first_url)
            return response
        except Exception as search_exception:
            log.info("Error {0},{1} occurred while searching query {2}".format(
                str(search_exception), search_exception.args, query))
            return "Error encountered on query {0}".format(query)