Python re.findall() Examples
The following are 30 code examples of re.findall().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module re, or try the search function.
Example #1
Source File: utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def parse_labelme_poly(filename):
    """Read a LabelMe XML annotation and return its objects as dicts.

    Every ``object`` element yields one dict holding the text of its
    ``name``, ``deleted``, ``occluded`` and ``attributes`` children, the
    ``verified`` child converted to ``int``, and ``polygon``: a flat list
    ``[x0, y0, x1, y1, ...]`` of the coordinate *strings* found in the
    ``pt`` children of the ``polygon`` element.
    """
    def _text(node, tag):
        return node.find(tag).text

    records = []
    for node in ET.parse(filename).findall('object'):
        record = {
            'name': _text(node, 'name'),
            'deleted': _text(node, 'deleted'),
            'verified': int(_text(node, 'verified')),
            'occluded': _text(node, 'occluded'),
            'attributes': _text(node, 'attributes'),
            'polygon': [],
        }
        # Coordinates are kept as text and flattened pairwise.
        for vertex in node.find('polygon').findall('pt'):
            record['polygon'].extend((_text(vertex, 'x'), _text(vertex, 'y')))
        records.append(record)
    return records
Example #2
Source File: reportMetrics.py From InsightAgent with Apache License 2.0 | 6 votes |
def extract_fields_db2(obj, line, field_name_regex):
    """Split ``line`` into ``name: value`` fields and store them in ``obj``.

    Whitespace in front of every ``#`` is removed first. Field names are the
    matches of ``field_name_regex``; they are processed right-to-left so each
    value is the text between its name and the next already-consumed field.
    Values have runs of whitespace collapsed to single spaces.

    :param obj: dict updated in place; keys already present are never
        overwritten (and their text is not cut off the line).
    :param line: the raw text line to parse.
    :param field_name_regex: pattern matching a field name including its
        trailing colon.
    :return: the name of the right-most field that was stored, or ``''``
        when nothing was stored.
    """
    # Raw strings: '\s' in a plain literal is an invalid escape sequence.
    line = '#'.join(re.split(r'\s*#', line))
    last_key = ''
    for field in reversed(re.findall(field_name_regex, line)):
        start = line.find(field)
        split_at = start + len(field)
        field_name = re.split(r'\s*:', field)[0]
        # don't overwrite existing fields
        if field_name in obj:
            continue
        obj[field_name] = ' '.join(line[split_at:].split())
        if not last_key:
            last_key = field_name
        # Drop the consumed field so the next value stops before it.
        line = line[:start]
    return last_key
Example #3
Source File: helpers.py From tensortrade with Apache License 2.0 | 6 votes |
def scale_times_to_generate(times_to_generate: int, time_frame: str):
    """Scale ``times_to_generate`` by the number of minutes in ``time_frame``.

    ``time_frame`` is a count followed by a unit, e.g. ``'5min'``, ``'1H'``,
    ``'2D'``, ``'1W'`` or ``'1M'``. The unit is detected case-insensitively
    by substring, checked in the order MIN, H, D, W, M.

    :param times_to_generate: base number of samples.
    :param time_frame: time-frame specification containing at least one digit.
    :return: ``times_to_generate`` multiplied by the time frame in minutes.
    :raises ValueError: if no known unit is present.
    :raises IndexError: if a unit is present but ``time_frame`` has no digits.
    """
    unit = time_frame.upper()
    # NOTE(review): substring matching means a spelled-out unit such as
    # 'MONTH' hits the 'H' branch first; callers should pass short codes.
    if 'MIN' in unit:
        minutes_per_unit = 1
    elif 'H' in unit:
        minutes_per_unit = 60
    elif 'D' in unit:
        minutes_per_unit = 60 * 24
    elif 'W' in unit:
        minutes_per_unit = 60 * 24 * 7
    elif 'M' in unit:
        # A month is 30 days; the previous factor (60 * 24 * 7 * 30) was
        # 30 *weeks*, seven times too large.
        minutes_per_unit = 60 * 24 * 30
    else:
        raise ValueError('Timeframe must be either in minutes (min), hours (H), days (D), weeks (W), or months (M)')

    count = int(re.findall(r'\d+', time_frame)[0])
    times_to_generate *= count * minutes_per_unit
    return times_to_generate
Example #4
Source File: adventure.py From Dumb-Cogs with MIT License | 6 votes |
async def adventure_command(self, ctx, *, text):
    "Do something in your adventure"
    # Declared ``async`` because the body awaits; a plain ``def`` containing
    # ``await`` does not compile.
    words = re.findall(r'\w+', text)
    if not words:
        return

    channel = ctx.message.channel
    server = ctx.message.server
    author = ctx.message.author
    try:
        team = self.players[server.id][channel.id][author.id]
    except Exception:
        # Any lookup failure means the author has no running adventure here.
        # ``Exception`` (not a bare except) lets task cancellation propagate.
        await self.bot.reply('You are not in an adventure. If your team has embarked on one, join them using `{}adventure join`, otherwise embark on your own adventure.'.format(ctx.prefix))
        return
    game = self.game_loops[server.id][team][channel.id]["GAME"]
    await self.baudout(ctx, game.do_command(words, ctx, self))
Example #5
Source File: test_sanity_tutorials.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def test_tutorial_tested():
    """
    Make sure that every tutorial that isn't in the whitelist
    has been added to the tutorial test file
    """
    here = os.path.dirname(__file__)
    tutorial_test_file = os.path.join(here, 'test_tutorials.py')
    # Context manager so the handle is closed even if reading fails.
    with open(tutorial_test_file, 'r') as f:
        tutorial_test_text = f.read()

    tutorial_path = os.path.join(here, '..', '..', 'docs', 'tutorials')
    tutorials = glob.glob(os.path.join(tutorial_path, '**', '*.md'))

    # Names passed to _test_tutorial_nb('...') in the test file.
    tested_tutorials = set(re.findall(r"assert _test_tutorial_nb\('(.*)'\)", tutorial_test_text))
    for tutorial in tutorials:
        # "<parent dir>/<file name without extension>"
        friendly_name = '/'.join(tutorial.split('/')[-2:]).split('.')[0]
        if friendly_name not in tested_tutorials and friendly_name+".md" not in whitelist_set:
            assert False, "{} has not been added to the tests/tutorials/test_tutorials.py test_suite".format(friendly_name)
Example #6
Source File: method.py From py2swagger with MIT License | 6 votes |
def _get_path_parameters(self):
    """
    Build one required string parameter per ``/{name}`` segment in the
    introspected URL path.

    :return: list of parameters
    :rtype: list
    """
    names = re.findall(r'/{(.+?)}', self.introspector.path)
    return [
        {'name': name, 'type': 'string', 'in': 'path', 'required': True}
        for name in names
    ]
Example #7
Source File: utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def parse_rec(filename):
    """Read a PASCAL VOC XML annotation and return its objects as dicts.

    Every ``object`` element yields a dict with ``name`` and ``pose`` (text),
    ``truncated`` and ``difficult`` (ints) and ``bbox``: the integer list
    ``[xmin, ymin, xmax, ymax]`` read from the ``bndbox`` child.
    """
    records = []
    for node in ET.parse(filename).findall('object'):
        record = {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
        }
        box = node.find('bndbox')
        record['bbox'] = [
            int(box.find(corner).text)
            for corner in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        records.append(record)
    return records
Example #8
Source File: simplesam.py From simplesam with MIT License | 6 votes |
def parse_md(self):
    """Return the ungapped reference sequence from the MD tag, if present.

    Starts from ``self.gapped('seq')`` and, walking the MD string, overwrites
    the positions named by the tag with the bases the tag carries. The result
    (a list of single characters) is memoised in ``self._cache``.

    :raises KeyError: if the record has no ``MD`` tag.
    :raises IndexError: if the MD tag addresses a position past the end of
        the gapped sequence.
    """
    try:
        return self._cache['parse_md']
    except KeyError:
        pass
    try:
        md = self['MD']
    except KeyError:
        raise KeyError('MD tag not found in SAM record.')
    ref_seq = list(self.gapped('seq'))
    position = 0
    # Each match is (run of matching bases, optional bases from the tag).
    for run_length, bases in re.findall(r"([0-9]+)\^?([A-Z]+)?", md):
        position += int(run_length)
        for base in bases:
            try:
                ref_seq[position] = base
            except IndexError:
                # Report what went wrong instead of dumping locals().
                raise IndexError(
                    'MD tag %r addresses position %d, beyond the gapped '
                    'sequence of length %d' % (md, position, len(ref_seq)))
            position += 1
    self._cache['parse_md'] = ref_seq
    return ref_seq
Example #9
Source File: utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def reWriteImgWithMask(srcpath, dstpath, gtpath, srcform, dstform):
    """Re-save every image whose annotation has 'area' or 'mask' objects.

    For each annotation file under ``gtpath``, collects a polygon for every
    object whose name contains ``area`` or ``mask``. The matching image
    (``srcpath`` + basename + ``srcform``) is read and, if at least one
    polygon was collected, written with the mask to ``dstpath`` + basename
    + ``dstform``.
    """
    for annotation_file in GetFileFromThisRootDir(gtpath):
        # Both name kinds are handled identically, so any hit qualifies.
        polygons = [
            shgeo.Polygon(entry['poly'])
            for entry in parse_bod_poly(annotation_file)
            if re.findall('area|mask', entry['name'])
        ]
        stem = mybasename(annotation_file)
        image = cv2.imread(os.path.join(srcpath, stem + srcform))
        target = os.path.join(dstpath, stem + dstform)
        if polygons:
            saveimageWithMask(image, target, polygons)
Example #10
Source File: compiler.py From PyOptiX with MIT License | 6 votes |
def _has_modified_includes(cls, file_path, modified_after, depth=4):
    """Tell whether any quoted ``#include`` of ``file_path`` changed recently.

    Each ``#include "..."`` target is looked up in every directory of
    ``cls._program_directories``; existing candidates are checked directly
    and then recursively, up to ``depth`` levels.

    :param file_path: source file to scan.
    :param modified_after: timestamp compared against ``os.path.getmtime``.
    :param depth: remaining recursion budget; ``0`` stops the search.
    :return: ``True`` if an included file has an mtime greater than
        ``modified_after``, otherwise ``False``.
    """
    if depth == 0:
        return False

    # Stop at the first closing quote: the former greedy "(.*)" swallowed
    # any later quoted text on the same line (e.g. in a trailing comment).
    include_pattern = r'#include\s*"([^"\n]*)"'

    with open(file_path) as f:
        content = f.read()

    for included_path in re.findall(include_pattern, content):
        for include_dir in cls._program_directories:
            candidate = os.path.join(include_dir, included_path)
            if not os.path.exists(candidate):
                continue
            if os.path.getmtime(candidate) > modified_after:
                return True
            if cls._has_modified_includes(candidate, modified_after, depth=depth - 1):
                return True

    return False
Example #11
Source File: huaban.py From PickTrue with MIT License | 6 votes |
def __init__(self, board_url_or_id):
    """Set up a board from its numeric id or from a board URL.

    :param board_url_or_id: a board id, or a URL containing
        ``boards/<digits>``; anything containing ``http`` is treated as a URL.
    :raises IndexError: if a URL is given that has no ``boards/<digits>`` part.
    """
    board_id = str(board_url_or_id)
    self.fetcher = HuaBanFetcher()
    if "http" in board_id:
        # No trailing slash required, so ".../boards/123" works as well as
        # ".../boards/123/".
        board_id = re.findall(r'boards/(\d+)', board_id)[0]
    self.id = board_id
    path = "/boards/{board_id}/".format(
        board_id=board_id,
    )
    self.base_url = urljoin(BASE_URL, path)
    self.further_pin_url_tpl = urljoin(
        self.base_url,
        "?{random_string}"
        "&max={pin_id}"
        "&limit=20"
        "&wfl=1"
    )

    # uninitialized properties
    self.pin_count = None
    self.title = None
    self.description = None
    self._pins = []
    self._init_board()
Example #12
Source File: vcc_utils.py From VEX_Syntax with MIT License | 6 votes |
def context_function_signatures(context, vcc_path=VCC_PATH):
    """Return the function signatures ``vcc -X <context>`` reports.

    Runs ``vcc_path`` with ``-X context``, decodes the output as ASCII and
    extracts every ``<type> <name>(<args>)`` occurrence.

    :param context: context name passed to vcc.
    :param vcc_path: path of the vcc executable.
    :return: list of dicts with keys ``returns``, ``name``, ``ctx``,
        ``args`` (the argument text split on ``;``), ``str`` (the rebuilt
        signature) and ``hint`` (empty for ``void`` argument lists).
    """
    ctx_info = subprocess.check_output([vcc_path, '-X', context])
    ctx_info = ctx_info.decode('ascii')

    sigs = []
    # Raw string: the pattern is full of backslash escapes.
    signature_re = r'(\w+(\[\])?) (\w+)\((.*)\)'
    for return_type, _array_suffix, name, arg_text in re.findall(signature_re, ctx_info):
        sig_str = '%s %s(%s)' % (return_type, name, arg_text)
        if arg_text == 'void':
            hint_str = ''
        else:
            hint_str = '%s\n(%s)' % (return_type, arg_text.strip().rstrip(';'))
        args = [x.strip() for x in arg_text.split(';')]
        sigs.append({'returns': return_type,
                     'name': name,
                     'ctx': context,
                     'args': args,
                     'str': sig_str,
                     'hint': hint_str})
    return sigs
Example #13
Source File: tnslsnr-ping.py From zbxdb with GNU General Public License v3.0 | 6 votes |
def ParseNestedParen(string, level):
    """Return the text enclosed by the ``level``-th nested pair of ().

    Unbalanced input is repaired first: missing ``)`` are appended and
    missing ``(`` are prepended. The n-th ``(`` is then paired with the
    n-th ``)`` counted from the right, which is the matching partner when
    the parentheses are purely nested.

    :param string: text containing parentheses.
    :param level: 0-based nesting level to extract.
    :return: a one-element list holding the enclosed substring.
    :raises IndexError: if ``string`` has fewer than ``level + 1`` pairs.
    """
    opens = string.count('(')
    closes = string.count(')')
    # Same result as adding one parenthesis per recursive call.
    if opens > closes:
        string = string + ')' * (opens - closes)
    elif opens < closes:
        string = '(' * (closes - opens) + string

    starts = [i + 1 for i, ch in enumerate(string) if ch == '(']
    ends = [i for i, ch in enumerate(string) if ch == ')']
    spans = list(zip(starts, reversed(ends)))
    start, end = spans[level]
    return [string[start:end]]
Example #14
Source File: test_re.py From jawfish with MIT License | 6 votes |
def test_string_boundaries(self):
    """Check how ``\\b`` and ``\\B`` behave at the edges of a string."""
    # See http://bugs.python.org/issue10713
    word = "abc"
    self.assertEqual(re.search(r"\b(abc)\b", word).group(1), "abc")
    # There's a word boundary at the start of a string.
    self.assertTrue(re.match(r"\b", word))
    # A non-empty string includes a non-boundary zero-length match.
    self.assertTrue(re.search(r"\B", word))
    # There is no non-boundary match at the start of a string.
    self.assertFalse(re.match(r"\B", word))
    # However, an empty string contains no word boundaries, and also no
    # non-boundaries.
    self.assertEqual(re.search(r"\B", ""), None)
    # This one is questionable and different from the perlre behaviour,
    # but describes current behavior.
    self.assertEqual(re.search(r"\b", ""), None)

    match_counts = (
        # A single word-character string has two boundaries, but no
        # non-boundary gaps.
        (r"\b", "a", 2),
        (r"\B", "a", 0),
        # If there are no words, there are no boundaries
        (r"\b", " ", 0),
        (r"\b", " ", 0),
        # Can match around the whitespace.
        (r"\B", " ", 2),
    )
    for pattern, text, expected in match_counts:
        self.assertEqual(len(re.findall(pattern, text)), expected)
Example #15
Source File: conftest.py From sanic with MIT License | 5 votes |
def generate_url_for_template(template):
    """Fill every typed ``<name:type>`` placeholder of ``template``.

    For each placeholder run found, the generator registered for its type in
    ``TYPE_TO_GENERATOR_MAP`` is called and the stringified value replaces
    all occurrences of that placeholder text.
    """
    placeholder_re = re.compile(r"((?:<\w+:(string|int|number|alpha|uuid)>)+)")
    url = template
    for placeholder, type_name in re.findall(placeholder_re, template):
        make_value = TYPE_TO_GENERATOR_MAP.get(type_name)
        url = url.replace(placeholder, str(make_value()), -1)
    return url
Example #16
Source File: data_process.py From nlp-tensorflow with MIT License | 5 votes |
def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    A token is either a run of word characters or one character that is
    neither whitespace nor a word character; whitespace is discarded.
    """
    token_pattern = re.compile(r"[\w]+|[^\s\w]")
    return token_pattern.findall(sentence)
Example #17
Source File: ksp_plugin.py From SublimeKSP with GNU General Public License v3.0 | 5 votes |
def _extract_completions(self, view, prefix, point):
    """Return completion candidates for ``prefix``, allowing dotted prefixes.

    Without a ``.`` in the prefix this defers to
    ``view.extract_completions``. With one, the whole buffer is scanned for
    the prefix followed by identifier/dot characters and the matches are
    returned sorted.
    """
    if '.' not in prefix:
        # default implementation if no '.' in the prefix
        return view.extract_completions(prefix, point)

    # the sublime view.extract_completions implementation doesn't seem to allow for
    # the . character to be included in the prefix irrespectively of the "word_separators" setting,
    # so scan the buffer manually (potentially slow).
    buffer_text = view.substr(sublime.Region(0, view.size()))
    dotted_word = re.escape(prefix) + r'[a-zA-Z0-9_.]+'
    return sorted(re.findall(dotted_word, buffer_text))
Example #18
Source File: data_process.py From nlp-tensorflow with MIT License | 5 votes |
def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    A token is either a run of word characters or one character that is
    neither whitespace nor a word character; whitespace is discarded.
    """
    token_pattern = re.compile(r"[\w]+|[^\s\w]")
    return token_pattern.findall(sentence)
Example #19
Source File: mailslurper_import.py From sarlacc with MIT License | 5 votes |
def main():
    """Copy every row of the ``mailitem`` table into the configured storage.

    Reads ``./smtpd.cfg``, connects to the local ``sarlacc`` MySQL database
    and passes each mail item to ``store.store_email`` after extracting the
    addresses found between angle brackets and base64-decoding the body.
    """
    config = ConfigParser()
    config.read("./smtpd.cfg")

    store = storage.StorageControl(config)

    cnx = mysql.connector.connect(
        user="root", password="root",
        host="localhost",
        database="sarlacc")
    # try/finally so the connection is released even if a row fails.
    try:
        mysql_cursor = cnx.cursor()

        mysql_cursor.execute("SELECT dateSent, fromAddress, toAddressList, subject, body FROM mailitem;")

        for (dateSent, fromAddress, toAddressList, subject, body) in mysql_cursor:
            # tidy up fromAddress
            fromAddress = cleanupAddress(re.findall(r"<(.*?)>", fromAddress)[0])

            # tidy up toAddressList
            toAddressList = re.findall(r"<(.*?)>", toAddressList)

            # NOTE(review): on Python 3, str() of the decoded bytes yields
            # the "b'...'" representation rather than decoded text - confirm
            # this is what the storage layer expects.
            body = str(b64decode(body))

            store.store_email(subject, toAddressList, fromAddress, body, dateSent, [])

        mysql_cursor.close()
    finally:
        cnx.close()
Example #20
Source File: webfinger.py From Webfinger with GNU General Public License v3.0 | 5 votes |
def check_rule(self, key, header, body, title):
    """Fingerprint identification.

    Depending on whether ``key`` contains ``title="`` or ``body="`` (header
    otherwise), the first match of the corresponding rule regex in ``key``
    is searched for in the page title (case-insensitively), body or header.

    Returns ``True`` on a hit; otherwise, including when any error occurs,
    falls through and returns ``None``.
    """
    try:
        if 'title="' in key:
            needle = re.findall(rtitle, key)[0].lower()
            haystack = title.lower()
        elif 'body="' in key:
            needle = re.findall(rbody, key)[0]
            haystack = body
        else:
            needle = re.findall(rheader, key)[0]
            haystack = header
        if needle in haystack:
            return True
    except Exception:
        # Best effort: a rule that cannot be evaluated counts as no match.
        pass
Example #21
Source File: tumblrdownloader.py From TumblrDownloader with MIT License | 5 votes |
def _getimages(self):
    '''
    Get all images returned by Tumblr API

    Substitutes the current start offset for ``#start#`` in ``self.api_url``,
    downloads the page and returns the contents of every ``photo-url``
    element whose ``max-width`` equals ``self._resolution``.
    '''
    site = self.api_url.replace("#start#", str(self._start))

    response = urlopen(site)
    # Close the response even when reading or decoding fails.
    try:
        data = response.read().decode('utf8')
    finally:
        response.close()

    regex = r"<photo-url max-width=\"" + str(self._resolution) + "\">(.+?)</photo-url>"
    imagelist = re.findall(regex, data)
    return imagelist
Example #22
Source File: inputs.py From NGU-scripts with GNU Lesser General Public License v3.0 | 5 votes |
def get_numbers(s: str) -> Iterable[int]:
    """Finds all numbers in a string.

    Spaces and number separators are removed via the ``Inputs`` helpers
    first. Each number is a run of digits, optionally followed by a
    ``.<digits>E+<digits>`` part, and is returned as ``int(float(text))``.
    """
    cleaned = Inputs.remove_number_separators(Inputs.remove_spaces(s))
    found = re.findall(r"(\d+(\.\d+E\+\d+)?)", cleaned)
    # Each hit is (whole number text, optional exponent part).
    return [int(float(whole)) for whole, _exponent in found]
Example #23
Source File: data_process.py From nlp-tensorflow with MIT License | 5 votes |
def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    A token is either a run of word characters or one character that is
    neither whitespace nor a word character; whitespace is discarded.
    """
    token_pattern = re.compile(r"[\w]+|[^\s\w]")
    return token_pattern.findall(sentence)
Example #24
Source File: dvrlogin.py From hkdvr_login with MIT License | 5 votes |
def getinfo(host):
    """Try the built-in credentials against ``host`` on ports 80-99.

    Requests ``/ISAPI/Security/userCheck`` on every port and prints a
    success line when the response carries a ``statusValue`` of ``200``.
    Connection and parsing errors are ignored so the scan continues.
    """
    username = "admin"
    password = "12345"
    timeout = 5
    for port in range(80, 100):
        try:
            req = requests.get(url='http://' + username + ':' + password + '@' + host + ':' + str(port) + '/ISAPI/Security/userCheck', timeout=timeout)
            result = req.text
            status = re.findall(r'<statusValue>(.*)</statusValue>', result)
            # Guard the empty case instead of relying on an IndexError.
            if status and status[0] == '200':
                # Parenthesised form prints the same on Python 2 and 3.
                print('[√] Host http://' + host + ':' + str(port) + ' Login Success!')
        except Exception:
            # Best effort per port; unlike a bare except this still lets
            # Ctrl-C interrupt the scan.
            pass
Example #25
Source File: adventure.py From Dumb-Cogs with MIT License | 5 votes |
async def team_saves(self, ctx, team=None):
    """Reply with the list of saves of a team, newest first.

    When ``team`` is omitted it is taken from the author's current
    adventure in this channel, or else from the author's only team on this
    server. Save files are read from
    ``data/adventure/saves/<server id>/<team>/*.save``.
    """
    # TeamNebNeb didn't show saves also !advernture embark didn't load save
    # Declared ``async`` because the body awaits; a plain ``def`` containing
    # ``await`` does not compile.
    author = ctx.message.author
    server = ctx.message.server
    channel = ctx.message.channel
    if team is None:
        try:
            team = self.players[server.id][channel.id][author.id]
        except Exception:
            try:
                teams = self.teams[server.id]["MEMBERS"][author.id]
                if len(teams) != 1:
                    await self.bot.reply('You are in more than one team. Please specify which team to see the saves for.')
                    return
                team = teams[0]
            except Exception:
                await self.bot.reply('You are not in any team. Find one that will recruit you or create you own with `{}team new`'.format(ctx.prefix))
                return
    team = self._safe_path(team).lower()
    tname = self._team_name(server, team)
    try:
        # http://stackoverflow.com/questions/168409/how-do-you-get-a-directory-listing-sorted-by-creation-date-in-python
        files = list(filter(os.path.isfile, glob.glob('data/adventure/saves/{}/{}/*.save'.format(server.id, team))))
        files.sort(key=os.path.getmtime, reverse=True)
        if not files:
            raise NoSave
        msg = tname + "'s save"
        if len(files) > 1:
            msg += 's'
        # The save name is the file name without its extension. Taking it
        # from the path avoids building a regex out of unescaped ids.
        names = [os.path.splitext(os.path.basename(sv))[0] for sv in files]
        msg += ':\n' + '\n'.join([str(num + 1) + ". " + name for num, name in enumerate(names)])
        await self.bot.reply(msg)
    except Exception as e:
        print(e)
        await self.bot.reply('The {} team does not have any saves'.format(tname))
    # only leaders can recruit?
Example #26
Source File: alot.py From Dumb-Cogs with MIT License | 5 votes |
async def alot_of_checks(self, message):
    """React to a message containing an 'alot' by posting an alot picture URL.

    Ignores the bot's own messages, servers where the feature is off (the
    default for unseen servers) and messages without a space. A message
    that is exactly "what's an alot?" gets an explanatory link. Otherwise,
    if removing the first ``self.alotRegex`` match changes the text, a URL
    is picked at random from the alots whose tags match ``self.keyRegex``,
    or from all alots when no tag matches.
    """
    # Declared ``async`` because the body awaits; a plain ``def`` containing
    # ``await`` does not compile.
    if message.author.id == self.bot.user.id:
        return

    server = message.server
    # let PMs through
    if server is not None:
        if server.id not in self.settings["SERVERS"]:
            # default off
            self.settings["SERVERS"][server.id] = False
        if not self.settings["SERVERS"][server.id]:
            return

    lower = message.content.lower()
    if ' ' not in lower:
        return

    if lower == "what's an alot?":
        await self.bot.send_message(message.channel, "This is an alot: http://hyperboleandahalf.blogspot.com/2010/04/alot-is-better-than-you-at-everything.html")
        return

    # ``count`` as keyword: passing it positionally is deprecated.
    lowerm = re.sub(self.alotRegex, "", lower, count=1)
    if lowerm == lower:
        return

    matchedKeys = re.findall(self.keyRegex, lowerm)
    matchedTags = []
    for k in matchedKeys:
        for tag in self.alotTags[k]:
            if tag not in matchedTags:
                matchedTags.append(tag)

    if matchedTags:
        url = self.alots[randchoice(matchedTags)]
    else:
        url = randchoice(list(self.alots.values()))
    await self.bot.send_message(message.channel, url)
Example #27
Source File: lolz.py From Dumb-Cogs with MIT License | 5 votes |
def translate_sentence(self, sentence):
    """Translate ``sentence`` word by word, keeping the separators.

    Sentences in which ``self.regex['link']`` matches are returned
    unchanged. Otherwise the text is split into (word, separator) pairs -
    a word being a run of word characters and ``:`` - and each word is
    passed through ``self.translate_word``.
    """
    # no links
    if re.findall(self.regex['link'], sentence):
        return sentence

    # reminder to self...
    # ([:\w]*)  - match 0 or more a-zA-Z0-9_ or ':' group
    # ([^:\w]*) - match 0 or more non-(see above) group
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    pieces = re.findall(r"([:\w]*)([^:\w]*)", sentence)
    # join instead of repeated += to avoid quadratic string building.
    return ''.join(self.translate_word(word) + space for word, space in pieces)
Example #28
Source File: avclass_common.py From BASS with GNU General Public License v2.0 | 5 votes |
def __norm_cat(self, label, hashes):
    """Map the tokens of an AV ``label`` to category names.

    The label is split on non-alphanumeric characters. Each token is
    lower-cased and stripped of trailing digits; tokens shorter than four
    characters or that are a prefix of one of ``hashes`` are dropped. A
    surviving token contributes the first key of ``self.cat`` whose value
    collection contains it; tokens found in no category are dropped.

    :param label: the AV label, may be empty or ``None``.
    :param hashes: hash strings of the sample (e.g. MD5/SHA1/SHA256).
    :return: list of category names, in token order.
    """
    if not label:
        return []

    # Initialize list of tokens to return
    ret = []

    # Split label into tokens and process each token
    for token in re.split("[^0-9a-zA-Z]", label):
        # Convert to lowercase
        token = token.lower()

        # Remove digits at the end
        # FIXME: What if it is a hash, and removes digits at the end???
        token = re.sub(r"\d+$", "", token)

        # Ignore short token
        if len(token) < 4:
            continue

        # Ignore token if prefix of a hash of the sample
        # Most AVs use MD5 prefixes in labels,
        # but we check SHA1 and SHA256 as well
        if any(hash_str.startswith(token) for hash_str in hashes):
            continue

        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for category, members in self.cat.items():
            if token in members:
                ret.append(category)
                break

    return ret
Example #29
Source File: interval.py From rate.sx with MIT License | 5 votes |
def parse_length(length):
    """
    Convert an interval specification such as ``1h30m`` to seconds.

    The specification is a sequence of ``<digits><unit>`` pairs where the
    unit is one of the keys of ``INTERVAL_LENGTH``. ``None`` is returned
    when ``length`` contains anything besides such pairs.

    >>> parse_length('1m')
    60
    >>> parse_length('1h1m')
    3660
    >>> parse_length('1')
    >>> parse_length('1hX1m')
    >>> parse_length('1d')
    86400
    >>> parse_length('2M')
    5184000
    """
    unit_chars = "".join(INTERVAL_LENGTH.keys())
    total = 0
    consumed = []
    for digits, unit in re.findall('([0-9]+)([%s])' % unit_chars, length):
        consumed.append(digits + unit)
        try:
            total += int(digits) * INTERVAL_LENGTH[unit]
        except KeyError:
            return None

    # If the recognised pairs do not add up to the whole input, some
    # characters were skipped and the specification is invalid.
    if "".join(consumed) != length:
        return None
    return total
Example #30
Source File: tnslsnr-ping.py From zbxdb with GNU General Public License v3.0 | 5 votes |
def getVersion(cmd):
    """Send the get-version command to the listener and parse the reply.

    Patches the packet and command lengths into the module-level
    ``TNSPacket``, sends it with ``cmd`` to ``(HOST, PORT)`` and extracts
    ``VSNNUM`` and ``ERR`` from the outermost parenthesised reply.

    :param cmd: command text appended to the packet.
    :return: ``(vsnnum, err, version, elapsed_ms)``. On any failure
        ``(0, "12541", "notfound", elapsed_ms)`` is returned, with
        ``elapsed_ms`` being 0 unless a reply had already been received.
        The failure result used to lack the fourth element, so callers
        unpacking four values crashed exactly when the listener was down.
    """
    cmdl = len(cmd).to_bytes(2, byteorder='big')
    pckl = (len(cmd) + len(TNSPacket)).to_bytes(2, byteorder='big')
    # Bytes 0-1 receive the packet length, bytes 24-25 the command length.
    TNSPacket[0] = pckl[0]
    TNSPacket[1] = pckl[1]
    TNSPacket[24] = cmdl[0]
    TNSPacket[25] = cmdl[1]
    # print(cmd)
    ela = 0
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(3)
            _start = timer()
            s.connect((HOST, PORT))
            scmd = TNSPacket + bytes(cmd, 'utf-8')
            s.sendall(scmd)
            data = s.recv(1024)
            ela = round((timer() - _start) * 1000)
            rectxt = ParseNestedParen(str(data), 0)
            vsnnum = re.findall(r'(?<=VSNNUM=).+?(?=\))', str(rectxt), flags=re.IGNORECASE)
            err = re.findall(r'(?<=ERR=).+?(?=\))', str(rectxt), flags=re.IGNORECASE)
            version = vsnnumToVersion(vsnnum[0])
            return vsnnum[0], err[0], version, ela
    except Exception:
        # Unreachable listener or unparsable reply: report "no listener".
        return 0, "12541", "notfound", ela