Python re.finditer() Examples
The following are 30
code examples of re.finditer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module
re, or try the search function.
Example #1
Source File: _string.py From jawfish with MIT License | 6 votes |
def formatter_parser(*args, **kw):
    """Parse the argument as a format string.

    Returns a list of ``(literal_text, field_name, format_spec, conversion)``
    style tuples (here: ``(pre, name, flags, None)``), mimicking
    ``string.Formatter.parse``.

    Raises:
        AssertionError: if not called with exactly one ``str`` argument.
    """
    assert len(args) == 1
    assert isinstance(args[0], str)
    _result = []
    # Each iteration consumes an optional literal prefix plus an optional
    # "{...}" replacement field; a final zero-length match yields ("", None).
    for _match in re.finditer(r"([^{]*)?(\{[^}]*\})?", args[0]):
        _pre, _fmt = _match.groups()
        if _fmt is None:
            _result.append((_pre, None, None, None))
        elif _fmt == '{}':
            _result.append((_pre, '', '', None))
        else:
            _m = re.match(r"\{([^!]*)!?(.*)?\}", _fmt)
            # BUG FIX: the original used _m.groups(0) / _m.groups(1), which
            # both return the *tuple of all groups* (the int argument is only
            # a default for unmatched groups). group(1)/group(2) are the
            # field name and the conversion flags.
            _name = _m.group(1)
            _flags = _m.group(2)
            _result.append((_pre, _name, _flags, None))
    return _result
Example #2
Source File: adapter.py From yang-explorer with Apache License 2.0 | 6 votes |
def get_ydk_def_names(python_ydk_defs):
    """Get the Python YDK definition names.

    Args:
        python_ydk_defs: source text containing ``def <name>`` declarations.

    Returns:
        A space-separated (and space-terminated) string of the definition
        names, in order of appearance; empty string if none are found.
    """
    logging.debug('get_ydk_def_names: python_ydk_defs : \n' + python_ydk_defs)
    import re
    names = []
    # The original pattern carried a pointless empty capture group "()";
    # group(0) is identical without it.
    for m in re.finditer(r"def \w+", python_ydk_defs):
        logging.debug('get_ydk_def_names: m.group(0): \n' + m.group(0))
        names.append(m.group(0).replace('def ', ''))
    # Join once instead of repeated "+=" concatenation (avoids quadratic cost).
    ydk_def_names = "".join(name + " " for name in names)
    logging.debug('get_ydk_def_names: ydk_def_names : \n' + ydk_def_names)
    return ydk_def_names
Example #3
Source File: file_dialog.py From panda3dstudio with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_incremented_filename(filename, namestring):
    """Return *filename* with its trailing index bumped past any clash.

    ``namestring`` lists the already-taken names (one per line); the result
    uses the smallest free index at or above the filename's current one.
    """
    import re

    # Split into base text, separating whitespace, and trailing digits.
    base, spacing, digits = re.search(r"(.*?)(\s*)(\d*)$", filename).groups()
    taken_pattern = fr"^{re.escape(base)}\s*(\d+)$"

    if digits:
        start_index = int(digits)
        # Preserve zero-padding width only if the index was zero-padded.
        pad = len(digits) if digits.startswith("0") else 0
        template = base + spacing + "{:0" + str(pad) + "d}"
    else:
        start_index = 1
        template = base + " {:02d}"

    used = [int(m.group(1))
            for m in re.finditer(taken_pattern, namestring, re.I | re.M)]

    # Probe indices starting at start_index; at most len(used) can collide.
    candidate = start_index
    stop = start_index + len(used)
    while candidate < stop:
        if candidate not in used:
            return template.format(candidate)
        candidate += 1
    return template.format(stop)
Example #4
Source File: texttiling.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _mark_paragraph_breaks(self, text): """Identifies indented text or line breaks as the beginning of paragraphs""" MIN_PARAGRAPH = 100 pattern = re.compile("[ \t\r\f\v]*\n[ \t\r\f\v]*\n[ \t\r\f\v]*") matches = pattern.finditer(text) last_break = 0 pbreaks = [0] for pb in matches: if pb.start()-last_break < MIN_PARAGRAPH: continue else: pbreaks.append(pb.start()) last_break = pb.start() return pbreaks
Example #5
Source File: parser.py From jbox with MIT License | 6 votes |
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    token_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                          r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                          r'|\S+)\s*', re.S)
    parts = []
    for token in token_re.finditer(string):
        piece = token.group().strip()
        # A piece wrapped in matching quotes gets unquoted and unescaped.
        if piece[:1] == piece[-1:] and piece[:1] in '"\'':
            piece = (piece[1:-1]
                     .encode('ascii', 'backslashreplace')
                     .decode('unicode-escape'))
            try:
                piece = type(string)(piece)
            except UnicodeError:
                pass
        parts.append(piece)
    return parts
Example #6
Source File: chunkparser_app.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _syntax_highlight_grammar(self, grammar):
    """Re-apply syntax highlighting to the grammar text box.

    Tags line comments ('comment'), angle brackets ('angle') and braces
    ('brace'); bracket characters inside a comment are left untagged.
    No-op if the window (self.top) is gone.
    """
    if self.top is None: return
    self.grammarbox.tag_remove('comment', '1.0', 'end')
    self.grammarbox.tag_remove('angle', '1.0', 'end')
    self.grammarbox.tag_remove('brace', '1.0', 'end')
    self.grammarbox.tag_add('hangindent', '1.0', 'end')
    for lineno, line in enumerate(grammar.split('\n')):
        if not line.strip(): continue
        # Group 2 captures a trailing "#..." comment (backslash-escaped
        # characters in the rule text don't start a comment).
        m = re.match(r'(\\.|[^#])*(#.*)?', line)
        comment_start = None
        if m.group(2):
            comment_start = m.start(2)
            # Tk text indices are "<line>.<column>" with 1-based lines.
            s = '%d.%d' % (lineno+1, m.start(2))
            e = '%d.%d' % (lineno+1, m.end(2))
            self.grammarbox.tag_add('comment', s, e)
        for m in re.finditer('[<>{}]', line):
            # Stop once we reach the comment portion of the line.
            if comment_start is not None and m.start() >= comment_start:
                break
            s = '%d.%d' % (lineno+1, m.start())
            e = '%d.%d' % (lineno+1, m.end())
            if m.group() in '<>':
                self.grammarbox.tag_add('angle', s, e)
            else:
                self.grammarbox.tag_add('brace', s, e)
Example #7
Source File: chunkparser_app.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def show_help(self, tab):
    """Display the help text for *tab* in the help box.

    Substitutes the tagset listing for the <<TAGSET>> placeholder, applies
    elide/auto-tag markup for <tag>...</tag> spans, highlights the selected
    help tab, and leaves the help box read-only again.
    """
    self.helpbox['state'] = 'normal'
    self.helpbox.delete('1.0', 'end')
    for (name, tabstops, text) in self.HELP:
        if name == tab:
            # Sort tagset entries: word-like tags first, then others.
            text = text.replace('<<TAGSET>>', '\n'.join(
                ('\t%s\t%s' % item for item in sorted(
                    list(self.tagset.items()),
                    key=lambda t_w: re.match('\w+', t_w[0])
                    and (0, t_w[0]) or (1, t_w[0])))))
            self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
            self.helpbox.config(tabs=tabstops)
            # Trailing newlines let the box scroll past the last line.
            self.helpbox.insert('1.0', text+'\n'*20)
            # Template for converting a character offset to a Tk index.
            C = '1.0 + %d chars'
            for (tag, params) in self.HELP_AUTOTAG:
                pattern = '(?s)(<%s>)(.*?)(</%s>)' % (tag, tag)
                for m in re.finditer(pattern, text):
                    # Hide the markers themselves; tag the enclosed text.
                    self.helpbox.tag_add('elide',
                                         C % m.start(1), C % m.end(1))
                    self.helpbox.tag_add('tag-%s' % tag,
                                         C % m.start(2), C % m.end(2))
                    self.helpbox.tag_add('elide',
                                         C % m.start(3), C % m.end(3))
        else:
            self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
    self.helpbox['state'] = 'disabled'
Example #8
Source File: tree.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _ieer_read_text(s, top_node):
    """Parse an IEER-formatted string into a Tree.

    ``<b_...>`` tags open a typed chunk, ``<e_...>`` tags close it, and
    every other token becomes a leaf of the current chunk.
    (Python 2 code: note the print statement.)

    Raises:
        ValueError: if the tag structure is unbalanced or malformed.
    """
    stack = [Tree(top_node, [])]
    # Tokenize into SGML-style tags and whitespace-separated words.
    for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
        piece = piece_m.group()
        try:
            if piece.startswith('<b_'):
                m = _IEER_TYPE_RE.match(piece)
                # NOTE(review): when the tag doesn't match the type regex,
                # this prints a marker and then m.group(...) raises
                # AttributeError (not caught below) — presumably a debug
                # leftover; confirm against upstream.
                if m is None: print 'XXXX', piece
                chunk = Tree(m.group('type'), [])
                stack[-1].append(chunk)
                stack.append(chunk)
            elif piece.startswith('<e_'):
                stack.pop()
#           elif piece.startswith('<'):
#               print "ERROR:", piece
#               raise ValueError # Unexpected HTML
            else:
                stack[-1].append(piece)
        except (IndexError, ValueError):
            raise ValueError('Bad IEER string (error at character %d)' %
                             piece_m.start())
    if len(stack) != 1:
        raise ValueError('Bad IEER string')
    return stack[0]
Example #9
Source File: parser.py From recruit with Apache License 2.0 | 6 votes |
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    token_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                          r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                          r'|\S+)\s*', re.S)
    pieces = []
    for token in token_re.finditer(string):
        fragment = token.group().strip()
        # Quoted fragments are unquoted and backslash-escape decoded.
        if fragment[:1] == fragment[-1:] and fragment[:1] in '"\'':
            fragment = (fragment[1:-1]
                        .encode('ascii', 'backslashreplace')
                        .decode('unicode-escape'))
            try:
                fragment = type(string)(fragment)
            except UnicodeError:
                pass
        pieces.append(fragment)
    return pieces
Example #10
Source File: sources.py From brownie with MIT License | 6 votes |
def get_pragma_spec(source: str, path: Optional[str] = None) -> NpmSpec:
    """
    Extracts pragma information from Solidity source code.

    Args:
        source: Solidity source code
        path: Optional path to the source (only used for error reporting)

    Returns:
        NpmSpec object
    """
    match = re.search(r"pragma +solidity([^;]*);", source)
    if match is None:
        if path:
            raise PragmaError(f"No version pragma in '{path}'")
        raise PragmaError("String does not contain a version pragma")
    # Collapse all internal whitespace runs in the version expression.
    version_expr = " ".join(match.group(1).split())
    return NpmSpec(version_expr)
Example #11
Source File: __init__.py From ChromaTerm with MIT License | 6 votes |
def get_matches(self, data):
    """Returns a list of tuples, each of which containing a start index, an
    end index, and the [chromaterm.Color][] object for that match. Only
    regex groups associated with a color are included.

    Args:
        data (str): A string to match regex against.
    """
    if not self.colors:
        return []

    found = []
    for hit in self.regex.finditer(data):
        for group_id, color in self.colors.items():
            begin, finish = hit.span(group_id)
            # Zero-length span: empty match or optional group absent.
            if begin != finish:
                found.append((begin, finish, color))
    return found
Example #12
Source File: test_re.py From jawfish with MIT License | 6 votes |
def test_finditer(self): iter = re.finditer(r":+", "a:b::c:::d") self.assertEqual([item.group(0) for item in iter], [":", "::", ":::"]) pat = re.compile(r":+") iter = pat.finditer("a:b::c:::d", 1, 10) self.assertEqual([item.group(0) for item in iter], [":", "::", ":::"]) pat = re.compile(r":+") iter = pat.finditer("a:b::c:::d", pos=1, endpos=10) self.assertEqual([item.group(0) for item in iter], [":", "::", ":::"]) pat = re.compile(r":+") iter = pat.finditer("a:b::c:::d", endpos=10, pos=1) self.assertEqual([item.group(0) for item in iter], [":", "::", ":::"]) pat = re.compile(r":+") iter = pat.finditer("a:b::c:::d", pos=3, endpos=8) self.assertEqual([item.group(0) for item in iter], ["::", "::"])
Example #13
Source File: remap.py From svviz with MIT License | 6 votes |
def tryAlignExact(query, revquery, target, aligner):
    """Search for an exact occurrence of the query (or its reverse) in
    *target*; return (strand, alignment) or None when neither is found."""
    forward_hits = list(re.finditer(query, target))
    reverse_hits = list(re.finditer(revquery, target))

    if forward_hits:
        alignment = RemapAlignment(forward_hits[0], query, aligner.match)
        strand = "+"
    elif reverse_hits:
        alignment = RemapAlignment(reverse_hits[0], revquery, aligner.match)
        strand = "-"
    else:
        return None

    # Multiple exact hits: record an equal-scoring secondary alignment.
    if len(forward_hits) + len(reverse_hits) > 1:
        alignment.score2 = alignment.score

    return strand, alignment
Example #14
Source File: tnslsnr-ping.py From zbxdb with GNU General Public License v3.0 | 6 votes |
def ParseNestedParen(string, level):
    """
    Generate strings contained in nested (), indexing i = level
    """
    opens = len(re.findall(r"\(", string))
    closes = len(re.findall(r"\)", string))
    if opens == closes:
        # Pair the i-th '(' (offset past it) with the i-th ')' from the end.
        left_edges = [m.start() + 1 for m in re.finditer(r'\(', string)]
        right_edges = reversed([m.start() for m in re.finditer(r'\)', string)])
        spans = list(zip(left_edges, right_edges))
    elif opens > closes:
        # Rebalance by appending the missing ')' and retrying.
        return ParseNestedParen(string + ')', level)
    elif opens < closes:
        # Rebalance by prepending the missing '(' and retrying.
        return ParseNestedParen('(' + string, level)
    else:
        return 'fail'
    return [string[spans[level][0]:spans[level][1]]]
Example #15
Source File: gadgets.py From rop-chainer with GNU General Public License v3.0 | 6 votes |
def _locate_gadgets(self, section, terminals, gadget_type):
    """Scan a binary section for ROP gadgets ending in a terminal instruction.

    For every occurrence of each terminal byte pattern, windows of up to
    self._options.depth bytes ending at the terminal are disassembled
    (32-bit x86) and appended to self._gadgets as dicts with keys
    'vaddr', 'insts', 'gadget_type'.
    """
    # Capstone disassembler, 32-bit x86 mode.
    disassembler = cs.Cs(cs.CS_ARCH_X86, cs.CS_MODE_32)
    for terminal in terminals:
        # terminal[0]: byte pattern; terminal[1]: its length in bytes.
        matches = [match.start()
                   for match in re.finditer(terminal[0], section["data"])]
        for index in matches:
            for i in range(self._options.depth):
                gadget = ""
                # Disassemble the i-byte window preceding (and including)
                # the terminal instruction.
                instructions = disassembler.disasm_lite(
                    section["data"][index-i:index+terminal[1]],
                    section["vaddr"]+index)
                for instruction in instructions:
                    # instruction[2]: mnemonic, instruction[3]: operands.
                    gadget += (str(instruction[2]) + " " +
                               str(instruction[3]) + " ; ")
                if gadget:
                    # NOTE(review): replace(" ", " ") is a no-op as written —
                    # likely meant to collapse doubled spaces and was mangled
                    # somewhere; confirm against upstream before changing.
                    gadget = gadget.replace(" ", " ")
                    # Drop the trailing " ; ".
                    gadget = gadget[:-3]
                    self._gadgets += [{"vaddr" : section["vaddr"]+index-i,
                                       "insts" : gadget,
                                       "gadget_type" : gadget_type}]
Example #16
Source File: generate_test.py From rekall with GNU General Public License v2.0 | 6 votes |
def ExtractTestCases(data): test_cases = [] # The start of the section is at this file offset. This is mapped into # memory at the .text segment (which is at offset 0). m = re.search("__start__", data) origin = m.start() for match in re.finditer( r"(---.*?\.\.\.)\n<bin>(.+?)</bin>", data, re.M | re.S): offset, _ = match.span(2) # Replace the assembled segment with a base64 equivalent. segment = yaml.safe_load(match.group(1)) segment["offset"] = offset - origin segment["data"] = match.group(2).encode("base64").strip() test_cases.append(segment) return test_cases
Example #17
Source File: TextDecorators.py From dcc with Apache License 2.0 | 6 votes |
def decorate(self, pageOffset=None):
    """Decorate the current page, then mark printable/URL-like byte runs.

    Delegates to the wrapped decorator first and adopts its pen/brush
    state; afterwards every run of >= 4 "string-like" bytes is given the
    red pen, unless an inner decorator already assigned a pen there.
    Returns the decorated page bytes.
    """
    page = self.decorated.decorate(pageOffset)
    # Adopt the inner decorator's state so earlier tagging is preserved.
    self.PenInterval = self.decorated.PenInterval
    self.brushMap = self.decorated.brushMap
    self.penMap = self.decorated.penMap

    off = self.dataModel.getOffset()

    # Spans of at least four consecutive ASCII string-ish bytes.
    Match = [(m.start(), m.end())
             for m in re.finditer(b'([a-zA-Z0-9\\-\\\\.%*:/? _<>]){4,}', page)]

    for s, e in Match:
        for i in range(e - s):
            idx = off + s + i
            # Only color bytes that don't already have a pen assigned.
            if idx not in self.penMap:
                self.penMap[off + s + i] = self.redPen

    self.page = page
    return self.page
Example #18
Source File: text.py From django-rest-registration with MIT License | 6 votes |
def _assert_urls_in_text(text, expected_num, line_url_pattern): lines = [line.rstrip() for line in text.split('\n')] urls = [] for line in lines: for match in re.finditer(line_url_pattern, line): match_groupdict = match.groupdict() urls.append(match_groupdict['url']) num_of_urls = len(urls) msg_format = "Found {num_of_urls} urls instead of {expected_num} in:\n{text}" # noqa: E501 msg = msg_format.format( num_of_urls=num_of_urls, expected_num=expected_num, text=text, ) assert num_of_urls == expected_num, msg return urls
Example #19
Source File: to_wiki.py From mindustry-modding with GNU General Public License v3.0 | 6 votes |
def normalize(md):
    '''Normalize anchors.'''
    def rewrite(link_match):
        # Build (original, normalized) forms of one markdown anchor link.
        label = link_match.group(1)
        anchor = link_match.group(2)
        slug = (anchor
                .lower()
                .replace('%20', '-')
                .replace(" ", "-")
                .replace("~", "")
                .replace(".", ""))
        before, after = f'[{label}]({anchor})', f'[{label}]({slug})'
        print(before, after)
        return before, after

    pairs = {rewrite(m)
             for m in re.finditer(r'\[([^\]\[]*)\]\((#[^\)]*)\)', md)}
    result = md
    for before, after in pairs:
        result = result.replace(before, after)
    return result
Example #20
Source File: parser.py From pcocc with GNU General Public License v3.0 | 6 votes |
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    token_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                          r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                          r'|\S+)\s*', re.S)
    result = []
    for found in token_re.finditer(string):
        chunk = found.group().strip()
        is_quoted = chunk[:1] == chunk[-1:] and chunk[:1] in '"\''
        if is_quoted:
            # Strip the quotes and decode backslash escape sequences.
            chunk = (chunk[1:-1]
                     .encode('ascii', 'backslashreplace')
                     .decode('unicode-escape'))
            try:
                chunk = type(string)(chunk)
            except UnicodeError:
                pass
        result.append(chunk)
    return result
Example #21
Source File: testcases.py From django-rest-registration with MIT License | 6 votes |
def _assert_urls_in_text(self, text, expected_num, line_url_pattern): lines = [line.rstrip() for line in text.split('\n')] urls = [] for line in lines: for match in re.finditer(line_url_pattern, line): match_groupdict = match.groupdict() urls.append(match_groupdict['url']) num_of_urls = len(urls) msg_format = "Found {num_of_urls} urls instead of {expected_num} in:\n{text}" # noqa: E501 msg = msg_format.format( num_of_urls=num_of_urls, expected_num=expected_num, text=text, ) self.assertEqual(num_of_urls, expected_num, msg=msg) return urls
Example #22
Source File: Sentence.py From Snowball with GNU General Public License v3.0 | 6 votes |
def __init__(self, _sentence, e1_type, e2_type):
    """Wrap a raw sentence and mark it valid when it contains an entity of
    ``e1_type`` followed (in some pairing) by one of ``e2_type``.

    Relies on the module-level ``regex`` pattern to extract tagged
    entities (e.g. "<PER>...") from the sentence text.
    """
    self.relationships = set()
    self.sentence = _sentence
    self.entities = list()
    self.valid = False
    self.tree = None
    self.deps = None
    # Collect every tagged-entity occurrence in the sentence.
    for m in re.finditer(regex, self.sentence):
        self.entities.append(m.group())
    # Look for any ordered pair of distinct entities with matching types.
    for e1 in self.entities:
        for e2 in self.entities:
            if e1 == e2:
                continue
            # Extract the entity type from the leading "<TYPE>" tag.
            arg1match = re.match("<([A-Z]+)>", e1)
            arg2match = re.match("<([A-Z]+)>", e2)
            if arg1match.group(1) == e1_type and arg2match.group(1) == e2_type:
                self.valid = True
                # NOTE(review): this break exits only the inner loop; the
                # outer loop keeps scanning. Presumably harmless since
                # self.valid stays True — confirm intent.
                break;
Example #23
Source File: typing.py From pytorch_geometric with MIT License | 6 votes |
def parse_types(func: Callable) -> List[Tuple[Dict[str, str], str]]:
    """Return (argument-type mapping, return type) pairs for *func*.

    Prefers mypy-style ``# type: (...) -> ...`` comments found in the
    source (``forward()`` may carry several, one per pair); falls back to
    the inspected signature annotations otherwise.
    """
    source = inspect.getsource(func)
    signature = inspect.signature(func)

    # Parse `# type: (...) -> ...` annotation. Note that it is allowed to pass
    # multiple `# type:` annotations in `forward()`.
    iterator = re.finditer(r'#\s*type:\s*\((.*)\)\s*->\s*(.*)\s*\n', source)
    matches = list(iterator)

    if len(matches) > 0:
        out = []
        args = list(signature.parameters.keys())
        for match in matches:
            arg_types_repr, return_type = match.groups()
            # Pair each declared parameter name with its comment-declared
            # type, preserving parameter order.
            arg_types = split_types_repr(arg_types_repr)
            arg_types = OrderedDict((k, v) for k, v in zip(args, arg_types))
            # Drop any trailing inline "#..." comment on the return type.
            return_type = return_type.split('#')[0].strip()
            out.append((arg_types, return_type))
        return out

    # Alternatively, parse annotations using the inspected signature.
    else:
        ps = signature.parameters
        arg_types = OrderedDict((k, param_type_repr(v)) for k, v in ps.items())
        return [(arg_types, return_type_repr(signature))]
Example #24
Source File: SSLServerConnection.py From 3vilTwinAttacker with MIT License | 5 votes |
def replaceCssLinks(self, data):
    """Feed every CSS link found in *data* to buildAbsoluteLink, then
    return the data string unchanged (presumably buildAbsoluteLink
    registers the links elsewhere — confirm against the class)."""
    for css_match in re.finditer(SSLServerConnection.cssExpression, data):
        self.buildAbsoluteLink(css_match.group(1))
    return data
Example #25
Source File: tarfile.py From recruit with Apache License 2.0 | 5 votes |
def _proc_gnusparse_00(self, next, pax_headers, buf): """Process a GNU tar extended sparse header, version 0.0. """ offsets = [] for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): offsets.append(int(match.group(1))) numbytes = [] for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): numbytes.append(int(match.group(1))) next.sparse = list(zip(offsets, numbytes))
Example #26
Source File: SSLServerConnection.py From 3vilTwinAttacker with MIT License | 5 votes |
def replaceSecureLinks(self, data):
    """Rewrite secure references in *data*.

    Applies the base-class replacements, patches CSS links, optionally
    spoofs the favicon, then registers every matched link expression as
    an absolute link. Returns the rewritten data.
    """
    data = ServerConnection.replaceSecureLinks(self, data)
    data = self.replaceCssLinks(data)

    if (self.urlMonitor.isFaviconSpoofing()):
        data = self.replaceFavicon(data)

    iterator = re.finditer(SSLServerConnection.linkExpression, data)
    for match in iterator:
        # Group 10 of linkExpression is passed as the link target —
        # presumably the URL portion; confirm against the class pattern.
        self.buildAbsoluteLink(match.group(10))

    return data
Example #27
Source File: utils.py From bamnostic with BSD 3-Clause "New" or "Revised" License | 5 votes |
def parse_cigar(cigar_str):
    """Parses a CIGAR string and turns it into a list of tuples

    Args:
        cigar_str (str): the CIGAR string as shown in SAM entry

    Returns:
        cigar_array (list): list of tuples of CIGAR operations (by id) and number of operations

    Raises:
        ValueError: if CIGAR operation is invalid

    Examples:
        >>> parse_cigar('3M1I3M1D5M') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        [(('BAM_CMATCH', 0), 3), ..., (('BAM_CMATCH', 0), 5)]
    """
    operations = []
    # Each CIGAR token is a count followed by a single operation letter.
    for token in re.finditer(r'(?P<n_op>\d+)(?P<op>\w)', cigar_str):
        count = int(token.group('n_op'))
        op_letter = token.group('op')
        operation = _CIGAR_OPS.get(op_letter, -1)
        if operation == -1:
            raise ValueError('Invalid CIGAR operation ({}).'.format(op_letter))
        operations.append((operation, count))
    return operations
Example #28
Source File: ServerConnection.py From 3vilTwinAttacker with MIT License | 5 votes |
def replaceSecureLinks(self, data):
    """Replace secure (HTTPS) URLs in *data* with monitor-registered
    substitutes, applying the monitor's patch dictionary first.
    Returns the rewritten page data.
    """
    sustitucion = {}  # maps original secure URL -> replacement URL
    patchDict = self.urlMonitor.patchDict

    # First pass: apply verbatim substitutions from the patch dictionary
    # in a single alternation-regex sweep.
    if len(patchDict)>0:
        dregex = re.compile("(%s)" % "|".join(map(re.escape, patchDict.keys())))
        data = dregex.sub(lambda x: str(patchDict[x.string[x.start() :x.end()]]), data)

    # Collect every secure URL and its stripped replacement.
    iterator = re.finditer(ServerConnection.urlExpression, data)
    for match in iterator:
        url = match.group()

        logging.debug("Found secure reference: " + url)
        nuevaurl=self.urlMonitor.addSecureLink(self.client.getClientIP(), url)
        logging.debug("LEO replacing %s => %s"%(url,nuevaurl))
        sustitucion[url] = nuevaurl
        #data.replace(url,nuevaurl)

    #data = self.urlMonitor.DataReemplazo(data)
    # Second pass: substitute all collected URLs in one regex sweep.
    if len(sustitucion)>0:
        dregex = re.compile("(%s)" % "|".join(map(re.escape, sustitucion.keys())))
        data = dregex.sub(lambda x: str(sustitucion[x.string[x.start() :x.end()]]), data)

    #logging.debug("LEO DEBUG received data:\n"+data)
    #data = re.sub(ServerConnection.urlExplicitPort, r'https://\1/', data)
    #data = re.sub(ServerConnection.urlTypewww, 'http://w', data)
    #if data.find("http://w.face")!=-1:
    #    logging.debug("LEO DEBUG Found error in modifications")
    #    raw_input("Press Enter to continue")
    #return re.sub(ServerConnection.urlType, 'http://web.', data)
    return data
Example #29
Source File: Sentence.py From Snowball with GNU General Public License v3.0 | 5 votes |
def __init__(self, _sentence, _before=None, _between=None, _after=None,
             _ent1=None, _ent2=None, _arg1type=None, _arg2type=None,
             _type=None):
    """Build a tagged-relationship record.

    When only the raw sentence is supplied, entities are located with the
    module-level ``regex_linked`` pattern and the before/between/after
    contexts are derived for each adjacent entity pair (the last processed
    pair wins, matching the original behavior).
    """
    self.sentence = _sentence
    self.rel_type = _type
    self.before = _before
    self.between = _between
    self.after = _after
    self.ent1 = _ent1
    self.ent2 = _ent2
    self.arg1type = _arg1type
    self.arg2type = _arg2type

    if _before is None and _between is None and _after is None \
            and _sentence is not None:
        matches = [m for m in re.finditer(regex_linked, self.sentence)]
        for x in range(0, len(matches) - 1):
            if x == 0:
                start = 0
            if x > 0:
                start = matches[x - 1].end()
            try:
                # BUG FIX: the original called the nonexistent
                # Match.init_bootstrapp() here (and below); the slicing
                # logic requires Match.start() — clearly a mangled rename.
                end = matches[x + 2].start()
            except IndexError:
                end = len(self.sentence) - 1
            self.before = self.sentence[start:matches[x].start()]
            self.between = self.sentence[matches[x].end():matches[x + 1].start()]
            self.after = self.sentence[matches[x + 1].end(): end]
            self.ent1 = matches[x].group()
            self.ent2 = matches[x + 1].group()
            # Entity types come from the leading "<TYPE>" tag.
            arg1match = re.match("<[A-Z]+>", self.ent1)
            arg2match = re.match("<[A-Z]+>", self.ent2)
            self.arg1type = arg1match.group()[1:-1]
            self.arg2type = arg2match.group()[1:-1]
Example #30
Source File: tarfile.py From jbox with MIT License | 5 votes |
def _proc_gnusparse_00(self, next, pax_headers, buf): """Process a GNU tar extended sparse header, version 0.0. """ offsets = [] for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): offsets.append(int(match.group(1))) numbytes = [] for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): numbytes.append(int(match.group(1))) next.sparse = list(zip(offsets, numbytes))