Python Examples of re.findall

Source File: utils.py From DOTA_models with Apache License 2.0

6 votes

def parse_labelme_poly(filename):
    """Parse a LabelMe XML annotation file.

    Returns a list with one dict per ``object`` element. Each dict holds the
    text of the ``name``, ``deleted``, ``occluded`` and ``attributes``
    children, the ``verified`` child converted to ``int``, and ``polygon``:
    a flat list ``[x0, y0, x1, y1, ...]`` of the vertex coordinates, kept as
    the unparsed strings found in the file.
    """
    def child_text(node, tag):
        return node.find(tag).text

    annotations = []
    for node in ET.parse(filename).findall('object'):
        record = {
            'name': child_text(node, 'name'),
            'deleted': child_text(node, 'deleted'),
            'verified': int(child_text(node, 'verified')),
            'occluded': child_text(node, 'occluded'),
            'attributes': child_text(node, 'attributes'),
        }
        flat_coords = []
        for vertex in node.find('polygon').findall('pt'):
            flat_coords.extend((child_text(vertex, 'x'), child_text(vertex, 'y')))
        record['polygon'] = flat_coords
        annotations.append(record)
    return annotations

Source File: reportMetrics.py From InsightAgent with Apache License 2.0

6 votes

def extract_fields_db2(obj, line, field_name_regex):
    """Split ``line`` into ``name: value`` fields and store new ones in ``obj``.

    ``field_name_regex`` must match each field marker (the name together with
    its trailing colon). The markers are processed right-to-left: the text
    after a marker, with whitespace runs collapsed to single spaces, becomes
    that field's value, and the line is then cut just before the marker.

    Fields whose name is already a key of ``obj`` are left untouched (and the
    line is not cut for them).

    :param obj: dict that is updated in place
    :param line: raw text line to split
    :param field_name_regex: pattern matching one field marker
    :return: the name of the right-most field that was stored, or ``''`` when
             nothing was stored
    """
    # Drop any whitespace that directly precedes a '#'.
    line = '#'.join(re.split(r'\s*#', line))

    last_key = ''
    field_names = re.findall(field_name_regex, line)
    for field in reversed(field_names):
        field_start = line.find(field)
        split_at = field_start + len(field)
        field_name = re.split(r'\s*:', field)[0]
        # don't overwrite existing fields
        if field_name in obj:
            continue
        obj[field_name] = ' '.join(line[split_at:].split())
        if not last_key:
            last_key = field_name
        # Everything from this marker onwards has been consumed.
        line = line[:field_start]
    return last_key

Source File: helpers.py From tensortrade with Apache License 2.0

6 votes

def scale_times_to_generate(times_to_generate: int, time_frame: str):
    """Scale ``times_to_generate`` by the length of ``time_frame`` in minutes.

    ``time_frame`` is a number followed by a unit, e.g. ``'5min'``, ``'1H'``,
    ``'1D'``, ``'1W'`` or ``'1M'``. The unit is detected case-insensitively by
    substring, in the order minutes, hours, days, weeks, months. A month is
    counted as 30 days.

    :param times_to_generate: base count to scale
    :param time_frame: time frame specification
    :return: ``times_to_generate`` multiplied by the frame length in minutes
    :raises ValueError: if no known unit is found in ``time_frame``
    :raises IndexError: if a unit is found but ``time_frame`` has no digits
    """
    unit = time_frame.upper()

    # 'MIN' has to be tested before the bare 'M' used for months.
    if 'MIN' in unit:
        minutes_per_unit = 1
    elif 'H' in unit:
        minutes_per_unit = 60
    elif 'D' in unit:
        minutes_per_unit = 60 * 24
    elif 'W' in unit:
        minutes_per_unit = 60 * 24 * 7
    elif 'M' in unit:
        # 30 days per month (previously 7 * 30 days, i.e. 30 weeks).
        minutes_per_unit = 60 * 24 * 30
    else:
        raise ValueError('Timeframe must be either in minutes (min), hours (H), days (D), weeks (W), or months (M)')

    return times_to_generate * int(re.findall(r'\d+', time_frame)[0]) * minutes_per_unit

Source File: adventure.py From Dumb-Cogs with MIT License

6 votes

# NOTE(review): the body uses `await`, but the `def` below is not declared
# `async` in this listing -- presumably the `async` keyword and the command
# decorator were lost when the snippet was extracted; confirm against the
# original file.
def adventure_command(self, ctx, *, text):
        "Do something in your adventure"
        # Split the free-form text into word tokens; nothing happens when
        # the text contains no word characters.
        words = re.findall(r'\w+', text)
        if words:
            # await self.baudout(ctx, game.do_command(words))
            channel = ctx.message.channel
            server = ctx.message.server
            author = ctx.message.author
            try:
                # self.players is indexed server id -> channel id -> author id
                # and yields the team the author is playing with.
                team = self.players[server.id][channel.id][author.id]
            except:
                # Any failure of the lookup above is reported to the user as
                # "not in an adventure" and ends the command.
                await self.bot.reply('You are not in an adventure. If your team has embarked on one, join them using `{}adventure join`, otherwise embark on your own adventure.'.format(ctx.prefix))
                return
            # Hand the words to the team's game object for this channel and
            # pass whatever do_command returns to self.baudout.
            await self.baudout(ctx, self.game_loops[server.id][team][channel.id]["GAME"].do_command(words, ctx, self))

        pass


    # edited - irdumbs

Source File: test_sanity_tutorials.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def test_tutorial_tested():
    """
    Make sure that every tutorial that isn't in the whitelist
    has been added to the tutorial test file
    """
    tutorial_test_file = os.path.join(os.path.dirname(__file__), 'test_tutorials.py')
    # Use a context manager so the handle is closed even if reading fails.
    with open(tutorial_test_file, 'r') as f:
        tutorial_test_text = f.read()
    tutorial_path = os.path.join(os.path.dirname(__file__), '..', '..', 'docs', 'tutorials')
    tutorials = glob.glob(os.path.join(tutorial_path, '**', '*.md'))

    # Names passed to _test_tutorial_nb('...') in the test file.
    tested_tutorials = set(re.findall(r"assert _test_tutorial_nb\('(.*)'\)", tutorial_test_text))
    for tutorial in tutorials:
        # "<folder>/<file>" without the extension, e.g. "basic/ndarray".
        friendly_name = '/'.join(tutorial.split('/')[-2:]).split('.')[0]
        is_covered = friendly_name in tested_tutorials or friendly_name+".md" in whitelist_set
        assert is_covered, "{} has not been added to the tests/tutorials/test_tutorials.py test_suite".format(friendly_name)

Source File: method.py From py2swagger with MIT License

6 votes

def _get_path_parameters(self):
        """
        Build the parameter descriptions for the placeholders in the url path.

        Every ``/{name}`` segment found in ``self.introspector.path`` yields
        one required string parameter located in the path.

        :return: list of parameters
        :rtype: list
        """
        placeholder_names = re.findall(r'/{(.+?)}', self.introspector.path)
        return [
            {
                'name': placeholder,
                'type': 'string',
                'in': 'path',
                'required': True
            }
            for placeholder in placeholder_names
        ]

Source File: utils.py From DOTA_models with Apache License 2.0

6 votes

def parse_rec(filename):
    """Parse a PASCAL VOC XML annotation file.

    Returns a list with one dict per ``object`` element: ``name`` and ``pose``
    as text, ``truncated`` and ``difficult`` as ints, and ``bbox`` as the int
    list ``[xmin, ymin, xmax, ymax]`` read from the ``bndbox`` child.
    """
    def text_of(node, tag):
        return node.find(tag).text

    parsed = []
    for node in ET.parse(filename).findall('object'):
        entry = {
            'name': text_of(node, 'name'),
            'pose': text_of(node, 'pose'),
            'truncated': int(text_of(node, 'truncated')),
            'difficult': int(text_of(node, 'difficult')),
        }
        box = node.find('bndbox')
        entry['bbox'] = [int(text_of(box, corner))
                         for corner in ('xmin', 'ymin', 'xmax', 'ymax')]
        parsed.append(entry)
    return parsed

Source File: simplesam.py From simplesam with MIT License

6 votes

def parse_md(self):
        """ Return the ungapped reference sequence from the MD tag, if present.

        The result is a list of characters. It starts as a copy of
        ``self.gapped('seq')``; every letter listed in the MD tag is then
        written over the position it refers to. The list is stored in
        ``self._cache['parse_md']`` and returned directly on later calls.

        Raises KeyError when the record has no ``MD`` tag, and IndexError
        (carrying a snapshot of the local variables) when the MD tag points
        past the end of the sequence.
        """
        # Serve a previously computed result if there is one.
        try:
            return self._cache['parse_md']
        except KeyError:
            pass
        try:
            md = self['MD']
        except KeyError:
            raise KeyError('MD tag not found in SAM record.')
        # NOTE(review): presumably gapped('seq') is the read sequence padded
        # so that its positions line up with the reference -- confirm.
        ref_seq = list(self.gapped('seq'))
        # Each match is (run length, letters): a number, an optional '^' that
        # is skipped, then an optional run of upper-case letters.
        md_match = re.findall(r"([0-9]+)\^?([A-Z]+)?", md)
        ref_seq_i = 0
        for i, b in md_match:
            # Skip over the positions counted by the number ...
            ref_seq_i += int(i)
            # ... then overwrite one position per listed letter.
            for mismatch in b:
                try:
                    ref_seq[ref_seq_i] = mismatch
                except IndexError:
                    # The payload is locals(), so the names of the local
                    # variables in this method are part of the error output.
                    raise IndexError(locals())
                ref_seq_i += 1
        self._cache['parse_md'] = ref_seq
        return ref_seq

Source File: utils.py From DOTA_models with Apache License 2.0

6 votes

def reWriteImgWithMask(srcpath, dstpath, gtpath, srcform, dstform):
    """Write masked copies of the images that have 'mask' or 'area' objects.

    For every ground-truth file returned by ``GetFileFromThisRootDir(gtpath)``
    the polygons of all objects whose class name contains ``mask`` or ``area``
    are collected. The image ``<srcpath>/<basename><srcform>`` is read, and if
    at least one polygon was collected it is passed together with the
    polygons to ``saveimageWithMask`` with ``<dstpath>/<basename><dstform>``
    as the output name.
    """
    for gt_file in GetFileFromThisRootDir(gtpath):
        # 'mask' and 'area' classes are handled identically: any hit counts.
        polygons = [
            shgeo.Polygon(item['poly'])
            for item in parse_bod_poly(gt_file)
            if re.findall('area|mask', item['name'])
        ]
        stem = mybasename(gt_file)
        img = cv2.imread(os.path.join(srcpath, stem + srcform))
        target = os.path.join(dstpath, stem + dstform)
        if polygons:
            saveimageWithMask(img, target, polygons)

Source File: compiler.py From PyOptiX with MIT License

6 votes

def _has_modified_includes(cls, file_path, modified_after, depth=4):
        """Tell whether a file includes anything modified after a timestamp.

        Scans ``file_path`` for ``#include "..."`` directives, resolves each
        included path against every directory in ``cls._program_directories``
        and checks the modification time of the files that exist. Included
        files are scanned recursively, at most ``depth`` levels deep.

        :param file_path: source file to scan
        :param modified_after: timestamp compared against ``os.path.getmtime``
        :param depth: remaining recursion levels; ``0`` stops the search
        :return: ``True`` if a reachable include is newer than
                 ``modified_after``, ``False`` otherwise
        """
        if depth == 0:
            return False

        include_pattern = r'#include\s*"(.*)"'

        # Read everything and close the file before recursing, so nested
        # includes do not keep one open handle per recursion level.
        with open(file_path) as f:
            content = f.read()

        for included_path in re.findall(include_pattern, content):
            for compiler_include_path in cls._program_directories:
                included_file_path = os.path.join(compiler_include_path, included_path)
                if not os.path.exists(included_file_path):
                    continue

                included_file_mtime = os.path.getmtime(included_file_path)

                if included_file_mtime > modified_after:
                    return True
                elif cls._has_modified_includes(included_file_path, modified_after, depth=depth - 1):
                    return True

        return False

Source File: huaban.py From PickTrue with MIT License

6 votes

def __init__(self, board_url_or_id):
        """Set up a board from its numeric id or from its URL.

        :param board_url_or_id: board id, or an ``http(s)`` URL containing
            ``boards/<id>``; a trailing slash after the id is optional
        :raises ValueError: if a URL is given that contains no board id
        """
        board_id = str(board_url_or_id)
        self.fetcher = HuaBanFetcher()
        if "http" in board_id:
            # The slash after the id is optional, so ".../boards/123" works
            # as well as ".../boards/123/".
            id_match = re.search(r'boards/(\d+)', board_id)
            if id_match is None:
                raise ValueError(
                    "No board id found in URL: {!r}".format(board_id)
                )
            board_id = id_match.group(1)
        self.id = board_id
        path = "/boards/{board_id}/".format(
            board_id=board_id,
        )
        self.base_url = urljoin(BASE_URL, path)
        # Template for paging; the placeholders are filled in by the caller.
        self.further_pin_url_tpl = urljoin(
            self.base_url,
            "?{random_string}"
            "&max={pin_id}"
            "&limit=20"
            "&wfl=1"
        )

        # uninitialized properties
        self.pin_count = None
        self.title = None
        self.description = None
        self._pins = []
        self._init_board()

Source File: vcc_utils.py From VEX_Syntax with MIT License

6 votes

def context_function_signatures(context, vcc_path=VCC_PATH):
    """List the function signatures that ``vcc -X <context>`` prints.

    Runs ``vcc_path`` with ``-X`` and ``context``, decodes the output as ASCII
    and extracts every ``<type> <name>(<args>)`` occurrence.

    :param context: context name passed to the compiler
    :param vcc_path: path of the ``vcc`` executable
    :return: list of dicts with the keys ``returns`` (return type, including
             a ``[]`` suffix if present), ``name``, ``ctx``, ``args`` (the
             argument text split on ``;`` and stripped), ``str`` (one-line
             signature) and ``hint`` (empty for ``void`` argument lists)
    :raises subprocess.CalledProcessError: if the compiler exits with a
             non-zero status
    """
    ctx_info = subprocess.check_output([vcc_path, '-X', context])
    ctx_info = ctx_info.decode('ascii')

    sigs = []
    # Groups: return type (with optional '[]'), the '[]' suffix on its own
    # (unused), function name, raw argument text.
    signature_re = r'(\w+(\[\])?) (\w+)\((.*)\)'
    for returns, _array_suffix, name, raw_args in re.findall(signature_re, ctx_info):
        sig_str = '%s %s(%s)' % (returns, name, raw_args)
        if raw_args == 'void':
            hint_str = ''
        else:
            hint_str = '%s\n(%s)' % (returns, raw_args.strip().rstrip(';'))
        args = [x.strip() for x in raw_args.split(';')]
        sigs.append({'returns': returns, 'name': name, 'ctx': context, 'args': args,
                     'str': sig_str, 'hint': hint_str})

    return sigs

Source File: tnslsnr-ping.py From zbxdb with GNU General Public License v3.0

6 votes

def ParseNestedParen(string, level):
    """
    Return the text enclosed by the ``level``-th pair of parentheses.

    Unbalanced input is first padded: missing ``)`` are appended and missing
    ``(`` are prepended. The i-th ``(`` from the left is then paired with the
    i-th ``)`` from the right, and the text between pair number ``level`` is
    returned as a one-element list.
    """
    opened = string.count('(')
    closed = string.count(')')
    if opened > closed:
        string = string + ')' * (opened - closed)
    elif closed > opened:
        string = '(' * (closed - opened) + string

    # Position just after each '(' and position of each ')'.
    starts = [pos + 1 for pos, char in enumerate(string) if char == '(']
    stops = [pos for pos, char in enumerate(string) if char == ')']
    spans = list(zip(starts, reversed(stops)))

    begin, end = spans[level]
    return [string[begin:end]]

Source File: test_re.py From jawfish with MIT License

6 votes

def test_string_boundaries(self):
        # Pins how the word-boundary (\b) and non-boundary (\B) assertions
        # behave at the edges of a string, in an empty string and around
        # whitespace, using re.search, re.match and re.findall.
        # See http://bugs.python.org/issue10713
        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
                         "abc")
        # There's a word boundary at the start of a string.
        self.assertTrue(re.match(r"\b", "abc"))
        # A non-empty string includes a non-boundary zero-length match.
        self.assertTrue(re.search(r"\B", "abc"))
        # There is no non-boundary match at the start of a string.
        self.assertFalse(re.match(r"\B", "abc"))
        # However, an empty string contains no word boundaries, and also no
        # non-boundaries.
        self.assertEqual(re.search(r"\B", ""), None)
        # This one is questionable and different from the perlre behaviour,
        # but describes current behavior.
        self.assertEqual(re.search(r"\b", ""), None)
        # A single word-character string has two boundaries, but no
        # non-boundary gaps.
        self.assertEqual(len(re.findall(r"\b", "a")), 2)
        self.assertEqual(len(re.findall(r"\B", "a")), 0)
        # If there are no words, there are no boundaries
        self.assertEqual(len(re.findall(r"\b", " ")), 0)
        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
        # Can match around the whitespace.
        self.assertEqual(len(re.findall(r"\B", " ")), 2)

Source File: conftest.py From sanic with MIT License

5 votes

def generate_url_for_template(template):
        """Fill the typed ``<name:type>`` parameters of a route template.

        Each run of consecutive parameters is replaced, everywhere it occurs,
        by ``str()`` of a value produced by the generator registered in
        ``TYPE_TO_GENERATOR_MAP`` for the type of the last parameter in the
        run. Supported types: string, int, number, alpha, uuid.
        """
        param_run = re.compile(r"((?:<\w+:(string|int|number|alpha|uuid)>)+)")
        url = template
        for placeholder, kind in param_run.findall(template):
            make_value = TYPE_TO_GENERATOR_MAP.get(kind)
            url = url.replace(placeholder, str(make_value()))
        return url

Source File: data_process.py From nlp-tensorflow with MIT License

5 votes

def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    A token is either a run of word characters or one character that is
    neither whitespace nor a word character; whitespace is discarded.
    """
    token_pattern = re.compile(r"[\w]+|[^\s\w]")
    return token_pattern.findall(sentence)

Source File: ksp_plugin.py From SublimeKSP with GNU General Public License v3.0

5 votes

def _extract_completions(self, view, prefix, point):
        """Return completion candidates for ``prefix`` in ``view``.

        Without a period in ``prefix`` this defers to
        ``view.extract_completions``. With a period, the whole buffer is
        searched for ``prefix`` followed by at least one identifier or period
        character, and the sorted matches are returned.
        """
        if '.' not in prefix:
            # default implementation if no '.' in the prefix
            return view.extract_completions(prefix, point)

        # The sublime view.extract_completions implementation doesn't seem to
        # allow the '.' character in the prefix, irrespective of the
        # "word_separators" setting, so scan the buffer text instead
        # (potentially slow).
        whole_buffer = sublime.Region(0, view.size())
        source_text = view.substr(whole_buffer)
        candidates = re.findall(re.escape(prefix) + r'[a-zA-Z0-9_.]+', source_text)
        return sorted(candidates)

Source File: data_process.py From nlp-tensorflow with MIT License

5 votes

def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    A token is either a run of word characters or one character that is
    neither whitespace nor a word character; whitespace is discarded.
    """
    token_pattern = re.compile(r"[\w]+|[^\s\w]")
    return token_pattern.findall(sentence)

Source File: mailslurper_import.py From sarlacc with MIT License

5 votes

def main():
    """Copy every row of the MySQL ``mailitem`` table into the mail store.

    Reads ``./smtpd.cfg``, builds a ``storage.StorageControl`` from it,
    connects to the local ``sarlacc`` MySQL database and stores each mail
    item via ``store.store_email``. The cursor and the connection are closed
    even when a row fails to import.
    """
    config = ConfigParser()
    config.read("./smtpd.cfg")

    store = storage.StorageControl(config)

    # NOTE(review): credentials are hard-coded; consider reading them from
    # the config file.
    cnx = mysql.connector.connect(
            user="root", password="root",
            host="localhost",
            database="sarlacc")
    try:
        mysql_cursor = cnx.cursor()
        try:
            mysql_cursor.execute("SELECT dateSent, fromAddress, toAddressList, subject, body FROM mailitem;")

            for (dateSent, fromAddress, toAddressList, subject, body) in mysql_cursor:
                # tidy up fromAddress: keep the first <...> part only.
                # Raises IndexError when the address has no <...> part.
                fromAddress = cleanupAddress(re.findall(r"<(.*?)>", fromAddress)[0])

                # tidy up toAddressList: every <...> part becomes one entry
                toAddressList = re.findall(r"<(.*?)>", toAddressList)

                # NOTE(review): on Python 3 str() of bytes yields the
                # "b'...'" repr, not the decoded text -- confirm intent.
                body = str(b64decode(body))

                store.store_email(subject, toAddressList, fromAddress, body, dateSent, [])
        finally:
            mysql_cursor.close()
    finally:
        cnx.close()

Source File: webfinger.py From Webfinger with GNU General Public License v3.0

5 votes

def check_rule(self, key, header, body, title):
		"""Fingerprint matching.

		Extracts the expected value from the rule ``key`` and looks for it in
		the page title (case-insensitively) for ``title="..."`` rules, in the
		body for ``body="..."`` rules, and in the header otherwise. Returns
		True on a hit; returns None on a miss or when anything goes wrong.
		"""
		try:
			if 'title="' in key:
				needle = re.findall(rtitle, key)[0].lower()
				haystack = title.lower()
			elif 'body="' in key:
				needle = re.findall(rbody, key)[0]
				haystack = body
			else:
				needle = re.findall(rheader, key)[0]
				haystack = header
			if needle in haystack:
				return True
		except Exception:
			# Malformed rules and unexpected input count as "no match".
			pass

Source File: tumblrdownloader.py From TumblrDownloader with MIT License

5 votes

def _getimages(self):
		'''
			Get all images returned by Tumblr API

			Fetches ``self.api_url`` with ``#start#`` replaced by
			``self._start``, decodes the response as UTF-8 and returns the
			list of ``photo-url`` values whose ``max-width`` equals
			``self._resolution``.
		'''
		site = self.api_url.replace("#start#",str(self._start))

		response = urlopen(site)
		try:
			data = response.read().decode('utf8')
		finally:
			# Close the connection even when reading or decoding fails.
			response.close()

		regex = r"<photo-url max-width=\"" + str(self._resolution) + "\">(.+?)</photo-url>"
		return re.findall(regex, data)

Source File: inputs.py From NGU-scripts with GNU Lesser General Public License v3.0

5 votes

def get_numbers(s :str) -> Iterable[int]:
        """Finds all numbers in a string.

        Spaces and number separators are removed first (via the ``Inputs``
        helpers). Every run of digits, optionally followed by a
        ``.<digits>E+<digits>`` part, is then converted to ``float`` and
        truncated to ``int``. Returns the numbers as a list.
        """
        cleaned = Inputs.remove_number_separators(Inputs.remove_spaces(s))
        number_pattern = re.compile(r"(\d+(\.\d+E\+\d+)?)")
        return [int(float(hit.group(1))) for hit in number_pattern.finditer(cleaned)]

Source File: data_process.py From nlp-tensorflow with MIT License

5 votes

def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    A token is either a run of word characters or one character that is
    neither whitespace nor a word character; whitespace is discarded.
    """
    token_pattern = re.compile(r"[\w]+|[^\s\w]")
    return token_pattern.findall(sentence)

Source File: dvrlogin.py From hkdvr_login with MIT License

5 votes

def getinfo(host):
    """Try a fixed login against ``host`` on ports 80-99 and report successes.

    For each port, requests ``/ISAPI/Security/userCheck`` with the built-in
    credentials and prints a line when the response contains a
    ``statusValue`` of ``200``. Ports that fail to answer, or answer with
    anything else, are skipped silently.

    :param host: host name or IP address to probe
    """
    username = "admin"
    password = "12345"
    timeout = 5

    for port in range(80,100):
        try:
            req = requests.get(url='http://'+ username +':'+ password +'@'+ host +':'+ str(port) +'/ISAPI/Security/userCheck',timeout=timeout)
            result = req.text
            status = re.findall(r'<statusValue>(.*)</statusValue>', result)
            if status and status[0] == '200':
                # Single-argument call form: valid on both Python 2 and 3.
                print('[√] Host http://'+ host +':'+ str(port) +' Login Success!')
        except Exception:
            # Best-effort scan: ignore this port and move on. Unlike a bare
            # "except:", this still lets Ctrl-C stop the scan.
            continue

Source File: adventure.py From Dumb-Cogs with MIT License

5 votes

# NOTE(review): the body uses `await`, but the `def` below is not declared
# `async` in this listing -- presumably the `async` keyword and the command
# decorator were lost when the snippet was extracted; confirm against the
# original file.
# Replies with the numbered list of a team's save files, newest first.
def team_saves(self, ctx, team=None):
        # TeamNebNeb didn't show saves also !advernture embark didn't load save
        author = ctx.message.author
        server = ctx.message.server
        channel = ctx.message.channel

        # No team given: use the team the author is playing with in this
        # channel, else the single team the author is a member of.
        if team is None:
            try:
                team = self.players[server.id][channel.id][author.id]
            except:
                try:
                    teams = self.teams[server.id]["MEMBERS"][author.id]
                    if len(teams) != 1:
                        await self.bot.reply('You are in more than one team. Please specify which team to see the saves for.')
                        return
                    team = teams[0]
                except:
                    await self.bot.reply('You are not in any team. Find one that will recruit you or create you own with `{}team new`'.format(ctx.prefix))
                    return
        # The sanitised, lower-cased team name is used as a directory name.
        team = self._safe_path(team).lower()
        tname = self._team_name(server, team)
        try:
            # http://stackoverflow.com/questions/168409/how-do-you-get-a-directory-listing-sorted-by-creation-date-in-python
            files = list(filter(os.path.isfile, glob.glob('data/adventure/saves/{}/{}/*.save'.format(server.id, team))))
            # Most recently modified save first.
            files.sort(key=os.path.getmtime, reverse=True)
            if not files:
                raise NoSave
            msg = tname+"'s save"
            if len(files) > 1:
                msg += 's'
            # The capture group is the file name without directory and
            # ".save" suffix.
            reg = re.compile('data/adventure/saves/{}/{}/([^/]*).save'.format(server.id,team)) # just bein verbose
            msg += ':\n' + '\n'.join([str(num+1) + ". " + re.findall(reg, sv)[0] for num,sv in enumerate(files)])
            
            await self.bot.reply(msg)
        except Exception as e:
            # Every failure above, including the NoSave raised for an empty
            # listing, is printed and reported to the user as "no saves".
            print(e)
            await self.bot.reply('The {} team does not have any saves'.format(tname))


    # only leaders can recruit?

Source File: alot.py From Dumb-Cogs with MIT License

5 votes

# NOTE(review): the body uses `await`, but the `def` below is not declared
# `async` in this listing -- presumably the `async` keyword was lost when the
# snippet was extracted; confirm against the original file.
# Answers a message matching self.alotRegex with an "alot" picture URL.
def alot_of_checks(self, message):
        # Never react to the bot's own messages.
        if message.author.id == self.bot.user.id:
            return

        server = message.server
        #let PMs
        if server != None:
            if server.id not in self.settings["SERVERS"]:
                #default off
                self.settings["SERVERS"][server.id] = False
            if not self.settings["SERVERS"][server.id]:
                return


        lower = message.content.lower()
        # Single-word messages are ignored.
        if ' ' not in lower:
            return

        if lower == "what's an alot?":
            await self.bot.send_message(message.channel, "This is an alot: http://hyperboleandahalf.blogspot.com/2010/04/alot-is-better-than-you-at-everything.html")
            return

        # Remove the first alotRegex match; if nothing was removed the
        # message does not match and is ignored.
        lowerm = re.sub(self.alotRegex,"",lower,1)
        if lowerm == lower:
            return


        # Collect, without duplicates, the tags of every key found in the
        # rest of the message.
        matchedKeys = re.findall(self.keyRegex,lowerm)
        matchedTags = []
        for k in matchedKeys:
            vals = self.alotTags[k]
            for tag in vals:
                if tag not in matchedTags:
                    matchedTags.append(tag)
        url = ""
        # No tag matched: pick any picture; otherwise pick the picture of
        # one randomly chosen matching tag.
        if matchedTags == []:
            url = randchoice(list(self.alots.values()))
        else:
            url = self.alots[randchoice(matchedTags)]
        await self.bot.send_message(message.channel,url)

Source File: lolz.py From Dumb-Cogs with MIT License

5 votes

def translate_sentence(self, sentence):
        """Translate ``sentence`` word by word, keeping the separators.

        Sentences that match ``self.regex['link']`` are returned unchanged.
        Otherwise every word is passed through ``self.translate_word`` and
        the pieces are joined again with the original text between them.

        :param sentence: text to translate
        :return: the translated text
        """
        # no links
        if re.findall(self.regex['link'], sentence):
            return sentence

        # ([:\w]*)  - a (possibly empty) run of word characters and ':'
        # ([^:\w]*) - the (possibly empty) run of everything else after it
        pieces = re.findall(r"([:\w]*)([^:\w]*)", sentence)
        return ''.join(self.translate_word(word) + space for word, space in pieces)

Source File: avclass_common.py From BASS with GNU General Public License v2.0

5 votes

def __norm_cat(self, label, hashes):
        """Map the tokens of an AV label to category names.

        The label is split on every non-alphanumeric character. Each token is
        lower-cased and stripped of trailing digits; tokens shorter than four
        characters and tokens that are a prefix of one of ``hashes`` are
        dropped. A remaining token contributes the first key of ``self.cat``
        whose values contain it; tokens found in no category are dropped.

        :param label: AV label string (may be empty or None)
        :param hashes: hash strings of the sample
        :return: list of category names, in token order
        """
        if not label:
            return []

        # Initialize list of tokens to return
        ret = []

        # Split label into tokens and process each token
        for token in re.split(r"[^0-9a-zA-Z]", label):
            token = token.lower()

            # Remove digits at the end
            # FIXME: What if it is a hash, and removes digits at the end???
            token = token.rstrip('0123456789')

            # Ignore short token
            if len(token) < 4:
                continue

            # Ignore token if prefix of a hash of the sample
            # Most AVs use MD5 prefixes in labels,
            # but we check SHA1 and SHA256 as well
            if any(hash_str[0:len(token)] == token for hash_str in hashes):
                continue

            # Replace the token by its category. items() works on both
            # Python 2 and Python 3 (iteritems() exists on Python 2 only).
            for category, members in self.cat.items():
                if token in members:
                    ret.append(category)
                    break
        return ret

Source File: interval.py From rate.sx with MIT License

5 votes

def parse_length(length):
    """
    Parse ``length`` and return parsed length interval (in seconds)
    or None if length can't be parsed.

    ``length`` is a sequence of ``<number><unit>`` parts, where the unit is
    one of the keys of ``INTERVAL_LENGTH``.

    >>> parse_length('1m')
    60
    >>> parse_length('1h1m')
    3660
    >>> parse_length('1')
    >>> parse_length('1hX1m')
    >>> parse_length('1d')
    86400
    >>> parse_length('2M')
    5184000
    """

    unit_chars = "".join(INTERVAL_LENGTH.keys())
    parts = re.findall('([0-9]+)([%s])' % unit_chars, length)

    total_seconds = 0
    for amount, unit in parts:
        try:
            total_seconds += int(amount)*INTERVAL_LENGTH[unit]
        except KeyError:
            return None

    # Gluing the recognised parts back together must reproduce the input;
    # otherwise some characters were skipped and the specification is
    # not a correct interval.
    if "".join(amount + unit for amount, unit in parts) != length:
        return None

    return total_seconds

Source File: tnslsnr-ping.py From zbxdb with GNU General Public License v3.0

5 votes

def getVersion(cmd):
    """Send the get-version command and parse the reply.

    The lengths of ``cmd`` and of the whole packet are written into the
    module-level ``TNSPacket`` header (in place), the packet is sent to
    ``(HOST, PORT)`` and up to 1024 bytes of the reply are parsed for the
    ``VSNNUM=`` and ``ERR=`` values.

    :param cmd: command text appended to the packet header
    :return: ``(vsnnum, err, version, elapsed_ms)`` on success;
             ``(0, "12541", "notfound")`` when anything fails
    """
    cmdl = len(cmd).to_bytes(2, byteorder='big')
    pckl = (len(cmd)+len(TNSPacket)).to_bytes(2, byteorder='big')
    # Bytes 0-1 carry the packet length, bytes 24-25 the command length.
    TNSPacket[0] = pckl[0]
    TNSPacket[1] = pckl[1]
    TNSPacket[24] = cmdl[0]
    TNSPacket[25] = cmdl[1]

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(3)
            _start = timer()
            s.connect((HOST, PORT))
            scmd = TNSPacket + bytes(cmd, 'utf-8')
            s.sendall(scmd)
            data = s.recv(1024)
            # Round-trip time in milliseconds.
            ela = round((timer() - _start)*1000)

            rectxt = (ParseNestedParen(str(data), 0))
            vsnnum = re.findall(r'(?<=VSNNUM=).+?(?=\))',
                                str(rectxt), flags=re.IGNORECASE)
            err = re.findall(r'(?<=ERR=).+?(?=\))',
                             str(rectxt), flags=re.IGNORECASE)
            version = vsnnumToVersion(vsnnum[0])

            return vsnnum[0], err[0], version, ela
    except Exception:
        # Connection or parse failure. Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate.
        # NOTE(review): this returns 3 values while the success path returns
        # 4 -- callers that unpack 4 values will fail here; confirm intent.
        # NOTE(review): "12541" is presumably the "no listener" error code
        # -- confirm.
        return 0, "12541", "notfound"

Python re.findall() Examples