Python fnmatch.filter() Examples

The following are 30 code examples of fnmatch.filter(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module fnmatch, or try the search function.
Example #1
Source File: build_py.py    From python-netsurv with MIT License 6 votes vote down vote up
def find_data_files(self, package, src_dir):
    """Return filenames for package's data files in 'src_dir'."""
    # Expand each platform-specific pattern with glob, keep only real
    # files, and prepend any files recorded in the manifest for this package.
    patterns = self._get_platform_patterns(self.package_data, package, src_dir)
    expanded = (name for pattern in patterns for name in glob(pattern))
    on_disk = (name for name in expanded if os.path.isfile(name))
    data_files = itertools.chain(self.manifest_files.get(package, []), on_disk)
    return self.exclude_data_files(package, src_dir, data_files)
Example #2
Source File: train_multi_v2.py    From DeepFashion with Apache License 2.0 6 votes vote down vote up
def get_images_count_recursive(path):
    """Recursively collect every ``*.jpg`` under ``path``.

    The score is the text between the last underscore and the ``.jpg``
    suffix of each filename (e.g. ``"img_0.75.jpg"`` -> ``"0.75"``).

    Returns a tuple ``(score_iou, images_count)``.
    """
    matches = []
    score_iou = []
    for root, _dirnames, filenames in sorted(os.walk(path)):
        for name in sorted(fnmatch.filter(filenames, '*.jpg')):
            matches.append(os.path.join(root, name))
            score_iou.append(name.split('_')[-1].replace('.jpg', ''))
    return score_iou, len(matches)

# nb_train_samples = 2000
# nb_validation_samples = 800 
Example #3
Source File: conversion_tools.py    From GelReportModels with Apache License 2.0 6 votes vote down vote up
def json2java(self):
    """
    Transform all JSON Avro schemas in a given folder to Java source code.
    :return:
    """
    parser = argparse.ArgumentParser(
        description='Generates Java source code from Avro schemas')
    # prefixing the argument with -- means it's optional
    parser.add_argument('--input', help='Input folder containing *.avdl files')
    parser.add_argument('--output', help='Output folder for the Java source code')
    args = parser.parse_args(sys.argv[2:])
    logging.info('json2java')
    makedir(args.output)
    # Collect all *.json schemas first, then shell out to avro-tools per file.
    schemas = []
    for dirpath, _dirnames, files in os.walk(args.input):
        for name in fnmatch.filter(files, "*.{}".format(JSON_EXTENSION)):
            schemas.append(os.path.join(dirpath, name))
    for schema in schemas:
        logging.info("Transforming: " + schema)
        json2java_command = "java -jar {} compile -string schema {} {}".format(
            AVRO_TOOLS_JAR, schema, args.output
        )
        logging.info("Running: [%s]" % json2java_command)
        run_command(json2java_command)
Example #4
Source File: pipe.py    From d6tpipe with MIT License 6 votes vote down vote up
def _apply_fname_filter(fnames, include, exclude):
    # todo: multi filter with *.csv|*.xls*|*.txt, split on |
    def helper(list_,filter_):
        return list(itertools.chain.from_iterable(fnmatch.filter(list_, ifilter) for ifilter in filter_.split('|')))
    if include:
        fnames = helper(fnames, include)

    if exclude:
        fnamesex = helper(fnames, exclude)
        fnames = list(set(fnames) - set(fnamesex))

    return fnames


#************************************************
# Pipe
#************************************************ 
Example #5
Source File: conversion_tools.py    From GelReportModels with Apache License 2.0 6 votes vote down vote up
def idl2avpr(self):
    """
    Transform all IDL files in input folder to AVPRs in the output folder
    :return:
    """
    parser = argparse.ArgumentParser(
        description='Converts IDL to AVPR')
    # prefixing the argument with -- means it's optional
    parser.add_argument('--input', help='Input folder containing *.avdl files')
    parser.add_argument('--output', help='Output folder for the AVPRs')
    args = parser.parse_args(sys.argv[2:])
    logging.info('idl2avpr')
    makedir(args.output)
    # Collect every *.avdl first, then run avro-tools on each one.
    idl_pattern = "*.{}".format(IDL_EXTENSION)
    idl_files = []
    for dirpath, _dirnames, files in os.walk(args.input):
        idl_files.extend(os.path.join(dirpath, f)
                         for f in fnmatch.filter(files, idl_pattern))
    for idl_file in idl_files:
        logging.info("Transforming: " + idl_file)
        # Output file keeps the base name, with the IDL extension swapped
        # for the AVPR extension.
        out_name = os.path.basename(idl_file).replace(IDL_EXTENSION, AVPR_EXTENSION)
        idl2avpr_command = "java -jar {} idl {} {}/{}".format(
            AVRO_TOOLS_JAR, idl_file, args.output, out_name
        )
        logging.info("Running: [%s]" % idl2avpr_command)
        run_command(idl2avpr_command)
Example #6
Source File: conversion_tools.py    From GelReportModels with Apache License 2.0 6 votes vote down vote up
def idl2json(self):
    """
    Transform all IDL files in input folder to AVRO schemas in the output folder
    :return:
    """
    parser = argparse.ArgumentParser(
        description='Converts IDL to Avro JSON schema')
    # prefixing the argument with -- means it's optional
    parser.add_argument('--input', help='Input folder containing *.avdl files')
    parser.add_argument('--output', help='Output folder for the JSON schemas')
    args = parser.parse_args(sys.argv[2:])
    logging.info('idl2schema')
    makedir(args.output)
    # Gather all *.avdl files before invoking avro-tools on each.
    idl_files = [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, files in os.walk(args.input)
        for name in fnmatch.filter(files, "*.{}".format(IDL_EXTENSION))
    ]
    for idl_file in idl_files:
        logging.info("Transforming: " + idl_file)
        idl2schemata_command = "java -jar {} idl2schemata {} {}".format(
            AVRO_TOOLS_JAR, idl_file, args.output
        )
        logging.info("Running: [%s]" % idl2schemata_command)
        run_command(idl2schemata_command)
Example #7
Source File: utils.py    From yatsm with MIT License 6 votes vote down vote up
def find_results(location, pattern):
    """ Create list of result files and return sorted

    Args:
      location (str): directory location to search
      pattern (str): glob style search pattern for results

    Returns:
      results (list): list of file paths for results found

    Raises:
      IOError: if no file under ``location`` matches ``pattern``

    """
    # Note: already checked for location existence in main()
    found = []
    for root, _dirnames, filenames in walk(location):
        found.extend(os.path.join(root, name)
                     for name in fnmatch.filter(filenames, pattern))
    if not found:
        raise IOError('Could not find results in: %s' % location)
    return sorted(found)
Example #8
Source File: organizers.py    From neurawkes with MIT License 6 votes vote down vote up
def read_logs(self):
        """Collect and parse every ``log.txt`` under ``self.path_tracks``.

        Walks the tracks directory tree, parses each ``log.txt`` found via
        ``self.parse_log`` and stores the parsed entries on
        ``self.list_logs`` (rebuilt from scratch on every call).

        NOTE(review): Python 2 code -- uses ``print`` statements.
        """
        self.list_logs = []
        #
        print "reading and parsing all the log files ... "
        #
        # Recursively search for files literally named 'log.txt'.
        for path, dirs, files in os.walk(self.path_tracks):
            for name_file in fnmatch.filter(files, 'log.txt'):
                # Absolute path so downstream code is independent of the cwd.
                path_log = os.path.abspath(
                    os.path.join(path, name_file)
                )
                #
                self.list_logs.append(
                    self.parse_log(path_log)
                )
                #
            #
        #
        print "done ! "
        #
    #
    # 
Example #9
Source File: build_py.py    From python-netsurv with MIT License 6 votes vote down vote up
def find_data_files(self, package, src_dir):
    """Return filenames for package's data files in 'src_dir'."""
    patterns = self._get_platform_patterns(
        self.package_data, package, src_dir)
    # Expand the glob patterns eagerly, then drop anything that is not a file.
    expanded = []
    for pattern in patterns:
        expanded.extend(glob(pattern))
    on_disk = [fname for fname in expanded if os.path.isfile(fname)]
    # Manifest-listed files come first, then the glob matches.
    candidates = itertools.chain(self.manifest_files.get(package, []), on_disk)
    return self.exclude_data_files(package, src_dir, candidates)
Example #10
Source File: build_py.py    From python-netsurv with MIT License 6 votes vote down vote up
def exclude_data_files(self, package, src_dir, files):
    """Filter filenames for package's data files in 'src_dir'."""
    files = list(files)
    patterns = self._get_platform_patterns(
        self.exclude_package_data, package, src_dir)
    # Union of every name hit by any exclusion pattern.
    excluded = set()
    for pattern in patterns:
        excluded.update(fnmatch.filter(files, pattern))
    keepers = (fn for fn in files if fn not in excluded)
    # ditch dupes, preserving first-seen order
    return list(_unique_everseen(keepers))
Example #11
Source File: android.py    From andle with MIT License 6 votes vote down vote up
def update(path, data, dryrun=False, remote=False, gradle=False, interact=False):
    """Scan ``path`` for build.gradle files and update their dependencies.

    :param path: root folder searched recursively
    :param data: dependency data handed to ``parse_dependency``
    :param dryrun: stored in module global ``is_dryrun``
    :param remote: stored in module global ``check_remote``
    :param gradle: when True, also process gradle-wrapper.properties files
    :param interact: stored in module global ``is_interact``
    """
    # The flags are stashed in module-level globals; presumably the parsing
    # helpers read them instead of taking extra parameters -- TODO confirm.
    global is_dryrun
    is_dryrun = dryrun
    global check_remote
    check_remote = remote
    global check_gradle
    check_gradle = gradle
    global is_interact
    is_interact = interact

    # NOTE: ``filter`` is this module's own helper (filter(path, name)),
    # which walks ``path`` and returns matching file paths -- it is NOT
    # the builtin filter().
    for file in filter(path, "build.gradle"):
        parse_dependency(file, data)

    if check_gradle:
        gradle_version = andle.gradle.load()
        for file in filter(path, "gradle-wrapper.properties"):
            parse_gradle(file, gradle_version)
Example #12
Source File: FS.py    From arnold-usd with Apache License 2.0 6 votes vote down vote up
def walk(self, func, arg):
    """
    Recursively visit this directory tree, invoking func(arg, dir_node,
    entry_names) for each directory Node.

    This mirrors os.path.walk(), except that the "dirname" argument is
    the Dir Node itself rather than a string.  The '.' and '..' entries
    are stripped from entry_names, and the callback may mutate the list
    in place to prune or reorder the traversal.  "arg" is passed through
    unchanged (pass None if unused).
    """
    entries = self.entries
    entry_names = list(entries.keys())
    entry_names.remove('.')
    entry_names.remove('..')
    func(arg, self, entry_names)
    # Recurse only into entries that are themselves directories; the
    # callback above may have pruned entry_names.
    for name in entry_names:
        node = entries[name]
        if isinstance(node, Dir):
            node.walk(func, arg)
Example #13
Source File: build_py.py    From pledgeservice with Apache License 2.0 6 votes vote down vote up
def exclude_data_files(self, package, src_dir, files):
    """Filter filenames for package's data files in 'src_dir'."""
    # Global ('') exclusions apply to every package, followed by the
    # package-specific ones.
    patterns = (self.exclude_package_data.get('', [])
                + self.exclude_package_data.get(package, []))
    excluded = set()
    for pattern in patterns:
        full_pattern = os.path.join(src_dir, convert_path(pattern))
        excluded.update(fnmatch.filter(files, full_pattern))
    # Keep first occurrence only; drop excluded names.
    result = []
    seen = set()
    for fname in files:
        if fname not in excluded and fname not in seen:
            seen.add(fname)
            result.append(fname)
    return result
Example #14
Source File: build_py.py    From jbox with MIT License 6 votes vote down vote up
def exclude_data_files(self, package, src_dir, files):
    """Filter filenames for package's data files in 'src_dir'."""
    # Exclusions for all packages ('') plus this package's own.
    patterns = (
        self.exclude_package_data.get('', [])
        + self.exclude_package_data.get(package, [])
    )
    bad = set()
    for pattern in patterns:
        bad.update(
            fnmatch.filter(files, os.path.join(src_dir, convert_path(pattern))))
    kept = []
    seen = set()
    for fname in files:
        if fname in bad or fname in seen:
            continue  # ditch excluded names and duplicates
        seen.add(fname)
        kept.append(fname)
    return kept
Example #15
Source File: build_py.py    From lambda-packs with MIT License 6 votes vote down vote up
def find_data_files(self, package, src_dir):
    """Return filenames for package's data files in 'src_dir'."""
    patterns = self._get_platform_patterns(
        self.package_data,
        package,
        src_dir,
    )
    # Every glob hit that is an actual file, produced lazily.
    glob_files = (
        path
        for pattern in patterns
        for path in glob(pattern)
        if os.path.isfile(path)
    )
    all_files = itertools.chain(self.manifest_files.get(package, []), glob_files)
    return self.exclude_data_files(package, src_dir, all_files)
Example #16
Source File: build_py.py    From lambda-packs with MIT License 6 votes vote down vote up
def exclude_data_files(self, package, src_dir, files):
    """Filter filenames for package's data files in 'src_dir'."""
    files = list(files)
    patterns = self._get_platform_patterns(
        self.exclude_package_data, package, src_dir)
    # Set of every filename matched by at least one exclusion pattern.
    bad = {
        match
        for pattern in patterns
        for match in fnmatch.filter(files, pattern)
    }
    survivors = (name for name in files if name not in bad)
    # ditch dupes, keeping first-seen order
    return list(_unique_everseen(survivors))
Example #17
Source File: ncbitaxonomy.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def get_taxids_by_scientific_name_wildcard(self, scientific_name):
    """
    Return all available taxid that fit the scientific name

    @attention: Several taxid might be a hit for one scientific name

    @param scientific_name: ncbi scientific name or synonym
    @type scientific_name: str

    @return: set of ncbi taxonomic identifiers
    @rtype: set[str | unicode] | None
    """
    assert isinstance(scientific_name, str)
    # Name lookup is case-insensitive: keys are matched in lower case.
    scientific_name = scientific_name.lower()
    matches = fnmatch.filter(self.name_to_taxids.keys(), scientific_name)
    tax_ids = set()
    for name in matches:
        tax_ids.update(set(self.name_to_taxids[name]))
    if not tax_ids:
        return None
    if len(tax_ids) > 1:
        self._logger.warning(
            "Several matches '{}' found for scientific_name: '{}'".format(", ".join(matches), scientific_name))
    return tax_ids
Example #18
Source File: android.py    From andle with MIT License 5 votes vote down vote up
def filter(path, name):
    """Walk ``path`` recursively and return every file whose basename
    matches the glob pattern ``name``, as ``root + "/" + filename``.

    NOTE: intentionally shadows the builtin ``filter`` within this module.
    """
    hits = []
    for root, _dirs, files in os.walk(path):
        hits.extend(root + "/" + match for match in fnmatch.filter(files, name))
    return hits
Example #19
Source File: shutil.py    From meddle with MIT License 5 votes vote down vote up
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        # Union of every name hit by any pattern; copytree() skips these.
        return set(
            name
            for pattern in patterns
            for name in fnmatch.filter(names, pattern)
        )
    return _ignore_patterns
Example #20
Source File: glob.py    From meddle with MIT License 5 votes vote down vote up
def glob1(dirname, pattern):
    # Non-recursive glob of a single directory.
    # NOTE(review): Python 2 code -- relies on the ``unicode`` builtin and
    # on ``filter`` returning a list; it will not run unmodified on Python 3.
    if not dirname:
        dirname = os.curdir
    # If the pattern is unicode but the directory name is a byte string,
    # decode the directory name so os.listdir() yields unicode entries
    # that can be matched against the pattern.
    if isinstance(pattern, unicode) and not isinstance(dirname, unicode):
        dirname = unicode(dirname, sys.getfilesystemencoding() or
                                   sys.getdefaultencoding())
    try:
        names = os.listdir(dirname)
    except os.error:
        # Unreadable/missing directory: no matches rather than an error.
        return []
    # Hidden entries (leading '.') are matched only when the pattern
    # itself starts with '.'.
    if pattern[0] != '.':
        names = filter(lambda x: x[0] != '.', names)
    return fnmatch.filter(names, pattern)
Example #21
Source File: glob.py    From lambda-packs with MIT License 5 votes vote down vote up
def glob1(dirname, pattern):
    """Return the entries of ``dirname`` matching ``pattern`` (non-recursive)."""
    if not dirname:
        # Empty dirname means the current directory; keep bytes/str
        # consistent with the pattern's type.
        dirname = os.curdir.encode('ASCII') if isinstance(pattern, bytes) else os.curdir
    try:
        entries = os.listdir(dirname)
    except OSError:
        # Missing or unreadable directory yields no matches.
        return []
    return fnmatch.filter(entries, pattern)
Example #22
Source File: glob.py    From python-netsurv with MIT License 5 votes vote down vote up
def glob1(dirname, pattern):
    """List the entries of ``dirname`` that match ``pattern`` (one level only)."""
    if not dirname:
        # Fall back to the current directory, keeping bytes/str consistent
        # with the pattern (binary_type is six's alias for bytes).
        dirname = os.curdir.encode('ASCII') if isinstance(pattern, binary_type) else os.curdir
    try:
        entries = os.listdir(dirname)
    except OSError:
        # Missing or unreadable directory yields no matches.
        return []
    return fnmatch.filter(entries, pattern)
Example #23
Source File: bear_export_sync.py    From Bear-Markdown-Export with MIT License 5 votes vote down vote up
def sync_md_updates():
    """Import externally edited markdown notes back into Bear.

    Scans ``export_path`` for *.md/*.txt/*.markdown files modified since the
    last sync timestamp, backs each one up and pushes it into Bear (as a
    textbundle when new images were added, otherwise as a note update).
    Recurses once more at the end to catch files that synced in from a
    remote (OneDrive/Dropbox) while this pass was running.

    Returns True if any updates were imported, False otherwise.
    Relies on module-level globals: sync_ts_file, export_ts_file,
    export_path, and the various read/backup/import helpers.
    """
    updates_found = False
    if not os.path.exists(sync_ts_file) or not os.path.exists(export_ts_file):
        return False
    ts_last_sync = os.path.getmtime(sync_ts_file)
    ts_last_export = os.path.getmtime(export_ts_file)
    # Update synced timestamp file:
    update_sync_time_file(0)
    file_types = ('*.md', '*.txt', '*.markdown')
    for (root, dirnames, filenames) in os.walk(export_path):
        '''
        This step walks down into all sub folders, if any.
        '''
        for pattern in file_types:
            for filename in fnmatch.filter(filenames, pattern):
                md_file = os.path.join(root, filename)
                ts = os.path.getmtime(md_file)
                # Only files changed since the last sync are imported.
                if ts > ts_last_sync:
                    if not updates_found:  # Yet
                        # Wait 5 sec at first for external files to finish downloading from dropbox.
                        # Otherwise images in textbundles might be missing in import:
                        time.sleep(5)
                    updates_found = True
                    md_text = read_file(md_file)
                    backup_ext_note(md_file)
                    if check_if_image_added(md_text, md_file):
                        textbundle_to_bear(md_text, md_file, ts)
                        write_log('Imported to Bear: ' + md_file)
                    else:
                        update_bear_note(md_text, md_file, ts, ts_last_export)
                        write_log('Bear Note Updated: ' + md_file)
    if updates_found:
        # Give Bear time to process updates:
        time.sleep(3)
        # Check again, just in case new updates synced from remote (OneDrive/Dropbox) 
        # during this process!
        # The logic is not 100% fool proof, but should be close to 99.99%
        sync_md_updates() # Recursive call
    return updates_found 
Example #24
Source File: testing.py    From tensorflow-image-wavenet with MIT License 5 votes vote down vote up
def find_files(directory, pattern='*.jpg'):
    '''Recursively finds all files matching the pattern.'''
    matched = []
    for root, _dirs, names in os.walk(directory):
        matched += [os.path.join(root, name)
                    for name in fnmatch.filter(names, pattern)]
    return matched
Example #25
Source File: text_reader.py    From tensorflow-image-wavenet with MIT License 5 votes vote down vote up
def find_files(directory, pattern='*.jpg'):
    '''Recursively finds all files matching the pattern.'''
    return [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(directory)
        for filename in fnmatch.filter(filenames, pattern)
    ]
Example #26
Source File: bccache.py    From jbox with MIT License 5 votes vote down vote up
def clear(self):
    """Delete every cached bytecode file in ``self.directory``.

    ``remove`` is imported lazily because Google App Engine has no
    write access to the file system and does not provide the function.
    """
    from os import remove
    for cache_file in fnmatch.filter(listdir(self.directory), self.pattern % '*'):
        try:
            remove(path.join(self.directory, cache_file))
        except OSError:
            # Best-effort cleanup: ignore files we cannot delete.
            pass
Example #27
Source File: repository_manager.py    From rekall with GNU General Public License v2.0 5 votes vote down vote up
def Build(self, renderer):
        """Rebuild the artifact profile when any source file is newer.

        Gathers repository files matching ``self.args.patterns``, and if the
        newest of them is more recent than the stored profile (or no profile
        exists yet), concatenates their YAML documents into an
        ArtifactProfile and writes it back under ``self.args.profile_name``.
        """
        repository = self.args.repository
        profile_metadata = repository.Metadata(self.args.profile_name)

        # Expand each glob pattern against the repository file listing.
        sources = []
        for pattern in self.args.patterns:
            sources.extend(fnmatch.filter(repository.ListFiles(), pattern))

        # Find the latest modified source
        last_modified = 0
        for source in sources:
            source_metadata = repository.Metadata(source)
            last_modified = max(
                last_modified, source_metadata["LastModified"])

        # Rebuild only when the profile is missing or stale.
        if not profile_metadata or (
                last_modified > profile_metadata["LastModified"]):
            # Each source may contain multiple YAML documents.
            definitions = []
            for source in sources:
                definitions.extend(yaml.safe_load_all(
                    repository.GetData(source, raw=True)))

            # Transform the data as required.
            data = {
                "$ARTIFACTS": definitions,
                "$METADATA": dict(
                    ProfileClass="ArtifactProfile",
                )
            }

            repository.StoreData(self.args.profile_name, utils.PPrint(data),
                                 raw=True)
            renderer.format("Building artifact profile {0}\n",
                            self.args.profile_name) 
Example #28
Source File: dataset.py    From waveglow with Apache License 2.0 5 votes vote down vote up
def _find_files(self, directory, pattern='*.wav'):
        """Recursively finds all files matching the pattern."""
        files = []
        for root, dirnames, filenames in os.walk(directory):
            for filename in fnmatch.filter(filenames, pattern):
                files.append(os.path.join(root, filename))
        return files 
Example #29
Source File: shutil.py    From jbox with MIT License 5 votes vote down vote up
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        ignored = set()
        for pattern in patterns:
            # Accumulate every name matching this pattern.
            ignored.update(fnmatch.filter(names, pattern))
        return ignored
    return _ignore_patterns
Example #30
Source File: gnucrawler.py    From binaryanalysis with Apache License 2.0 5 votes vote down vote up
def prune(storedir):
	grablist = []
	oslist = os.listdir(storedir)
	for i in filelist:
		try:
			(base, extension) = i.rsplit(".", 1)
		except:
			#print >>sys.stderr, "CAN'T UNPACK:", i
			continue
		if re.search('gcc-[a-z]+', i) != None:
			continue
		## see if we can match the filename, without the extension, if
		## so we don't download the file.
		matches = fnmatch.filter(oslist, "%s.*" % base.rsplit('/', 1)[-1])
		if len(matches) != 0:
			continue
		## we don't have the file yet, so put it in the grablist
		if extension == "bz2":
			grablist.append(i)
			for ext in ['gz', 'xz', 'lzma']:
				try:
					grablist.remove("%s.%s" % (base, ext))
				except Exception, e:
					pass
			continue
		if extension == "gz":
			if "%s.%s" % (base, "bz2") in grablist:
				continue
			if "%s.%s" % (base, "bz2") in filelist:
				continue
			grablist.append(i)
			for ext in ['xz', 'lzma']:
				try:
					grablist.remove("%s.%s" % (base, ext))
				except Exception, e:
					pass
					#print >>sys.stderr, e, i
			continue