Python itertools.ifilter() Examples

The following are 30 code examples of itertools.ifilter(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module itertools , or try the search function .
Example #1
Source File: metrics_parser.py    From flocker with Apache License 2.0 6 votes vote down vote up
def wallclock_for_operation(results, operation):
    """
    Calculate the wallclock time for a process running in a particular
    scenario.

    :param results: Results to extract values from.
    :param operation: Operation name to calculate wallclock results for.

    :return: The mean wallclock time observed.
    """
    operation_results = itertools.ifilter(
        lambda r: r['metric']['type'] == 'wallclock' and
        r['operation']['type'] == operation,
        results
    )
    values = [r['value'] for r in operation_results]
    return mean(values) 
Example #2
Source File: compat.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def __and__(self, other):
                ''' Intersection is the minimum of corresponding counts.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                _min = min
                result = Counter()
                if len(self) < len(other):
                    self, other = other, self
                for elem in ifilter(self.__contains__, other):
                    newcount = _min(self[elem], other[elem])
                    if newcount > 0:
                        result[elem] = newcount
                return result 
Example #3
Source File: newCollections.py    From scoop with GNU Lesser General Public License v3.0 6 votes vote down vote up
def __and__(self, other):
        ''' Intersection is the minimum of corresponding counts.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        _min = min
        result = Counter()
        if len(self) < len(other):
            self, other = other, self
        for elem in ifilter(self.__contains__, other):
            newcount = _min(self[elem], other[elem])
            if newcount > 0:
                result[elem] = newcount
        return result 
Example #4
Source File: dstream.py    From LearningApacheSpark with MIT License 6 votes vote down vote up
def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=None):
        """
        Return a new DStream in which each RDD contains the count of distinct elements in
        RDDs in a sliding window over this DStream.

        @param windowDuration: width of the window; must be a multiple of this DStream's
                              batching interval
        @param slideDuration:  sliding interval of the window (i.e., the interval after which
                              the new DStream will generate RDDs); must be a multiple of this
                              DStream's batching interval
        @param numPartitions:  number of partitions of each RDD in the new DStream.
        """
        keyed = self.map(lambda x: (x, 1))
        counted = keyed.reduceByKeyAndWindow(operator.add, operator.sub,
                                             windowDuration, slideDuration, numPartitions)
        return counted.filter(lambda kv: kv[1] > 0) 
Example #5
Source File: rdd.py    From LearningApacheSpark with MIT License 6 votes vote down vote up
def lookup(self, key):
        """
        Return the list of values in the RDD for key `key`. This operation
        is done efficiently if the RDD has a known partitioner by only
        searching the partition that the key maps to.

        >>> l = range(1000)
        >>> rdd = sc.parallelize(zip(l, l), 10)
        >>> rdd.lookup(42)  # slow
        [42]
        >>> sorted = rdd.sortByKey()
        >>> sorted.lookup(42)  # fast
        [42]
        >>> sorted.lookup(1024)
        []
        >>> rdd2 = sc.parallelize([(('a', 'b'), 'c')]).groupByKey()
        >>> list(rdd2.lookup(('a', 'b'))[0])
        ['c']
        """
        values = self.filter(lambda kv: kv[0] == key).values()

        if self.partitioner is not None:
            return self.ctx.runJob(values, lambda x: x, [self.partitioner(key)])

        return values.collect() 
Example #6
Source File: recipe-576611.py    From code with MIT License 6 votes vote down vote up
def __and__(self, other):
        ''' Intersection is the minimum of corresponding counts.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        _min = min
        result = Counter()
        if len(self) < len(other):
            self, other = other, self
        for elem in ifilter(self.__contains__, other):
            newcount = _min(self[elem], other[elem])
            if newcount > 0:
                result[elem] = newcount
        return result 
Example #7
Source File: model.py    From instance-segmentation-pytorch with GNU General Public License v3.0 6 votes vote down vote up
def __define_optimizer(self, learning_rate, weight_decay,
                           lr_drop_factor, lr_drop_patience, optimizer='Adam'):
        assert optimizer in ['RMSprop', 'Adam', 'Adadelta', 'SGD']

        parameters = ifilter(lambda p: p.requires_grad,
                             self.model.parameters())

        if optimizer == 'RMSprop':
            self.optimizer = optim.RMSprop(
                parameters, lr=learning_rate, weight_decay=weight_decay)
        elif optimizer == 'Adadelta':
            self.optimizer = optim.Adadelta(
                parameters, lr=learning_rate, weight_decay=weight_decay)
        elif optimizer == 'Adam':
            self.optimizer = optim.Adam(
                parameters, lr=learning_rate, weight_decay=weight_decay)
        elif optimizer == 'SGD':
            self.optimizer = optim.SGD(
                parameters, lr=learning_rate, momentum=0.9,
                weight_decay=weight_decay)

        self.lr_scheduler = ReduceLROnPlateau(
            self.optimizer, mode='min', factor=lr_drop_factor,
            patience=lr_drop_patience, verbose=True) 
Example #8
Source File: dstream.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def updateStateByKey(self, updateFunc, numPartitions=None, initialRDD=None):
        """
        Return a new "state" DStream where the state for each key is updated by applying
        the given function on the previous state of the key and the new values of the key.

        @param updateFunc: State update function. If this function returns None, then
                           corresponding state key-value pair will be eliminated.
        """
        if numPartitions is None:
            numPartitions = self._sc.defaultParallelism

        if initialRDD and not isinstance(initialRDD, RDD):
            initialRDD = self._sc.parallelize(initialRDD)

        def reduceFunc(t, a, b):
            if a is None:
                g = b.groupByKey(numPartitions).mapValues(lambda vs: (list(vs), None))
            else:
                g = a.cogroup(b.partitionBy(numPartitions), numPartitions)
                g = g.mapValues(lambda ab: (list(ab[1]), list(ab[0])[0] if len(ab[0]) else None))
            state = g.mapValues(lambda vs_s: updateFunc(vs_s[0], vs_s[1]))
            return state.filter(lambda k_v: k_v[1] is not None)

        jreduceFunc = TransformFunction(self._sc, reduceFunc,
                                        self._sc.serializer, self._jrdd_deserializer)
        if initialRDD:
            initialRDD = initialRDD._reserialize(self._jrdd_deserializer)
            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc,
                                                       initialRDD._jrdd)
        else:
            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc)

        return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer) 
Example #9
Source File: fill.py    From whisper-backup with Apache License 2.0 5 votes vote down vote up
def fill(src, dst, tstart, tstop):
    # fetch range start-stop from src, taking values from the highest
    # precision archive, thus optionally requiring multiple fetch + merges
    srcHeader = info(src)

    srcArchives = srcHeader['archives']
    srcArchives.sort(key=itemgetter('retention'))

    # find oldest point in time, stored by both files
    srcTime = int(time.time()) - srcHeader['maxRetention']

    if tstart < srcTime and tstop < srcTime:
        return

    # we want to retain as much precision as we can, hence we do backwards
    # walk in time

    # skip forward at max 'step' points at a time
    for archive in srcArchives:
        # skip over archives that don't have any data points
        rtime = time.time() - archive['retention']
        if tstop <= rtime:
            continue

        untilTime = tstop
        fromTime = rtime if rtime > tstart else tstart

        (timeInfo, values) = fetch(src, fromTime, untilTime)
        (start, end, archive_step) = timeInfo
        pointsToWrite = list(itertools.ifilter(
            lambda points: points[1] is not None,
            itertools.izip(xrange(start, end, archive_step), values)))
        # order points by timestamp, newest first
        pointsToWrite.sort(key=lambda p: p[0], reverse=True)
        update_many(dst, pointsToWrite)

        tstop = fromTime

        # can stop when there's nothing to fetch any more
        if tstart == tstop:
            return 
Example #10
Source File: rdd.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def filter(self, f):
        """
        Return a new RDD containing only the elements that satisfy a predicate.

        >>> rdd = sc.parallelize([1, 2, 3, 4, 5])
        >>> rdd.filter(lambda x: x % 2 == 0).collect()
        [2, 4]
        """
        def func(iterator):
            return filter(fail_on_stopiteration(f), iterator)
        return self.mapPartitions(func, True) 
Example #11
Source File: rdd.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def subtractByKey(self, other, numPartitions=None):
        """
        Return each (key, value) pair in C{self} that has no pair with matching
        key in C{other}.

        >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 2)])
        >>> y = sc.parallelize([("a", 3), ("c", None)])
        >>> sorted(x.subtractByKey(y).collect())
        [('b', 4), ('b', 5)]
        """
        def filter_func(pair):
            key, (val1, val2) = pair
            return val1 and not val2
        return self.cogroup(other, numPartitions).filter(filter_func).flatMapValues(lambda x: x[0]) 
Example #12
Source File: doctovec.py    From dblp with MIT License 5 votes vote down vote up
def vectorize(doc):
    """Convert a document (string/unicode) into a filtered, cleaned,
    stemmed, list of words. See `doctovec` for a function with more options.
    The following cleaning operations are performed:

        1.  punctuation removed
        2.  word lowercased
        3.  whitespace stripped

    Then words meeting these filtering criteria are removed:

        1.  empty or only 1 character
        2.  stopword
        3.  all digits
        4.  starts with digit

    Finally, all words are stemmed.

    :param str doc: The document to vectorize.
    :rtype:  list of str
    :return: The cleaned, stemmed, filtered, list of words.
    """
    word_list = word_tokenize(doc)
    cleaned_words = [clean_word(word) for word in word_list]
    filtered_words = it.ifilter(word_is_not_junk, cleaned_words)
    return [stem_word(word) for word in filtered_words] 
Example #13
Source File: py_linq.py    From py-enumerable with MIT License 5 votes vote down vote up
def last_or_default(self, func=None):
        """
        Return the last element in a collection or None if the collection is empty
        :func: predicate as a lambda expression used to filter collection
        :return: data element as object or None if transformed data contains no
         elements
        """
        if func is not None:
            return self.where(func).reverse().first_or_default()
        return self.reverse().first_or_default() 
Example #14
Source File: py_linq.py    From py-enumerable with MIT License 5 votes vote down vote up
def __getitem__(self, n):
        for i, e in enumerate(filter(self.predicate, self)):
            if i == n:
                return e 
Example #15
Source File: validator.py    From setcover with MIT License 5 votes vote down vote up
def bruteforce_solver(task):
    """
    As simple solution as we can make.
    It finds the optimal solution, but it can't work on big inputs
     (say, 20 sets take a few seconds, 25 sets - take a few minutes)
    :param reader.Task task:
    :return list[1|0]:
    """
    all_configurations = product([0, 1], repeat=task.set_count)

    valid_configurations = ifilter(partial(is_valid, task), all_configurations)

    return min(valid_configurations, key=partial(calc_cost, task)) 
Example #16
Source File: master.py    From mesos-cli with Apache License 2.0 5 votes vote down vote up
def slaves(self, fltr=""):
        return list(map(
            lambda x: slave.MesosSlave(x),
            itertools.ifilter(
                lambda x: fltr in x["id"], self.state["slaves"]))) 
Example #17
Source File: master.py    From mesos-cli with Apache License 2.0 5 votes vote down vote up
def tasks(self, fltr="", active_only=False):
        return list(map(
            lambda x: task.Task(self, x),
            itertools.ifilter(
                lambda x: fltr in x["id"] or fnmatch.fnmatch(x["id"], fltr),
                self._task_list(active_only)))) 
Example #18
Source File: py_linq.py    From py-enumerable with MIT License 5 votes vote down vote up
def last(self, func=None):
        """
        Return the last element in a collection
        :func: predicate as a lambda expression used to filter collection
        :return: data element as object or NoElementsError if transformed data
        contains no elements
        """
        if func is not None:
            return self.where(func).reverse().first()
        return self.reverse().first() 
Example #19
Source File: filecmp.py    From datafari with Apache License 2.0 5 votes vote down vote up
def phase1(self): # Compute common names
        a = dict(izip(imap(os.path.normcase, self.left_list), self.left_list))
        b = dict(izip(imap(os.path.normcase, self.right_list), self.right_list))
        self.common = map(a.__getitem__, ifilter(b.__contains__, a))
        self.left_only = map(a.__getitem__, ifilterfalse(b.__contains__, a))
        self.right_only = map(b.__getitem__, ifilterfalse(a.__contains__, b)) 
Example #20
Source File: dstream.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def filter(self, f):
        """
        Return a new DStream containing only the elements that satisfy predicate.
        """
        def func(iterator):
            return filter(f, iterator)
        return self.mapPartitions(func, True) 
Example #21
Source File: thresher.py    From combine with GNU General Public License v3.0 5 votes vote down vote up
def process_packetmail(response, source, direction):
    data = []
    filter_comments = lambda x: not x[0].startswith('#')
    try:
        for line in ifilter(filter_comments,
                            reader(response.splitlines(), delimiter=';')):
            i = line[0]
            date = line[1].split(' ')[1]
            data.append((i, indicator_type(i), direction, source, '', date))
    except (IndexError, AttributeError):
        pass
    return data 
Example #22
Source File: List.py    From hask with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def filter(f, xs):
    """
    filter :: (a -> Bool) -> [a] -> [a]

    filter, applied to a predicate and a list, returns the list of those
    elements that satisfy the predicate
    """
    return L[itertools.ifilter(f, xs)] 
Example #23
Source File: model.py    From reseg-pytorch with GNU General Public License v3.0 5 votes vote down vote up
def __define_optimizer(self, learning_rate, weight_decay, lr_drop_factor, lr_drop_patience, optimizer='Adam'):
        assert optimizer in ['RMSprop', 'Adam', 'Adadelta', 'SGD']

        parameters = ifilter(lambda p: p.requires_grad, self.model.parameters())

        if optimizer == 'RMSprop':
            self.optimizer = optim.RMSprop(parameters, lr=learning_rate, weight_decay=weight_decay)
        elif optimizer == 'Adadelta':
            self.optimizer = optim.Adadelta(parameters, lr=learning_rate, weight_decay=weight_decay)
        elif optimizer == 'Adam':
            self.optimizer = optim.Adam(parameters, lr=learning_rate, weight_decay=weight_decay)
        elif optimizer == 'SGD':
            self.optimizer = optim.SGD(parameters, lr=learning_rate, momentum=0.9, weight_decay=weight_decay)

        self.lr_scheduler = ReduceLROnPlateau(self.optimizer, mode='max', factor=lr_drop_factor, patience=lr_drop_patience, verbose=True) 
Example #24
Source File: action.py    From marsnake with GNU General Public License v3.0 5 votes vote down vote up
def get_paths(self):
		if common.is_python2x():
			import itertools
			for f in itertools.ifilter(self.path_filter, self._get_paths()):
				yield f
		else:
			for f in filter(self.path_filter, self._get_paths()):
				yield f 
Example #25
Source File: 216_Combination_Sum_III.py    From leetcode with MIT License 5 votes vote down vote up
def combinationSum3(self, k, n):
        """
        :type k: int
        :type n: int
        :rtype: List[List[int]]
        """
        return list(it.ifilter(lambda x: sum(x) == n, list(it.combinations(range(1, 10), k)))) 
Example #26
Source File: artifact.py    From repositorytools with Apache License 2.0 5 votes vote down vote up
def get_artifact_group(url):
        if url is None:
            raise Exception('Web pages of the package not present in RPM metadata, please fill the URL tag in specfile')

        parts = six.moves.urllib.parse.urlsplit(url).netloc.split(".")
        return ".".join(itertools.ifilter(lambda x: x != "www", reversed(parts))) 
Example #27
Source File: streaming_command.py    From SA-ctf_scoreboard_admin with Creative Commons Zero v1.0 Universal 5 votes vote down vote up
def iteritems(self):
            iteritems = SearchCommand.ConfigurationSettings.iteritems(self)
            version = self.command.protocol_version
            if version == 1:
                if self.required_fields is None:
                    iteritems = ifilter(lambda (name, value): name != 'clear_required_fields', iteritems)
            else:
                iteritems = ifilter(lambda (name, value): name != 'distributed', iteritems)
                if self.distributed:
                    iteritems = imap(
                        lambda (name, value): (name, 'stateful') if name == 'type' else (name, value), iteritems)
            return iteritems

        # endregion 
Example #28
Source File: search_command.py    From SA-ctf_scoreboard_admin with Creative Commons Zero v1.0 Universal 5 votes vote down vote up
def iteritems(self):
            definitions = type(self).configuration_setting_definitions
            version = self.command.protocol_version
            return ifilter(
                lambda (name, value): value is not None, imap(
                    lambda setting: (setting.name, setting.__get__(self)), ifilter(
                        lambda setting: setting.is_supported_by_protocol(version), definitions))) 
Example #29
Source File: filecmp.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def phase1(self): # Compute common names
        a = dict(izip(imap(os.path.normcase, self.left_list), self.left_list))
        b = dict(izip(imap(os.path.normcase, self.right_list), self.right_list))
        self.common = map(a.__getitem__, ifilter(b.__contains__, a))
        self.left_only = map(a.__getitem__, ifilterfalse(b.__contains__, a))
        self.right_only = map(b.__getitem__, ifilterfalse(a.__contains__, b)) 
Example #30
Source File: headers.py    From NoobSec-Toolkit with GNU General Public License v2.0 5 votes vote down vote up
def headersParser(headers):
    """
    This function calls a class that parses the input HTTP headers to
    fingerprint the back-end database management system operating system
    and the web application technology
    """

    if not kb.headerPaths:
        kb.headerPaths = {
            "cookie":                          os.path.join(paths.SQLMAP_XML_BANNER_PATH, "cookie.xml"),
            "microsoftsharepointteamservices": os.path.join(paths.SQLMAP_XML_BANNER_PATH, "sharepoint.xml"),
            "server":                          os.path.join(paths.SQLMAP_XML_BANNER_PATH, "server.xml"),
            "servlet-engine":                  os.path.join(paths.SQLMAP_XML_BANNER_PATH, "servlet.xml"),
            "set-cookie":                      os.path.join(paths.SQLMAP_XML_BANNER_PATH, "cookie.xml"),
            "x-aspnet-version":                os.path.join(paths.SQLMAP_XML_BANNER_PATH, "x-aspnet-version.xml"),
            "x-powered-by":                    os.path.join(paths.SQLMAP_XML_BANNER_PATH, "x-powered-by.xml"),
        }

    for header in itertools.ifilter(lambda x: x in kb.headerPaths, headers):
        value = headers[header]
        xmlfile = kb.headerPaths[header]

        handler = FingerprintHandler(value, kb.headersFp)

        parseXmlFile(xmlfile, handler)
        parseXmlFile(paths.GENERIC_XML, handler)