Python Examples of itertools.ifilter

Source File: metrics_parser.py From flocker with Apache License 2.0

6 votes

def wallclock_for_operation(results, operation):
    """
    Compute the mean wallclock time recorded for one operation.

    :param results: Iterable of result dicts. Each one is read through
        ``r['metric']['type']``, ``r['operation']['type']`` and ``r['value']``.
    :param operation: Operation type name whose wallclock samples are used.

    :return: ``mean`` of the matching wallclock values.
    """
    def is_wallclock_sample(result):
        # The metric type is tested first, so results of another metric
        # type are never asked for their operation.
        if result['metric']['type'] != 'wallclock':
            return False
        return result['operation']['type'] == operation

    samples = itertools.ifilter(is_wallclock_sample, results)
    return mean([sample['value'] for sample in samples])

Source File: compat.py From razzy-spinner with GNU General Public License v3.0

6 votes

def __and__(self, other):
                ''' Multiset intersection: keep the smaller count of each shared element.

                >>> Counter('abbb') & Counter('bcc')
                Counter({'b': 1})

                '''
                if not isinstance(other, Counter):
                    return NotImplemented
                # Iterate the counter with fewer keys and probe the other one.
                if len(self) < len(other):
                    larger, smaller = other, self
                else:
                    larger, smaller = self, other
                intersection = Counter()
                for key in ifilter(larger.__contains__, smaller):
                    shared = min(larger[key], smaller[key])
                    if shared > 0:
                        intersection[key] = shared
                return intersection

Source File: newCollections.py From scoop with GNU Lesser General Public License v3.0

6 votes

def __and__(self, other):
        ''' Multiset intersection: keep the smaller count of each shared element.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        # Iterate the counter with fewer keys and probe the other one.
        if len(self) < len(other):
            probed, walked = other, self
        else:
            probed, walked = self, other
        common = Counter()
        for item in ifilter(probed.__contains__, walked):
            lowest = min(probed[item], walked[item])
            if lowest > 0:
                common[item] = lowest
        return common

Source File: dstream.py From LearningApacheSpark with MIT License

6 votes

def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=None):
        """
        Return a new DStream in which each RDD contains the count of distinct elements in
        RDDs in a sliding window over this DStream.

        Each element is turned into an ``(element, 1)`` pair, the pairs are combined with
        ``reduceByKeyAndWindow`` using ``operator.add`` / ``operator.sub``, and pairs whose
        count is not greater than zero are dropped.

        @param windowDuration: width of the window; must be a multiple of this DStream's
                              batching interval
        @param slideDuration:  sliding interval of the window (i.e., the interval after which
                              the new DStream will generate RDDs); must be a multiple of this
                              DStream's batching interval
        @param numPartitions:  number of partitions of each RDD in the new DStream.
        """
        def with_unit_count(element):
            return (element, 1)

        def has_positive_count(pair):
            return pair[1] > 0

        return (self.map(with_unit_count)
                .reduceByKeyAndWindow(operator.add, operator.sub,
                                      windowDuration, slideDuration, numPartitions)
                .filter(has_positive_count))

Source File: rdd.py From LearningApacheSpark with MIT License

6 votes

def lookup(self, key):
        """
        Return the list of values in the RDD for key `key`.

        When ``self.partitioner`` is set, a job is run over just the partition
        index ``self.partitioner(key)``; otherwise all matching values are
        collected.

        >>> l = range(1000)
        >>> rdd = sc.parallelize(zip(l, l), 10)
        >>> rdd.lookup(42)  # slow
        [42]
        >>> sorted = rdd.sortByKey()
        >>> sorted.lookup(42)  # fast
        [42]
        >>> sorted.lookup(1024)
        []
        >>> rdd2 = sc.parallelize([(('a', 'b'), 'c')]).groupByKey()
        >>> list(rdd2.lookup(('a', 'b'))[0])
        ['c']
        """
        def has_key(pair):
            return pair[0] == key

        matching = self.filter(has_key).values()

        if self.partitioner is None:
            return matching.collect()

        # Only the partition the key maps to needs to be scanned.
        target_partitions = [self.partitioner(key)]
        return self.ctx.runJob(matching, lambda x: x, target_partitions)

Source File: recipe-576611.py From code with MIT License

6 votes

def __and__(self, other):
        ''' Multiset intersection: keep the smaller count of each shared element.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        # Order the operands so the one with fewer keys is the one iterated.
        larger, smaller = (other, self) if len(self) < len(other) else (self, other)
        overlap = Counter()
        for element in ifilter(larger.__contains__, smaller):
            count = min(larger[element], smaller[element])
            if count > 0:
                overlap[element] = count
        return overlap

Source File: model.py From instance-segmentation-pytorch with GNU General Public License v3.0

6 votes

def __define_optimizer(self, learning_rate, weight_decay,
                           lr_drop_factor, lr_drop_patience, optimizer='Adam'):
        """Create ``self.optimizer`` and ``self.lr_scheduler``.

        Only the parameters of ``self.model`` whose ``requires_grad`` is truthy
        are handed to the optimizer. ``optimizer`` must be one of 'RMSprop',
        'Adam', 'Adadelta' or 'SGD' (SGD is built with ``momentum=0.9``). The
        scheduler is a ``ReduceLROnPlateau`` in ``'min'`` mode configured with
        ``lr_drop_factor`` and ``lr_drop_patience``.
        """
        assert optimizer in ['RMSprop', 'Adam', 'Adadelta', 'SGD']

        trainable = ifilter(lambda param: param.requires_grad,
                            self.model.parameters())

        # Constructors are wrapped in lambdas so only the selected one runs.
        builders = {
            'RMSprop': lambda: optim.RMSprop(
                trainable, lr=learning_rate, weight_decay=weight_decay),
            'Adadelta': lambda: optim.Adadelta(
                trainable, lr=learning_rate, weight_decay=weight_decay),
            'Adam': lambda: optim.Adam(
                trainable, lr=learning_rate, weight_decay=weight_decay),
            'SGD': lambda: optim.SGD(
                trainable, lr=learning_rate, momentum=0.9,
                weight_decay=weight_decay),
        }
        build = builders.get(optimizer)
        if build is not None:
            self.optimizer = build()

        self.lr_scheduler = ReduceLROnPlateau(
            self.optimizer, mode='min', factor=lr_drop_factor,
            patience=lr_drop_patience, verbose=True)

Source File: dstream.py From LearningApacheSpark with MIT License

5 votes

def updateStateByKey(self, updateFunc, numPartitions=None, initialRDD=None):
        """
        Return a new "state" DStream where the state for each key is updated by applying
        the given function on the previous state of the key and the new values of the key.

        @param updateFunc: State update function, called as
                           C{updateFunc(new_values, previous_state)} where
                           C{previous_state} is None when no state is available for
                           the key. If this function returns None, then
                           corresponding state key-value pair will be eliminated.
        @param numPartitions: number of partitions used when grouping; defaults to
                              C{self._sc.defaultParallelism} when None.
        @param initialRDD: optional initial state; a truthy value that is not already
                           an RDD is first passed through C{self._sc.parallelize}.
        """
        if numPartitions is None:
            numPartitions = self._sc.defaultParallelism

        if initialRDD and not isinstance(initialRDD, RDD):
            initialRDD = self._sc.parallelize(initialRDD)

        def reduceFunc(t, a, b):
            # t is accepted but not used here; a is treated as the previous-state RDD
            # and b as the RDD of new values.
            if a is None:
                # No previous state RDD (presumably the first batch -- TODO confirm):
                # pair each key's new values with a None state.
                g = b.groupByKey(numPartitions).mapValues(lambda vs: (list(vs), None))
            else:
                g = a.cogroup(b.partitionBy(numPartitions), numPartitions)
                # ab[0] is used as the state side and ab[1] as the new values; the
                # state is the first element of ab[0], or None when ab[0] is empty.
                g = g.mapValues(lambda ab: (list(ab[1]), list(ab[0])[0] if len(ab[0]) else None))
            # vs_s is the (new_values, previous_state) pair built above.
            state = g.mapValues(lambda vs_s: updateFunc(vs_s[0], vs_s[1]))
            # Keys whose updated state is None are dropped.
            return state.filter(lambda k_v: k_v[1] is not None)

        jreduceFunc = TransformFunction(self._sc, reduceFunc,
                                        self._sc.serializer, self._jrdd_deserializer)
        if initialRDD:
            # Re-serialize the initial state with this stream's deserializer before
            # handing its underlying _jrdd to the JVM-side state stream.
            initialRDD = initialRDD._reserialize(self._jrdd_deserializer)
            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc,
                                                       initialRDD._jrdd)
        else:
            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc)

        return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer)

Source File: fill.py From whisper-backup with Apache License 2.0

5 votes

def fill(src, dst, tstart, tstop):
    # Copy the points between tstart and tstop from src into dst. Archives are
    # visited in order of increasing retention, so several fetch + merge
    # rounds may be needed to cover the whole range.
    header = info(src)

    archives = header['archives']
    archives.sort(key=itemgetter('retention'))

    # Earliest timestamp still covered by the source's maximum retention.
    oldest = int(time.time()) - header['maxRetention']

    if tstart < oldest and tstop < oldest:
        return

    def has_value(point):
        # Points are (timestamp, value) pairs; empty slots carry None.
        return point[1] is not None

    # Walk backwards in time: each round handles the newest stretch that is
    # still missing, then tstop is moved back to where that stretch started.
    for archive in archives:
        horizon = time.time() - archive['retention']
        if tstop <= horizon:
            # This archive has nothing inside the remaining range.
            continue

        fromTime = horizon if horizon > tstart else tstart

        (start, end, step), values = fetch(src, fromTime, tstop)
        points = list(itertools.ifilter(
            has_value, itertools.izip(xrange(start, end, step), values)))
        # Newest timestamp first.
        points.sort(key=lambda point: point[0], reverse=True)
        update_many(dst, points)

        tstop = fromTime

        # Nothing left to fetch once the range has collapsed.
        if tstart == tstop:
            return

Source File: rdd.py From LearningApacheSpark with MIT License

5 votes

def filter(self, f):
        """
        Return a new RDD containing only the elements that satisfy a predicate.

        The predicate is wrapped with ``fail_on_stopiteration`` each time a
        partition is processed.

        >>> rdd = sc.parallelize([1, 2, 3, 4, 5])
        >>> rdd.filter(lambda x: x % 2 == 0).collect()
        [2, 4]
        """
        return self.mapPartitions(
            lambda partition: filter(fail_on_stopiteration(f), partition),
            True)

Source File: rdd.py From LearningApacheSpark with MIT License

5 votes

def subtractByKey(self, other, numPartitions=None):
        """
        Return each (key, value) pair in C{self} that has no pair with matching
        key in C{other}.

        >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 2)])
        >>> y = sc.parallelize([("a", 3), ("c", None)])
        >>> sorted(x.subtractByKey(y).collect())
        [('b', 4), ('b', 5)]
        """
        def only_in_self(entry):
            # entry is (key, (values from self, values from other)).
            _, (own_values, other_values) = entry
            return own_values and not other_values

        grouped = self.cogroup(other, numPartitions)
        return grouped.filter(only_in_self).flatMapValues(lambda groups: groups[0])

Source File: doctovec.py From dblp with MIT License

5 votes

def vectorize(doc):
    """Turn a document (string/unicode) into a list of cleaned, filtered,
    stemmed words. See `doctovec` for a function with more options.

    Each token is cleaned first:

        1.  punctuation removed
        2.  word lowercased
        3.  whitespace stripped

    Cleaned words matching any of these criteria are then dropped:

        1.  empty or only 1 character
        2.  stopword
        3.  all digits
        4.  starts with digit

    The surviving words are stemmed.

    :param str doc: The document to vectorize.
    :rtype:  list of str
    :return: The cleaned, stemmed, filtered, list of words.
    """
    cleaned = [clean_word(token) for token in word_tokenize(doc)]
    return [stem_word(word)
            for word in it.ifilter(word_is_not_junk, cleaned)]

Source File: py_linq.py From py-enumerable with MIT License

5 votes

def last_or_default(self, func=None):
        """
        Return the last element of the collection, or the default returned by
        ``first_or_default`` on the reversed collection when there is none.
        :func: optional predicate; when given, the collection is filtered with
         ``where(func)`` before it is reversed
        :return: data element as object or None if transformed data contains no
         elements
        """
        source = self if func is None else self.where(func)
        return source.reverse().first_or_default()

Source File: py_linq.py From py-enumerable with MIT License

5 votes

def __getitem__(self, n):
        """Return the n-th (0-based) element of ``self`` accepted by ``self.predicate``.

        Returns None when no accepted element has index ``n``.
        """
        numbered = enumerate(filter(self.predicate, self))
        return next((item for index, item in numbered if index == n), None)

Source File: validator.py From setcover with MIT License

5 votes

def bruteforce_solver(task):
    """
    Exhaustive search over every 0/1 configuration of ``task.set_count`` sets.
    It finds the optimal solution, but it can't work on big inputs
     (say, 20 sets take a few seconds, 25 sets - take a few minutes)
    :param reader.Task task:
    :return: the valid configuration (a tuple of 0/1 produced by ``product``)
     with the lowest ``calc_cost``
    """
    def is_feasible(configuration):
        return is_valid(task, configuration)

    def cost_of(configuration):
        return calc_cost(task, configuration)

    candidates = product([0, 1], repeat=task.set_count)
    return min(ifilter(is_feasible, candidates), key=cost_of)

Source File: master.py From mesos-cli with Apache License 2.0

5 votes

def slaves(self, fltr=""):
        """Return a list of ``slave.MesosSlave`` objects, one for each entry of
        ``self.state["slaves"]`` whose ``"id"`` contains ``fltr``."""
        matching = itertools.ifilter(
            lambda entry: fltr in entry["id"], self.state["slaves"])
        return [slave.MesosSlave(entry) for entry in matching]

Source File: master.py From mesos-cli with Apache License 2.0

5 votes

def tasks(self, fltr="", active_only=False):
        """Return a list of ``task.Task`` objects for the entries of
        ``self._task_list(active_only)`` whose ``"id"`` either contains
        ``fltr`` or matches it as an ``fnmatch`` pattern."""
        def matches(entry):
            return fltr in entry["id"] or fnmatch.fnmatch(entry["id"], fltr)

        candidates = self._task_list(active_only)
        return [task.Task(self, entry)
                for entry in itertools.ifilter(matches, candidates)]

Source File: py_linq.py From py-enumerable with MIT License

5 votes

def last(self, func=None):
        """
        Return the last element of the collection, obtained as ``first()`` of
        the reversed collection.
        :func: optional predicate; when given, the collection is filtered with
        ``where(func)`` before it is reversed
        :return: data element as object or NoElementsError if transformed data
        contains no elements
        """
        source = self if func is None else self.where(func)
        return source.reverse().first()

Source File: filecmp.py From datafari with Apache License 2.0

5 votes

def phase1(self): # Compute common names
        # Index both listings by case-normalised name; the values keep the
        # original spelling.
        left = dict((os.path.normcase(name), name) for name in self.left_list)
        right = dict((os.path.normcase(name), name) for name in self.right_list)
        # common / left_only report the left spelling, right_only the right one.
        self.common = map(left.__getitem__, ifilter(right.__contains__, left))
        self.left_only = map(left.__getitem__, ifilterfalse(right.__contains__, left))
        self.right_only = map(right.__getitem__, ifilterfalse(left.__contains__, right))

Source File: dstream.py From LearningApacheSpark with MIT License

5 votes

def filter(self, f):
        """
        Return a new DStream containing only the elements that satisfy predicate.

        The predicate is applied partition by partition through ``mapPartitions``.
        """
        return self.mapPartitions(lambda partition: filter(f, partition), True)

Source File: thresher.py From combine with GNU General Public License v3.0

5 votes

def process_packetmail(response, source, direction):
    """Parse a ';'-delimited feed into a list of indicator tuples.

    Rows whose first field starts with '#' are skipped. Every other row
    contributes ``(indicator, indicator_type(indicator), direction, source,
    '', date)``, where ``date`` is the second space-separated token of the
    row's second field.
    """
    def is_data_row(row):
        return not row[0].startswith('#')

    data = []
    try:
        rows = reader(response.splitlines(), delimiter=';')
        for row in ifilter(is_data_row, rows):
            indicator = row[0]
            date = row[1].split(' ')[1]
            data.append((indicator, indicator_type(indicator), direction,
                         source, '', date))
    except (IndexError, AttributeError):
        # An IndexError or AttributeError ends processing; whatever was
        # collected before it is still returned.
        pass
    return data

Source File: List.py From hask with BSD 2-Clause "Simplified" License

5 votes

def filter(f, xs):
    """
    filter :: (a -> Bool) -> [a] -> [a]

    Given a predicate and a list, return the list of the elements for which
    the predicate holds. The elements are selected with itertools.ifilter
    and wrapped with L[...].
    """
    kept = itertools.ifilter(f, xs)
    return L[kept]

Source File: model.py From reseg-pytorch with GNU General Public License v3.0

5 votes

def __define_optimizer(self, learning_rate, weight_decay, lr_drop_factor, lr_drop_patience, optimizer='Adam'):
        """Create ``self.optimizer`` and ``self.lr_scheduler``.

        Only the parameters of ``self.model`` whose ``requires_grad`` is truthy
        are handed to the optimizer. ``optimizer`` must be one of 'RMSprop',
        'Adam', 'Adadelta' or 'SGD' (SGD is built with ``momentum=0.9``). The
        scheduler is a ``ReduceLROnPlateau`` in ``'max'`` mode configured with
        ``lr_drop_factor`` and ``lr_drop_patience``.
        """
        assert optimizer in ['RMSprop', 'Adam', 'Adadelta', 'SGD']

        trainable = ifilter(lambda param: param.requires_grad, self.model.parameters())
        # Keyword arguments shared by every optimizer choice.
        shared = {'lr': learning_rate, 'weight_decay': weight_decay}

        if optimizer == 'SGD':
            self.optimizer = optim.SGD(trainable, momentum=0.9, **shared)
        elif optimizer == 'Adam':
            self.optimizer = optim.Adam(trainable, **shared)
        elif optimizer == 'Adadelta':
            self.optimizer = optim.Adadelta(trainable, **shared)
        elif optimizer == 'RMSprop':
            self.optimizer = optim.RMSprop(trainable, **shared)

        self.lr_scheduler = ReduceLROnPlateau(
            self.optimizer, mode='max', factor=lr_drop_factor,
            patience=lr_drop_patience, verbose=True)

Source File: action.py From marsnake with GNU General Public License v3.0

5 votes

def get_paths(self):
		# Yield every path from self._get_paths() that self.path_filter accepts.
		# itertools.ifilter is used when common.is_python2x() is true, the
		# built-in filter otherwise.
		if common.is_python2x():
			import itertools
			accepted = itertools.ifilter(self.path_filter, self._get_paths())
		else:
			accepted = filter(self.path_filter, self._get_paths())
		for f in accepted:
			yield f

Source File: 216_Combination_Sum_III.py From leetcode with MIT License

5 votes

def combinationSum3(self, k, n):
        """
        Return every combination of k distinct digits from 1-9 that sums to n.

        Combinations come out in the order ``itertools.combinations`` yields
        them, each as a tuple of ascending digits.

        :type k: int
        :type n: int
        :rtype: List[Tuple[int]]
        """
        # Filter the combinations lazily; only the matches are stored.
        candidates = it.combinations(range(1, 10), k)
        return list(it.ifilter(lambda combo: sum(combo) == n, candidates))

Source File: artifact.py From repositorytools with Apache License 2.0

5 votes

def get_artifact_group(url):
        """Derive a dotted group name from the network location of ``url``.

        The netloc is split on ".", the pieces are reversed, any piece equal
        to "www" is dropped, and the rest is joined with "." again.
        Raises ``Exception`` when ``url`` is None.
        """
        if url is None:
            raise Exception('Web pages of the package not present in RPM metadata, please fill the URL tag in specfile')

        host = six.moves.urllib.parse.urlsplit(url).netloc
        labels = reversed(host.split("."))
        return ".".join(itertools.ifilter(lambda label: label != "www", labels))

Source File: streaming_command.py From SA-ctf_scoreboard_admin with Creative Commons Zero v1.0 Universal

5 votes

def iteritems(self):
            """Iterate over the ``(name, value)`` configuration settings.

            Starts from ``SearchCommand.ConfigurationSettings.iteritems(self)``
            and adjusts it by ``self.command.protocol_version``:

            * version 1: ``clear_required_fields`` is dropped when
              ``self.required_fields`` is None;
            * any other version: ``distributed`` is dropped, and when
              ``self.distributed`` is truthy the ``type`` setting is reported
              as ``'stateful'``.

            The lambdas index into each pair instead of using tuple parameters,
            which Python 3 no longer accepts (PEP 3113).
            """
            settings = SearchCommand.ConfigurationSettings.iteritems(self)
            version = self.command.protocol_version
            if version == 1:
                if self.required_fields is None:
                    settings = ifilter(lambda item: item[0] != 'clear_required_fields', settings)
            else:
                settings = ifilter(lambda item: item[0] != 'distributed', settings)
                if self.distributed:
                    settings = imap(
                        lambda item: (item[0], 'stateful') if item[0] == 'type' else (item[0], item[1]),
                        settings)
            return settings

        # endregion

Source File: search_command.py From SA-ctf_scoreboard_admin with Creative Commons Zero v1.0 Universal

5 votes

def iteritems(self):
            """Iterate over ``(name, value)`` pairs of the supported settings.

            Takes ``type(self).configuration_setting_definitions``, keeps the
            definitions for which ``is_supported_by_protocol`` accepts
            ``self.command.protocol_version``, reads each value with
            ``setting.__get__(self)``, and skips pairs whose value is None.

            The lambdas index into each pair instead of using tuple parameters,
            which Python 3 no longer accepts (PEP 3113).
            """
            definitions = type(self).configuration_setting_definitions
            version = self.command.protocol_version
            supported = ifilter(
                lambda setting: setting.is_supported_by_protocol(version), definitions)
            pairs = imap(
                lambda setting: (setting.name, setting.__get__(self)), supported)
            return ifilter(lambda pair: pair[1] is not None, pairs)

Source File: filecmp.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def phase1(self): # Compute common names
        # Map case-normalised name -> original name for each side.
        by_case_left = {os.path.normcase(entry): entry for entry in self.left_list}
        by_case_right = {os.path.normcase(entry): entry for entry in self.right_list}
        in_left, in_right = by_case_left.__contains__, by_case_right.__contains__
        # Names on both sides and names only on the left use the left spelling.
        self.common = map(by_case_left.__getitem__, ifilter(in_right, by_case_left))
        self.left_only = map(by_case_left.__getitem__, ifilterfalse(in_right, by_case_left))
        self.right_only = map(by_case_right.__getitem__, ifilterfalse(in_left, by_case_right))

Source File: headers.py From NoobSec-Toolkit with GNU General Public License v2.0

5 votes

def headersParser(headers):
    """
    Run the fingerprint parser over the HTTP headers that have a banner file.

    kb.headerPaths is filled on first use with the banner XML path for each
    recognised header name. For every entry of `headers` that is present in
    kb.headerPaths, a FingerprintHandler is created from the header value and
    kb.headersFp, then fed the header-specific XML file followed by
    paths.GENERIC_XML.
    """

    if not kb.headerPaths:
        banner_files = (
            ("cookie", "cookie.xml"),
            ("microsoftsharepointteamservices", "sharepoint.xml"),
            ("server", "server.xml"),
            ("servlet-engine", "servlet.xml"),
            ("set-cookie", "cookie.xml"),
            ("x-aspnet-version", "x-aspnet-version.xml"),
            ("x-powered-by", "x-powered-by.xml"),
        )
        kb.headerPaths = dict(
            (name, os.path.join(paths.SQLMAP_XML_BANNER_PATH, filename))
            for name, filename in banner_files)

    def is_known(name):
        return name in kb.headerPaths

    for header in itertools.ifilter(is_known, headers):
        handler = FingerprintHandler(headers[header], kb.headersFp)

        # Header-specific rules first, then the generic ones.
        for xmlfile in (kb.headerPaths[header], paths.GENERIC_XML):
            parseXmlFile(xmlfile, handler)

Python itertools.ifilter() Examples