Python future.utils.iteritems() Examples

The following are 30 code examples of future.utils.iteritems(), drawn from open-source projects. The source file and project for each example are noted above it. You may also want to check out all available functions/classes of the module future.utils.
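Before the project examples, here is a minimal sketch of the basic usage, assuming only that the future package is installed (the dictionary below is illustrative):

from future.utils import iteritems

counts = {'a': 1, 'b': 2}  # illustrative data

# iteritems(d) delegates to d.iteritems() on Python 2 and falls back to
# d.items() on Python 3, yielding (key, value) pairs either way without
# building an intermediate list on Python 2.
for key, value in iteritems(counts):
    print(key, value)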
Example #1
Source File: misc.py    From kgsgo-dataset-preprocessor with Mozilla Public License 2.0
def elements(self):
        '''Iterator over elements repeating each as many times as its count.

        >>> c = Counter('ABCABC')
        >>> sorted(c.elements())
        ['A', 'A', 'B', 'B', 'C', 'C']

        If an element's count has been set to zero or is a negative number,
        elements() will ignore it.

        '''
        for elem, count in iteritems(self):
            for _ in range(count):
                yield elem

    # Override dict methods where the meaning changes for Counter objects. 
Example #2
Source File: data_augmentation.py    From snips-nlu with Apache License 2.0
def get_entities_iterators(intent_entities, language,
                           add_builtin_entities_examples, random_state):
    from snips_nlu_parsers import get_builtin_entity_examples

    entities_its = dict()
    for entity_name, entity in iteritems(intent_entities):
        utterance_values = random_state.permutation(sorted(entity[UTTERANCES]))
        if add_builtin_entities_examples and is_builtin_entity(entity_name):
            entity_examples = get_builtin_entity_examples(
                entity_name, language)
            # Builtin entity examples must be kept first in the iterator to
            # ensure that they are used when augmenting data
            iterator_values = entity_examples + list(utterance_values)
        else:
            iterator_values = utterance_values
        entities_its[entity_name] = cycle(iterator_values)
    return entities_its 
Example #3
Source File: directory_utils.py    From plugin.video.netflix with MIT License
def _convert_dict_to_listitem(dict_item):
    list_item = xbmcgui.ListItem(label=dict_item['label'], offscreen=True)
    list_item.setContentLookup(False)
    properties = dict_item.get('properties', {})  # the 'properties' key allows setting custom properties on the xbmcgui.ListItem
    properties['isFolder'] = str(dict_item['is_folder'])

    if not dict_item['is_folder'] and dict_item['media_type'] in [common.VideoId.EPISODE,
                                                                  common.VideoId.MOVIE,
                                                                  common.VideoId.SUPPLEMENTAL]:
        properties.update({
            'IsPlayable': 'true',
            'TotalTime': dict_item.get('TotalTime', ''),
            'ResumeTime': dict_item.get('ResumeTime', '')
        })
    for stream_type, quality_info in iteritems(dict_item.get('quality_info', {})):
        list_item.addStreamInfo(stream_type, quality_info)
    list_item.setProperties(properties)
    list_item.setInfo('video', dict_item.get('info', {}))
    list_item.setArt(dict_item.get('art', {}))
    list_item.addContextMenuItems(dict_item.get('menu_items', []))
    if dict_item.get('is_selected'):
        list_item.select(True)
    return list_item 
Example #4
Source File: featurizer.py    From snips-nlu with Apache License 2.0
def feature_index_to_feature_name(self):
        """Maps the feature index of the feature matrix to printable features
        names. Mainly useful for debug.

        Returns:
            dict: a dict mapping feature indices to printable feature names
        """
        if not self.fitted:
            return dict()

        index = {
            i: "ngram:%s" % ng
            for ng, i in iteritems(self.tfidf_vectorizer.vocabulary)
        }
        num_ng = len(index)
        if self.cooccurrence_vectorizer is not None:
            for word_pair, j in iteritems(
                    self.cooccurrence_vectorizer.word_pairs):
                index[j + num_ng] = "pair:%s+%s" % (word_pair[0], word_pair[1])
        return index 
Example #5
Source File: paths.py    From plugin.video.netflix with MIT License
def iterate_references(source):
    """Generator expression that iterates over a dictionary of
    index=>reference pairs (sorted in ascending order by indices) until it
    reaches the first empty reference, which signals the end of the reference
    list.
    Items with a key that do not represent an integer are ignored."""
    for index, ref in sorted({int(k): v
                              for k, v in iteritems(source)
                              if common.is_numeric(k)}.items()):
        path = reference_path(ref)
        if path is None:
            break
        if path[0] == 'characters':
            # TODO: Implement handling of character references in Kids profiles
            continue
        yield (index, path) 
Example #6
Source File: featurizer.py    From snips-nlu with Apache License 2.0
def persist(self, path):
        path.mkdir()

        builtin_entity_scope = None
        if self.builtin_entity_scope is not None:
            builtin_entity_scope = list(self.builtin_entity_scope)

        self_as_dict = {
            "language_code": self.language,
            "word_pairs": {
                i: list(p) for p, i in iteritems(self.word_pairs)
            },
            "builtin_entity_scope": builtin_entity_scope,
            "config": self.config.to_dict()
        }
        vectorizer_json = json_string(self_as_dict)
        vectorizer_path = path / "vectorizer.json"
        with vectorizer_path.open(mode="w", encoding="utf8") as f:
            f.write(vectorizer_json)
        self.persist_metadata(path) 
Example #7
Source File: data_types.py    From plugin.video.netflix with MIT License
def lists_by_context(self, context, break_on_first=False):
        """Return a generator expression that iterates over all video
        lists with the given context.
        Will match any video lists with type contained in context
        if context is a list."""
        # 'context' may be a list of contexts or a single context passed
        # as a string; convert it to a list to simplify the code
        if not isinstance(context, list):
            context = [context]

        match_context = ((lambda context, contexts: context in contexts)
                         if isinstance(context, list)
                         else (lambda context, target: context == target))

        # Keep sort order of context list
        lists = {}
        for context_name in context:
            for list_id, video_list in iteritems(self.lists):
                if match_context(video_list['context'], context_name):
                    lists.update({list_id: VideoList(self.data, list_id)})
                    if break_on_first:
                        break
        return iteritems(lists) 
Example #8
Source File: crf_slot_filler.py    From snips-nlu with Apache License 2.0
def log_weights(self):
        """Returns a logs for both the label-to-label and label-to-features
         weights"""
        if not self.slot_name_mapping:
            return "No weights to display: intent '%s' has no slots" \
                   % self.intent
        log = ""
        transition_features = self.crf_model.transition_features_
        transition_features = sorted(
            iteritems(transition_features), key=_weight_absolute_value,
            reverse=True)
        log += "\nTransition weights: \n\n"
        for (state_1, state_2), weight in transition_features:
            log += "\n%s %s: %s" % (
                _decode_tag(state_1), _decode_tag(state_2), weight)
        feature_weights = self.crf_model.state_features_
        feature_weights = sorted(
            iteritems(feature_weights), key=_weight_absolute_value,
            reverse=True)
        log += "\n\nFeature weights: \n\n"
        for (feat, tag), weight in feature_weights:
            log += "\n%s %s: %s" % (feat, _decode_tag(tag), weight)
        return log 
Example #9
Source File: lookup_intent_parser.py    From snips-nlu with Apache License 2.0
def to_dict(self):
        """Returns a json-serializable dict"""
        stop_words_whitelist = None
        if self._stop_words_whitelist is not None:
            stop_words_whitelist = {
                intent: sorted(values)
                for intent, values in iteritems(self._stop_words_whitelist)}
        return {
            "config": self.config.to_dict(),
            "language_code": self.language,
            "map": self._map,
            "slots_names": self._slots_names,
            "intents_names": self._intents_names,
            "entity_scopes": self._entity_scopes,
            "stop_words_whitelist": stop_words_whitelist,
        } 
Example #10
Source File: from_dict.py    From snips-nlu with Apache License 2.0
def from_dict(cls, dict):
        if dict is None:
            return cls()
        params = inspect.signature(cls.__init__).parameters

        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in
               params.values()):
            return cls(**dict)

        param_names = set()
        for i, (name, param) in enumerate(iteritems(params)):
            if i == 0 and name == "self":
                continue
            if param.kind in KEYWORD_KINDS:
                param_names.add(name)
        filtered_dict = {k: v for k, v in iteritems(dict) if k in param_names}
        return cls(**filtered_dict) 
Example #11
Source File: utils.py    From snips-nlu with Apache License 2.0
def extract_intent_entities(dataset, entity_filter=None):
    intent_entities = {intent: set() for intent in dataset[INTENTS]}
    for intent_name, intent_data in iteritems(dataset[INTENTS]):
        for utterance in intent_data[UTTERANCES]:
            for chunk in utterance[DATA]:
                if ENTITY in chunk:
                    if entity_filter and not entity_filter(chunk[ENTITY]):
                        continue
                    intent_entities[intent_name].add(chunk[ENTITY])
    return intent_entities 
Example #12
Source File: test_log_reg_classifier_utils.py    From snips-nlu with Apache License 2.0
def test_should_build_training_data_with_no_noise(
            self, mocked_augment_utterances):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
- how are you
- hello how are you?
- what's up

---
type: intent
name: my_second_intent
utterances:
- what is the weather today ?
- does it rain
- will it rain tomorrow""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        resources = self.get_resources(dataset[LANGUAGE])
        mocked_augment_utterances.side_effect = get_mocked_augment_utterances
        random_state = np.random.RandomState(1)

        # When
        data_augmentation_config = IntentClassifierDataAugmentationConfig(
            noise_factor=0)
        utterances, _, intent_mapping = build_training_data(
            dataset, LANGUAGE_EN, data_augmentation_config, resources,
            random_state)

        # Then
        expected_utterances = [utterance for _, intent
                               in sorted(iteritems(dataset[INTENTS]))
                               for utterance in intent[UTTERANCES]]
        expected_intent_mapping = ["my_first_intent", "my_second_intent"]
        self.assertListEqual(expected_utterances, utterances)
        self.assertListEqual(expected_intent_mapping, intent_mapping) 
Example #13
Source File: utils.py    From snips-nlu with Apache License 2.0
def extract_entity_values(dataset, apply_normalization):
    from snips_nlu_utils import normalize

    entities_per_intent = {intent: set() for intent in dataset[INTENTS]}
    intent_entities = extract_intent_entities(dataset)
    for intent, entities in iteritems(intent_entities):
        for entity in entities:
            entity_values = set(dataset[ENTITIES][entity][UTTERANCES])
            if apply_normalization:
                entity_values = {normalize(v) for v in entity_values}
            entities_per_intent[intent].update(entity_values)
    return entities_per_intent 
Example #14
Source File: utils.py    From snips-nlu with Apache License 2.0
def get_stop_words_whitelist(dataset, stop_words):
    """Extracts stop words whitelists per intent consisting of entity values
    that appear in the stop_words list"""
    entity_values_per_intent = extract_entity_values(
        dataset, apply_normalization=True)
    stop_words_whitelist = dict()
    for intent, entity_values in iteritems(entity_values_per_intent):
        whitelist = stop_words.intersection(entity_values)
        if whitelist:
            stop_words_whitelist[intent] = whitelist
    return stop_words_whitelist 
Example #15
Source File: featurizer.py    From snips-nlu with Apache License 2.0
def _fit_cooccurrence_vectorizer(self, x, classes, none_class, dataset):
        import numpy as np
        from sklearn.feature_selection import chi2

        non_null_x = (d for d, c in zip(x, classes) if c != none_class)
        self.cooccurrence_vectorizer = CooccurrenceVectorizer(
            config=self.config.cooccurrence_vectorizer_config,
            builtin_entity_parser=self.builtin_entity_parser,
            custom_entity_parser=self.custom_entity_parser,
            resources=self.resources,
            random_state=self.random_state,
        )
        x_cooccurrence = self.cooccurrence_vectorizer.fit(
            non_null_x, dataset).transform(x)
        if not self.cooccurrence_vectorizer.word_pairs:
            return self
        _, pval = chi2(x_cooccurrence, classes)

        top_k = int(self.config.added_cooccurrence_feature_ratio * len(
            self.tfidf_vectorizer.idf_diag))

        # No selection if top_k is greater than or equal to the number of word pairs
        if top_k >= len(self.cooccurrence_vectorizer.word_pairs):
            return self

        top_k_cooccurrence_ix = np.argpartition(
            pval, top_k - 1, axis=None)[:top_k]
        top_k_cooccurrence_ix = set(top_k_cooccurrence_ix)
        top_word_pairs = [
            pair for pair, i in iteritems(
                self.cooccurrence_vectorizer.word_pairs)
            if i in top_k_cooccurrence_ix
        ]

        self.cooccurrence_vectorizer.limit_word_pairs(top_word_pairs)
        return self 
Example #16
Source File: registrable.py    From snips-nlu with Apache License 2.0
def registered_name(cls, registered_class):
        for name, subclass in iteritems(Registrable._registry[cls]):
            if subclass == registered_class:
                return name
        raise NotRegisteredError(cls, registered_cls=registered_class) 
Example #17
Source File: test_utils.py    From snips-nlu with Apache License 2.0
def test_should_initialize_with_argument(self):
        # Given
        sequence = [("a", 1), ("b", 2)]
        size_limit = 3
        # When
        d = LimitedSizeDict(sequence, size_limit=size_limit)
        # Then
        items = sorted(iteritems(d), key=lambda i: i[0])
        self.assertListEqual(items, sequence) 
Example #18
Source File: featurizer.py    From snips-nlu with Apache License 2.0
def _fit_transform_tfidf_vectorizer(self, x, y, dataset):
        from sklearn.feature_selection import chi2

        self.tfidf_vectorizer = TfidfVectorizer(
            config=self.config.tfidf_vectorizer_config,
            builtin_entity_parser=self.builtin_entity_parser,
            custom_entity_parser=self.custom_entity_parser,
            resources=self.resources,
            random_state=self.random_state,
        )
        x_tfidf = self.tfidf_vectorizer.fit_transform(x, dataset)

        if not self.tfidf_vectorizer.vocabulary:
            raise _EmptyDatasetUtterancesError(
                "Dataset is empty or with empty utterances")
        _, tfidf_pval = chi2(x_tfidf, y)
        best_tfidf_features = set(i for i, v in enumerate(tfidf_pval)
                                  if v < self.config.pvalue_threshold)
        if not best_tfidf_features:
            best_tfidf_features = set(
                idx for idx, val in enumerate(tfidf_pval) if
                val == tfidf_pval.min())

        best_ngrams = [ng for ng, i in
                       iteritems(self.tfidf_vectorizer.vocabulary)
                       if i in best_tfidf_features]
        self.tfidf_vectorizer.limit_vocabulary(best_ngrams)
        # We can't return x_tfidf[:best_tfidf_features] because of the
        # normalization in the transform of the tfidf_vectorizer:
        # this would lead to inconsistent results between fit_transform(x, y)
        # and fit(x, y).transform(x)
        return self.tfidf_vectorizer.transform(x) 
Example #19
Source File: crf_slot_filler.py    From snips-nlu with Apache License 2.0
def _get_outgoing_weights(self, tag):
        return [((first, second), w) for (first, second), w
                in iteritems(self.crf_model.transition_features_)
                if first == tag] 
Example #20
Source File: featurizer.py    From snips-nlu with Apache License 2.0
def persist(self, path):
        path.mkdir()

        vectorizer_ = None
        if self._tfidf_vectorizer is not None:
            vocab = {k: int(v) for k, v in iteritems(self.vocabulary)}
            idf_diag = self.idf_diag.tolist()
            vectorizer_ = {
                "vocab": vocab,
                "idf_diag": idf_diag
            }

        builtin_entity_scope = None
        if self.builtin_entity_scope is not None:
            builtin_entity_scope = list(self.builtin_entity_scope)

        self_as_dict = {
            "vectorizer": vectorizer_,
            "language_code": self.language,
            "builtin_entity_scope": builtin_entity_scope,
            "config": self.config.to_dict(),
        }

        vectorizer_path = path / "vectorizer.json"
        with vectorizer_path.open("w", encoding="utf-8") as f:
            f.write(json_string(self_as_dict))
        self.persist_metadata(path) 
Example #21
Source File: utils.py    From snips-nlu with Apache License 2.0
def extract_utterance_entities(dataset):
    entities_values = {ent_name: set() for ent_name in dataset[ENTITIES]}

    for intent in itervalues(dataset[INTENTS]):
        for utterance in intent[UTTERANCES]:
            for chunk in utterance[DATA]:
                if ENTITY in chunk:
                    entities_values[chunk[ENTITY]].add(chunk[TEXT].strip())
    return {k: list(v) for k, v in iteritems(entities_values)} 
Example #22
Source File: provider.py    From Adafruit_Python_BluefruitLE with MIT License
def _get_objects(self, interface, parent_path='/org/bluez'):
        """Return a list of all bluez DBus objects that implement the requested
        interface name and are under the specified path.  The default is to
        search devices under the root of all bluez objects.
        """
        # Iterate through all the objects in bluez's DBus hierarchy and return
        # any that implement the requested interface under the specified path.
        parent_path = parent_path.lower()
        objects = []
        for opath, interfaces in iteritems(self._bluez.GetManagedObjects()):
            if interface in interfaces.keys() and opath.lower().startswith(parent_path):
                objects.append(self._bus.get_object('org.bluez', opath))
        return objects 
Example #23
Source File: export.py    From django_OA with GNU General Public License v3.0
def _to_xml(self, xml, data):
        if isinstance(data, (list, tuple)):
            for item in data:
                xml.startElement("row", {})
                self._to_xml(xml, item)
                xml.endElement("row")
        elif isinstance(data, dict):
            for key, value in iteritems(data):
                key = key.replace(' ', '_')
                xml.startElement(key, {})
                self._to_xml(xml, value)
                xml.endElement(key)
        else:
            xml.characters(smart_text(data)) 
Example #24
Source File: export.py    From CTF_AWD_Platform with MIT License
def _to_xml(self, xml, data):
        if isinstance(data, (list, tuple)):
            for item in data:
                xml.startElement("row", {})
                self._to_xml(xml, item)
                xml.endElement("row")
        elif isinstance(data, dict):
            for key, value in iteritems(data):
                key = key.replace(' ', '_')
                xml.startElement(key, {})
                self._to_xml(xml, value)
                xml.endElement(key)
        else:
            xml.characters(smart_text(data)) 
Example #25
Source File: export.py    From myblog with GNU Affero General Public License v3.0
def _to_xml(self, xml, data):
        if isinstance(data, (list, tuple)):
            for item in data:
                xml.startElement("row", {})
                self._to_xml(xml, item)
                xml.endElement("row")
        elif isinstance(data, dict):
            for key, value in iteritems(data):
                key = key.replace(' ', '_')
                xml.startElement(key, {})
                self._to_xml(xml, value)
                xml.endElement(key)
        else:
            xml.characters(smart_text(data)) 
Example #26
Source File: base.py    From mendeley-python-sdk with Apache License 2.0
def add_query_params(url, params):
    scheme, netloc, path, query_string, fragment = urlsplit(url)
    query_params = parse_qs(query_string)

    for name, value in iteritems(params):
        if value:
            query_params[name] = [value]

    new_query_string = urlencode(query_params, doseq=True)

    return urlunsplit((scheme, netloc, path, new_query_string, fragment)) 
Example #27
Source File: bear.py    From caspanda with MIT License
def _sync_metadata(self, kp):
        """
        Syncs all of the metadata keyspaces and their underlying tables and columns. Sets keyspace to be a dict
        of all MetaKeyspace in the connection by name:MetaKeyspace
        :return:
        """

        self.keyspaces = {}
        #TODO: Turn off warnings when this occurs
        self.session.row_factory = dict_factory

        #gets all of the column data for all tables/keyspaces
        result = self.session.execute("""SELECT keyspace_name, columnfamily_name, column_name, component_index, index_name,
                             index_options, index_type, type as cql_type, validator FROM system.schema_columns""")


        cols = [ColumnMeta(**row) for row in result]
        for i in cols:
            # create the keyspace if it does not already exist
            if self.keyspaces.get(i.keyspace) is None:
                self.keyspaces.update({i.keyspace:KeyspaceMeta(i.keyspace)})

            # add the table if it does not already exist
            kp = self.keyspaces.get(i.keyspace)
            if kp.tables.get(i.table) is None:
                kp.tables.update({i.table:TableMeta(i.keyspace, i.table)})

            # finally, add/overwrite the column in the table
            tb = kp.tables.get(i.table)
            tb.columns[i.name] = i
        for kp_nm, kp in iteritems(self.keyspaces):
            for tbl_nm, tbl in iteritems(kp.tables):
                tbl.categorize_columns()

        self.session.row_factory = self.panda_factory 
Example #28
Source File: graph_view.py    From pyflowgraph with BSD 3-Clause "New" or "Revised" License
def frameAllNodes(self):
        allnodes = []
        for name, node in iteritems(self.__nodes):
            allnodes.append(node)
        self.frameNodes(allnodes) 
Example #29
Source File: misc.py    From kgsgo-dataset-preprocessor with Mozilla Public License 2.0
def update(self, iterable=None, **kwds):
        '''Like dict.update() but add counts instead of replacing them.

        Source can be an iterable, a dictionary, or another Counter instance.

        >>> c = Counter('which')
        >>> c.update('witch')           # add elements from another iterable
        >>> d = Counter('watch')
        >>> c.update(d)                 # add elements from another counter
        >>> c['h']                      # four 'h' in which, witch, and watch
        4

        '''
        if iterable is not None:
            if hasattr(iterable, 'iteritems'):
                if self:
                    self_get = self.get
                    for elem, count in iteritems(iterable):
                        self[elem] = self_get(elem, 0) + count
                else:
                    dict.update(
                        self, iterable)  # fast path when counter is empty
            else:
                self_get = self.get
                for elem in iterable:
                    self[elem] = self_get(elem, 0) + 1
        if kwds:
            self.update(kwds) 
Example #30
Source File: custom_entity_parser.py    From snips-nlu with Apache License 2.0
def _merge_entity_utterances(raw_utterances, stemmed_utterances):
    # Sort by resolved value, so that values conflict in a deterministic way
    for raw_stemmed_value, resolved_value in sorted(
            iteritems(stemmed_utterances), key=operator.itemgetter(1)):
        if raw_stemmed_value not in raw_utterances:
            raw_utterances[raw_stemmed_value] = resolved_value
    return raw_utterances