Python future.utils.iteritems() Examples
The following are 30 code examples of future.utils.iteritems(), collected from open-source projects. iteritems(d) returns an iterator over the (key, value) pairs of a dictionary d on both Python 2 and Python 3, dispatching to d.iteritems() on Python 2 and d.items() on Python 3. Each example below names its source file, the project it comes from, and that project's license. You may also want to check out the other functions and classes of the future.utils module.
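Before the examples, a minimal sketch of the typical usage pattern (the dictionary here is made up for illustration):

    from future.utils import iteritems

    scores = {"alice": 3, "bob": 7}

    # On Python 2 this dispatches to scores.iteritems(), avoiding an
    # intermediate list; on Python 3 it is simply scores.items().
    for name, score in iteritems(scores):
        print(name, score)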
Example #1
Source File: misc.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0
def elements(self):
    '''Iterator over elements repeating each as many times as its count.

    >>> c = Counter('ABCABC')
    >>> sorted(c.elements())
    ['A', 'A', 'B', 'B', 'C', 'C']

    If an element's count has been set to zero or is a negative number,
    elements() will ignore it.

    '''
    for elem, count in iteritems(self):
        for _ in range(count):
            yield elem

# Override dict methods where the meaning changes for Counter objects.
Example #2
Source File: data_augmentation.py From snips-nlu with Apache License 2.0
def get_entities_iterators(intent_entities, language,
                           add_builtin_entities_examples, random_state):
    from snips_nlu_parsers import get_builtin_entity_examples

    entities_its = dict()
    for entity_name, entity in iteritems(intent_entities):
        utterance_values = random_state.permutation(
            sorted(entity[UTTERANCES]))
        if add_builtin_entities_examples and is_builtin_entity(entity_name):
            entity_examples = get_builtin_entity_examples(
                entity_name, language)
            # Builtin entity examples must be kept first in the iterator to
            # ensure that they are used when augmenting data
            iterator_values = entity_examples + list(utterance_values)
        else:
            iterator_values = utterance_values
        entities_its[entity_name] = cycle(iterator_values)
    return entities_its
Example #3
Source File: directory_utils.py From plugin.video.netflix with MIT License
def _convert_dict_to_listitem(dict_item):
    list_item = xbmcgui.ListItem(label=dict_item['label'], offscreen=True)
    list_item.setContentLookup(False)
    # 'properties' key allow to set custom properties to xbmcgui.Listitem
    properties = dict_item.get('properties', {})
    properties['isFolder'] = str(dict_item['is_folder'])
    if not dict_item['is_folder'] and dict_item['media_type'] in [
            common.VideoId.EPISODE, common.VideoId.MOVIE,
            common.VideoId.SUPPLEMENTAL]:
        properties.update({
            'IsPlayable': 'true',
            'TotalTime': dict_item.get('TotalTime', ''),
            'ResumeTime': dict_item.get('ResumeTime', '')
        })
    for stream_type, quality_info in iteritems(dict_item.get('quality_info', {})):
        list_item.addStreamInfo(stream_type, quality_info)
    list_item.setProperties(properties)
    list_item.setInfo('video', dict_item.get('info', {}))
    list_item.setArt(dict_item.get('art', {}))
    list_item.addContextMenuItems(dict_item.get('menu_items', []))
    if dict_item.get('is_selected'):
        list_item.select(True)
    return list_item
Example #4
Source File: featurizer.py From snips-nlu with Apache License 2.0
def feature_index_to_feature_name(self):
    """Maps the feature index of the feature matrix to printable features
    names. Mainly useful for debug.

    Returns:
        dict: a dict mapping feature indices to printable features names
    """
    if not self.fitted:
        return dict()

    index = {
        i: "ngram:%s" % ng
        for ng, i in iteritems(self.tfidf_vectorizer.vocabulary)
    }
    num_ng = len(index)
    if self.cooccurrence_vectorizer is not None:
        for word_pair, j in iteritems(
                self.cooccurrence_vectorizer.word_pairs):
            index[j + num_ng] = "pair:%s+%s" % (word_pair[0], word_pair[1])
    return index
Example #5
Source File: paths.py From plugin.video.netflix with MIT License
def iterate_references(source):
    """Generator expression that iterates over a dictionary of
    index=>reference pairs (sorted in ascending order by indices) until
    it reaches the first empty reference, which signals the end of the
    reference list.
    Items with a key that do not represent an integer are ignored."""
    for index, ref in sorted({int(k): v
                              for k, v in iteritems(source)
                              if common.is_numeric(k)}.items()):
        path = reference_path(ref)
        if path is None:
            break
        if path[0] == 'characters':
            # TODO: Implement handling of character references in Kids profiles
            continue
        yield (index, path)
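The sorted({int(k): v ...}.items()) idiom above normalizes string indices before iterating. A sketch of just that idiom in isolation, with invented data and str.isdigit standing in for the project's common.is_numeric helper:

    source = {"1": "refB", "0": "refA", "10": "refC", "meta": "x"}
    # int(k) makes the sort numeric rather than lexicographic,
    # so "10" comes after "1", not between "1" and "2".
    for index, ref in sorted({int(k): v for k, v in source.items()
                              if k.isdigit()}.items()):
        print(index, ref)  # 0 refA / 1 refB / 10 refC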
Example #6
Source File: featurizer.py From snips-nlu with Apache License 2.0
def persist(self, path):
    path.mkdir()

    builtin_entity_scope = None
    if self.builtin_entity_scope is not None:
        builtin_entity_scope = list(self.builtin_entity_scope)

    self_as_dict = {
        "language_code": self.language,
        "word_pairs": {
            i: list(p) for p, i in iteritems(self.word_pairs)
        },
        "builtin_entity_scope": builtin_entity_scope,
        "config": self.config.to_dict()
    }
    vectorizer_json = json_string(self_as_dict)
    vectorizer_path = path / "vectorizer.json"
    with vectorizer_path.open(mode="w", encoding="utf8") as f:
        f.write(vectorizer_json)
    self.persist_metadata(path)
Example #7
Source File: data_types.py From plugin.video.netflix with MIT License
def lists_by_context(self, context, break_on_first=False):
    """Return a generator expression that iterates over all video lists
    with the given context.
    Will match any video lists with type contained in context
    if context is a list."""
    # 'context' may contain a list of multiple contexts or a single
    # 'context' can be passed as a string, convert to simplify code
    if not isinstance(context, list):
        context = [context]
    match_context = ((lambda context, contexts: context in contexts)
                     if isinstance(context, list)
                     else (lambda context, target: context == target))
    # Keep sort order of context list
    lists = {}
    for context_name in context:
        for list_id, video_list in iteritems(self.lists):
            if match_context(video_list['context'], context_name):
                lists.update({list_id: VideoList(self.data, list_id)})
                if break_on_first:
                    break
    return iteritems(lists)
Example #8
Source File: crf_slot_filler.py From snips-nlu with Apache License 2.0
def log_weights(self):
    """Returns a logs for both the label-to-label and label-to-features
    weights"""
    if not self.slot_name_mapping:
        return "No weights to display: intent '%s' has no slots" \
               % self.intent
    log = ""
    transition_features = self.crf_model.transition_features_
    transition_features = sorted(
        iteritems(transition_features),
        key=_weight_absolute_value, reverse=True)
    log += "\nTransition weights: \n\n"
    for (state_1, state_2), weight in transition_features:
        log += "\n%s %s: %s" % (
            _decode_tag(state_1), _decode_tag(state_2), weight)
    feature_weights = self.crf_model.state_features_
    feature_weights = sorted(
        iteritems(feature_weights),
        key=_weight_absolute_value, reverse=True)
    log += "\n\nFeature weights: \n\n"
    for (feat, tag), weight in feature_weights:
        log += "\n%s %s: %s" % (feat, _decode_tag(tag), weight)
    return log
Example #9
Source File: lookup_intent_parser.py From snips-nlu with Apache License 2.0
def to_dict(self):
    """Returns a json-serializable dict"""
    stop_words_whitelist = None
    if self._stop_words_whitelist is not None:
        stop_words_whitelist = {
            intent: sorted(values)
            for intent, values in iteritems(self._stop_words_whitelist)}
    return {
        "config": self.config.to_dict(),
        "language_code": self.language,
        "map": self._map,
        "slots_names": self._slots_names,
        "intents_names": self._intents_names,
        "entity_scopes": self._entity_scopes,
        "stop_words_whitelist": stop_words_whitelist,
    }
Example #10
Source File: from_dict.py From snips-nlu with Apache License 2.0
def from_dict(cls, dict):
    if dict is None:
        return cls()
    params = inspect.signature(cls.__init__).parameters
    if any(p.kind == inspect.Parameter.VAR_KEYWORD
           for p in params.values()):
        return cls(**dict)
    param_names = set()
    for i, (name, param) in enumerate(iteritems(params)):
        if i == 0 and name == "self":
            continue
        if param.kind in KEYWORD_KINDS:
            param_names.add(name)
    filtered_dict = {k: v for k, v in iteritems(dict) if k in param_names}
    return cls(**filtered_dict)
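The snippet above filters a dict down to the parameters a constructor actually accepts. A self-contained sketch of the same pattern follows; KEYWORD_KINDS is defined locally here as an assumption, standing in for the module-level constant the original imports:

    import inspect

    # Assumed stand-in for the original module's KEYWORD_KINDS constant.
    KEYWORD_KINDS = {inspect.Parameter.POSITIONAL_OR_KEYWORD,
                     inspect.Parameter.KEYWORD_ONLY}

    class Config:
        def __init__(self, alpha=1, beta=2):
            self.alpha = alpha
            self.beta = beta

    params = inspect.signature(Config.__init__).parameters
    accepted = {name for name, p in params.items()
                if name != "self" and p.kind in KEYWORD_KINDS}

    # Unknown keys such as "gamma" are silently dropped:
    raw = {"alpha": 5, "gamma": 9}
    cfg = Config(**{k: v for k, v in raw.items() if k in accepted})
    print(cfg.alpha, cfg.beta)  # 5 2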
Example #11
Source File: utils.py From snips-nlu with Apache License 2.0
def extract_intent_entities(dataset, entity_filter=None):
    intent_entities = {intent: set() for intent in dataset[INTENTS]}
    for intent_name, intent_data in iteritems(dataset[INTENTS]):
        for utterance in intent_data[UTTERANCES]:
            for chunk in utterance[DATA]:
                if ENTITY in chunk:
                    if entity_filter and not entity_filter(chunk[ENTITY]):
                        continue
                    intent_entities[intent_name].add(chunk[ENTITY])
    return intent_entities
Example #12
Source File: test_log_reg_classifier_utils.py From snips-nlu with Apache License 2.0
def test_should_build_training_data_with_no_noise(
        self, mocked_augment_utterances):
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
- how are you
- hello how are you?
- what's up

---
type: intent
name: my_second_intent
utterances:
- what is the weather today ?
- does it rain
- will it rain tomorrow""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    resources = self.get_resources(dataset[LANGUAGE])
    mocked_augment_utterances.side_effect = get_mocked_augment_utterances
    random_state = np.random.RandomState(1)

    # When
    data_augmentation_config = IntentClassifierDataAugmentationConfig(
        noise_factor=0)
    utterances, _, intent_mapping = build_training_data(
        dataset, LANGUAGE_EN, data_augmentation_config, resources,
        random_state)

    # Then
    expected_utterances = [
        utterance
        for _, intent in sorted(iteritems(dataset[INTENTS]))
        for utterance in intent[UTTERANCES]]
    expected_intent_mapping = ["my_first_intent", "my_second_intent"]
    self.assertListEqual(expected_utterances, utterances)
    self.assertListEqual(expected_intent_mapping, intent_mapping)
Example #13
Source File: utils.py From snips-nlu with Apache License 2.0
def extract_entity_values(dataset, apply_normalization):
    from snips_nlu_utils import normalize

    entities_per_intent = {intent: set() for intent in dataset[INTENTS]}
    intent_entities = extract_intent_entities(dataset)
    for intent, entities in iteritems(intent_entities):
        for entity in entities:
            entity_values = set(dataset[ENTITIES][entity][UTTERANCES])
            if apply_normalization:
                entity_values = {normalize(v) for v in entity_values}
            entities_per_intent[intent].update(entity_values)
    return entities_per_intent
Example #14
Source File: utils.py From snips-nlu with Apache License 2.0
def get_stop_words_whitelist(dataset, stop_words):
    """Extracts stop words whitelists per intent consisting of entity
    values that appear in the stop_words list"""
    entity_values_per_intent = extract_entity_values(
        dataset, apply_normalization=True)
    stop_words_whitelist = dict()
    for intent, entity_values in iteritems(entity_values_per_intent):
        whitelist = stop_words.intersection(entity_values)
        if whitelist:
            stop_words_whitelist[intent] = whitelist
    return stop_words_whitelist
Example #15
Source File: featurizer.py From snips-nlu with Apache License 2.0
def _fit_cooccurrence_vectorizer(self, x, classes, none_class, dataset):
    import numpy as np
    from sklearn.feature_selection import chi2

    non_null_x = (d for d, c in zip(x, classes) if c != none_class)
    self.cooccurrence_vectorizer = CooccurrenceVectorizer(
        config=self.config.cooccurrence_vectorizer_config,
        builtin_entity_parser=self.builtin_entity_parser,
        custom_entity_parser=self.custom_entity_parser,
        resources=self.resources,
        random_state=self.random_state,
    )
    x_cooccurrence = self.cooccurrence_vectorizer.fit(
        non_null_x, dataset).transform(x)
    if not self.cooccurrence_vectorizer.word_pairs:
        return self
    _, pval = chi2(x_cooccurrence, classes)

    top_k = int(self.config.added_cooccurrence_feature_ratio * len(
        self.tfidf_vectorizer.idf_diag))

    # No selection if k is greater or equal than the number of word pairs
    if top_k >= len(self.cooccurrence_vectorizer.word_pairs):
        return self

    top_k_cooccurrence_ix = np.argpartition(
        pval, top_k - 1, axis=None)[:top_k]
    top_k_cooccurrence_ix = set(top_k_cooccurrence_ix)
    top_word_pairs = [
        pair for pair, i in iteritems(
            self.cooccurrence_vectorizer.word_pairs)
        if i in top_k_cooccurrence_ix
    ]
    self.cooccurrence_vectorizer.limit_word_pairs(top_word_pairs)
    return self
Example #16
Source File: registrable.py From snips-nlu with Apache License 2.0
def registered_name(cls, registered_class):
    for name, subclass in iteritems(Registrable._registry[cls]):
        if subclass == registered_class:
            return name
    raise NotRegisteredError(cls, registered_cls=registered_class)
Example #17
Source File: test_utils.py From snips-nlu with Apache License 2.0
def test_should_initialize_with_argument(self):
    # Given
    sequence = [("a", 1), ("b", 2)]
    size_limit = 3

    # When
    d = LimitedSizeDict(sequence, size_limit=size_limit)

    # Then
    items = sorted(iteritems(d), key=lambda i: i[0])
    self.assertListEqual(items, sequence)
Example #18
Source File: featurizer.py From snips-nlu with Apache License 2.0
def _fit_transform_tfidf_vectorizer(self, x, y, dataset):
    from sklearn.feature_selection import chi2

    self.tfidf_vectorizer = TfidfVectorizer(
        config=self.config.tfidf_vectorizer_config,
        builtin_entity_parser=self.builtin_entity_parser,
        custom_entity_parser=self.custom_entity_parser,
        resources=self.resources,
        random_state=self.random_state,
    )
    x_tfidf = self.tfidf_vectorizer.fit_transform(x, dataset)

    if not self.tfidf_vectorizer.vocabulary:
        raise _EmptyDatasetUtterancesError(
            "Dataset is empty or with empty utterances")

    _, tfidf_pval = chi2(x_tfidf, y)
    best_tfidf_features = set(i for i, v in enumerate(tfidf_pval)
                              if v < self.config.pvalue_threshold)
    if not best_tfidf_features:
        best_tfidf_features = set(
            idx for idx, val in enumerate(tfidf_pval)
            if val == tfidf_pval.min())

    best_ngrams = [ng for ng, i in
                   iteritems(self.tfidf_vectorizer.vocabulary)
                   if i in best_tfidf_features]
    self.tfidf_vectorizer.limit_vocabulary(best_ngrams)

    # We can't return x_tfidf[:best_tfidf_features] because of the
    # normalization in the transform of the tfidf_vectorizer
    # this would lead to inconsistent result between: fit_transform(x, y)
    # and fit(x, y).transform(x)
    return self.tfidf_vectorizer.transform(x)
Example #19
Source File: crf_slot_filler.py From snips-nlu with Apache License 2.0
def _get_outgoing_weights(self, tag):
    return [((first, second), w)
            for (first, second), w
            in iteritems(self.crf_model.transition_features_)
            if first == tag]
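In sklearn-crfsuite, transition_features_ is a dict mapping (from_label, to_label) pairs to weights, so the filter above can be pictured with a plain dict (the labels and weights here are invented):

    transitions = {("B-city", "I-city"): 1.8, ("O", "B-city"): 0.4}
    # Keep only transitions leaving a given tag:
    outgoing = [((a, b), w) for (a, b), w in transitions.items()
                if a == "B-city"]
    print(outgoing)  # [(('B-city', 'I-city'), 1.8)]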
Example #20
Source File: featurizer.py From snips-nlu with Apache License 2.0
def persist(self, path):
    path.mkdir()

    vectorizer_ = None
    if self._tfidf_vectorizer is not None:
        vocab = {k: int(v) for k, v in iteritems(self.vocabulary)}
        idf_diag = self.idf_diag.tolist()
        vectorizer_ = {
            "vocab": vocab,
            "idf_diag": idf_diag
        }

    builtin_entity_scope = None
    if self.builtin_entity_scope is not None:
        builtin_entity_scope = list(self.builtin_entity_scope)

    self_as_dict = {
        "vectorizer": vectorizer_,
        "language_code": self.language,
        "builtin_entity_scope": builtin_entity_scope,
        "config": self.config.to_dict(),
    }

    vectorizer_path = path / "vectorizer.json"
    with vectorizer_path.open("w", encoding="utf-8") as f:
        f.write(json_string(self_as_dict))
    self.persist_metadata(path)
Example #21
Source File: utils.py From snips-nlu with Apache License 2.0
def extract_utterance_entities(dataset):
    entities_values = {ent_name: set() for ent_name in dataset[ENTITIES]}

    for intent in itervalues(dataset[INTENTS]):
        for utterance in intent[UTTERANCES]:
            for chunk in utterance[DATA]:
                if ENTITY in chunk:
                    entities_values[chunk[ENTITY]].add(chunk[TEXT].strip())
    return {k: list(v) for k, v in iteritems(entities_values)}
Example #22
Source File: provider.py From Adafruit_Python_BluefruitLE with MIT License
def _get_objects(self, interface, parent_path='/org/bluez'):
    """Return a list of all bluez DBus objects that implement the requested
    interface name and are under the specified path.  The default is to
    search devices under the root of all bluez objects.
    """
    # Iterate through all the objects in bluez's DBus hierarchy and return
    # any that implement the requested interface under the specified path.
    parent_path = parent_path.lower()
    objects = []
    for opath, interfaces in iteritems(self._bluez.GetManagedObjects()):
        if interface in interfaces.keys() and opath.lower().startswith(parent_path):
            objects.append(self._bus.get_object('org.bluez', opath))
    return objects
Example #23
Source File: export.py From django_OA with GNU General Public License v3.0
def _to_xml(self, xml, data):
    if isinstance(data, (list, tuple)):
        for item in data:
            xml.startElement("row", {})
            self._to_xml(xml, item)
            xml.endElement("row")
    elif isinstance(data, dict):
        for key, value in iteritems(data):
            key = key.replace(' ', '_')
            xml.startElement(key, {})
            self._to_xml(xml, value)
            xml.endElement(key)
    else:
        xml.characters(smart_text(data))
Example #24
Source File: export.py From CTF_AWD_Platform with MIT License
def _to_xml(self, xml, data):
    if isinstance(data, (list, tuple)):
        for item in data:
            xml.startElement("row", {})
            self._to_xml(xml, item)
            xml.endElement("row")
    elif isinstance(data, dict):
        for key, value in iteritems(data):
            key = key.replace(' ', '_')
            xml.startElement(key, {})
            self._to_xml(xml, value)
            xml.endElement(key)
    else:
        xml.characters(smart_text(data))
Example #25
Source File: export.py From myblog with GNU Affero General Public License v3.0
def _to_xml(self, xml, data):
    if isinstance(data, (list, tuple)):
        for item in data:
            xml.startElement("row", {})
            self._to_xml(xml, item)
            xml.endElement("row")
    elif isinstance(data, dict):
        for key, value in iteritems(data):
            key = key.replace(' ', '_')
            xml.startElement(key, {})
            self._to_xml(xml, value)
            xml.endElement(key)
    else:
        xml.characters(smart_text(data))
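The same serializer appears in the three examples above, so here is one dependency-free sketch of its end-to-end behavior, with Django's smart_text swapped for plain str and all other names illustrative:

    import io
    from xml.sax.saxutils import XMLGenerator

    def to_xml(xml, data):
        # Same recursion as the _to_xml examples above, minus the class context.
        if isinstance(data, (list, tuple)):
            for item in data:
                xml.startElement("row", {})
                to_xml(xml, item)
                xml.endElement("row")
        elif isinstance(data, dict):
            for key, value in data.items():
                key = key.replace(' ', '_')  # spaces are illegal in tag names
                xml.startElement(key, {})
                to_xml(xml, value)
                xml.endElement(key)
        else:
            xml.characters(str(data))

    out = io.StringIO()
    gen = XMLGenerator(out, "utf-8")
    gen.startElement("objects", {})
    to_xml(gen, [{"first name": "Ada", "id": 1}])
    gen.endElement("objects")
    print(out.getvalue())
    # <objects><row><first_name>Ada</first_name><id>1</id></row></objects>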
Example #26
Source File: base.py From mendeley-python-sdk with Apache License 2.0
def add_query_params(url, params):
    scheme, netloc, path, query_string, fragment = urlsplit(url)
    query_params = parse_qs(query_string)

    for name, value in iteritems(params):
        if value:
            query_params[name] = [value]

    new_query_string = urlencode(query_params, doseq=True)
    return urlunsplit((scheme, netloc, path, new_query_string, fragment))
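A quick, hypothetical call showing what this helper produces; note that parameters with falsy values are skipped:

    url = add_query_params("https://api.example.com/search?q=python",
                           {"page": "2", "empty": ""})
    print(url)  # https://api.example.com/search?q=python&page=2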
Example #27
Source File: bear.py From caspanda with MIT License
def _sync_metadata(self, kp):
    """
    Syncs all of the metadata keyspaces and their underlying tables and
    columns. Sets keyspace to be a dict of all MetaKeyspace in the
    connection by name:MetaKeyspace
    :return:
    """
    self.keyspaces = {}

    # TODO: Turn off warnings when this occurs
    self.session.row_factory = dict_factory

    # gets all of the column data for all tables/keyspaces
    result = self.session.execute(
        """SELECT keyspace_name, columnfamily_name, column_name,
        component_index, index_name, index_options, index_type,
        type as cql_type, validator FROM system.schema_columns""")
    cols = [ColumnMeta(**row) for row in result]

    for i in cols:
        # create keyspace if not already exists
        if self.keyspaces.get(i.keyspace) is None:
            self.keyspaces.update({i.keyspace: KeyspaceMeta(i.keyspace)})

        # add table if not already exists
        kp = self.keyspaces.get(i.keyspace)
        if kp.tables.get(i.table) is None:
            kp.tables.update({i.table: TableMeta(i.keyspace, i.table)})

        # finally add/overwrite column into table
        tb = kp.tables.get(i.table)
        tb.columns[i.name] = i

    for kp_nm, kp in iteritems(self.keyspaces):
        for tbl_nm, tbl in iteritems(kp.tables):
            tbl.categorize_columns()

    self.session.row_factory = self.panda_factory
Example #28
Source File: graph_view.py From pyflowgraph with BSD 3-Clause "New" or "Revised" License
def frameAllNodes(self):
    allnodes = []
    for name, node in iteritems(self.__nodes):
        allnodes.append(node)
    self.frameNodes(allnodes)
Example #29
Source File: misc.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0
def update(self, iterable=None, **kwds):
    '''Like dict.update() but add counts instead of replacing them.

    Source can be an iterable, a dictionary, or another Counter instance.

    >>> c = Counter('which')
    >>> c.update('witch')           # add elements from another iterable
    >>> d = Counter('watch')
    >>> c.update(d)                 # add elements from another counter
    >>> c['h']                      # four 'h' in which, witch, and watch
    4

    '''
    if iterable is not None:
        if hasattr(iterable, 'iteritems'):
            if self:
                self_get = self.get
                for elem, count in iteritems(iterable):
                    self[elem] = self_get(elem, 0) + count
            else:
                # fast path when counter is empty
                dict.update(self, iterable)
        else:
            self_get = self.get
            for elem in iterable:
                self[elem] = self_get(elem, 0) + 1
    if kwds:
        self.update(kwds)
Example #30
Source File: custom_entity_parser.py From snips-nlu with Apache License 2.0
def _merge_entity_utterances(raw_utterances, stemmed_utterances):
    # Sort by resolved value, so that values conflict in a deterministic way
    for raw_stemmed_value, resolved_value in sorted(
            iteritems(stemmed_utterances), key=operator.itemgetter(1)):
        if raw_stemmed_value not in raw_utterances:
            raw_utterances[raw_stemmed_value] = resolved_value
    return raw_utterances
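A tiny plain-dict illustration of the deterministic-merge idea, with made-up values: sorting by resolved value fixes the order in which conflicting stemmed keys are considered, so repeated runs always keep the same winner:

    import operator

    raw = {"ny": "New York"}
    stemmed = {"new york citi": "New York City", "ny": "New York City"}
    for key, value in sorted(stemmed.items(), key=operator.itemgetter(1)):
        if key not in raw:
            raw[key] = value
    print(raw)  # {'ny': 'New York', 'new york citi': 'New York City'}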