Python allennlp.common.util.namespace_match() Examples
The following are 13 code examples of allennlp.common.util.namespace_match().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module allennlp.common.util, or try the search function.
Example #1
Source File: exvocab.py From combine-FEVER-NSMN with MIT License | 6 votes |
def initialize_dictionary(self, namespace: str, unk_num: int, mode: MappingMode):
    """
    Store the initial mapping for ``namespace`` on this dictionary.

    Parameters
    ----------
    namespace : ``str``
        The namespace whose mapping is being created.
    unk_num : ``int``
        Number of unknown-token slots; passed to ``RandomHashDict`` in
        ``token2index`` mode and used as the number of generated
        ``@@<oov_token>#<i>@@`` entries in ``index2token`` mode.
    mode : ``MappingMode``
        ``MappingMode.token2index`` or ``MappingMode.index2token``. Any other
        value leaves the dictionary untouched.

    A namespace matching one of ``self._non_padded_namespaces`` always starts
    out as an empty ``dict``.
    """

    def namespace_is_non_padded() -> bool:
        # Evaluated lazily so that an unrecognised mode does no matching at all.
        return any(
            namespace_match(pattern, namespace) for pattern in self._non_padded_namespaces
        )

    if mode == MappingMode.token2index:
        if namespace_is_non_padded():
            fresh_mapping = {}
        else:
            fresh_mapping = RandomHashDict(unk_num=unk_num, oov_token=self.oov_token)
            fresh_mapping.update({self.padding_token: 0})
            fresh_mapping.add_unk_tokens()
    elif mode == MappingMode.index2token:
        if namespace_is_non_padded():
            fresh_mapping = {}
        else:
            # Index 0 is the padding token; indices 1..unk_num are the unknown slots.
            fresh_mapping = {0: self.padding_token}
            for unk_id in range(unk_num):
                fresh_mapping[unk_id + 1] = f"@@{self.oov_token}#{unk_id}@@"
    else:
        return

    # Bypass any overridden ``__setitem__`` on the subclass.
    dict.__setitem__(self, namespace, fresh_mapping)
Example #2
Source File: exvocab.py From semanticRetrievalMRS with MIT License | 6 votes |
def initialize_dictionary(self, namespace: str, unk_num: int, mode: MappingMode):
    """
    Create and store the starting mapping for ``namespace``.

    Parameters
    ----------
    namespace : ``str``
        Name of the namespace to initialise.
    unk_num : ``int``
        How many unknown-token entries the padded mapping should carry.
    mode : ``MappingMode``
        Direction of the mapping. ``token2index`` builds a ``RandomHashDict``
        seeded with the padding token at index 0; ``index2token`` builds a plain
        ``dict`` with the padding token at 0 followed by ``unk_num`` generated
        ``@@<oov_token>#<i>@@`` entries. Other modes are ignored.

    Namespaces matching ``self._non_padded_namespaces`` get an empty ``dict``
    in either mode.
    """
    if mode == MappingMode.token2index:
        if any(namespace_match(glob, namespace) for glob in self._non_padded_namespaces):
            dict.__setitem__(self, namespace, {})
            return
        token_to_index = RandomHashDict(unk_num=unk_num, oov_token=self.oov_token)
        token_to_index.update({self.padding_token: 0})
        token_to_index.add_unk_tokens()
        dict.__setitem__(self, namespace, token_to_index)
        return

    if mode == MappingMode.index2token:
        if any(namespace_match(glob, namespace) for glob in self._non_padded_namespaces):
            dict.__setitem__(self, namespace, {})
            return
        index_to_token = {0: self.padding_token}
        # Slot 0 is taken by padding, so the i-th unknown token lives at i + 1.
        for slot, unk_id in enumerate(range(unk_num), start=1):
            index_to_token[slot] = f"@@{self.oov_token}#{unk_id}@@"
        dict.__setitem__(self, namespace, index_to_token)
Example #3
Source File: vocabulary.py From allennlp with Apache License 2.0 | 5 votes |
def __missing__(self, key: str):
    """
    Build, store and return the default value for a namespace that is absent.

    The value comes from ``self._non_padded_function`` when ``key`` matches any
    pattern in ``self._non_padded_namespaces``, otherwise from
    ``self._padded_function``.
    """
    is_non_padded = any(
        namespace_match(pattern, key) for pattern in self._non_padded_namespaces
    )
    factory = self._non_padded_function if is_non_padded else self._padded_function
    value = factory()
    # Write through ``dict`` directly so the entry exists on the next lookup.
    dict.__setitem__(self, key, value)
    return value
Example #4
Source File: util_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_namespace_match(self):
    """Check ``util.namespace_match`` for wildcard-prefixed and exact patterns."""
    expected_matches = [
        ("*tags", "tags"),
        ("*tags", "passage_tags"),
        ("*tags", "question_tags"),
        ("tokens", "tokens"),
    ]
    for pattern, namespace in expected_matches:
        assert util.namespace_match(pattern, namespace)

    # A pattern without a wildcard must not match a longer namespace.
    assert not util.namespace_match("tokens", "stemmed_tokens")
Example #5
Source File: exvocab.py From combine-FEVER-NSMN with MIT License | 5 votes |
def __missing__(self, key: str):
    """
    Create the default entry for namespace ``key``, store it and return it.

    ``self._non_padded_function`` supplies the entry when ``key`` matches one of
    ``self._non_padded_namespaces``; ``self._padded_function`` supplies it
    otherwise.
    """
    matched = False
    for pattern in self._non_padded_namespaces:
        if namespace_match(pattern, key):
            matched = True
            break  # one matching pattern is enough

    if matched:
        value = self._non_padded_function()
    else:
        value = self._padded_function()

    dict.__setitem__(self, key, value)
    return value
Example #6
Source File: vocabulary.py From magnitude with MIT License | 5 votes |
def __missing__(self, key):
    u"""
    Produce, store and return the default value for the missing namespace ``key``.

    Namespaces matching any pattern in ``self._non_padded_namespaces`` are built
    with ``self._non_padded_function``; every other namespace is built with
    ``self._padded_function``.
    """
    matched = any(namespace_match(pattern, key) for pattern in self._non_padded_namespaces)
    value = self._non_padded_function() if matched else self._padded_function()
    dict.__setitem__(self, key, value)
    return value
Example #7
Source File: vocabulary.py From magnitude with MIT License | 5 votes |
def from_files(cls, directory):
    u"""
    Loads a ``Vocabulary`` that was serialized using ``save_to_files``.

    Parameters
    ----------
    directory : ``str``
        The directory containing the serialized vocabulary.

    Returns
    -------
    A ``Vocabulary`` with one namespace loaded per vocabulary file found in
    ``directory``. Hidden files (names starting with ``.``) are ignored.
    """
    logger.info(u"Loading token dictionary from %s.", directory)
    with codecs.open(os.path.join(directory, NAMESPACE_PADDING_FILE), u'r', u'utf-8') as namespace_file:
        non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

    vocab = Vocabulary(non_padded_namespaces=non_padded_namespaces)

    # Check every file in the directory.
    for namespace_filename in os.listdir(directory):
        if namespace_filename == NAMESPACE_PADDING_FILE:
            continue
        # Hidden files such as ``.DS_Store`` or lock files are not namespaces;
        # loading them would register a bogus namespace or fail to parse.
        if namespace_filename.startswith(u"."):
            continue
        namespace = namespace_filename.replace(u'.txt', u'')
        # A namespace is padded unless it matches one of the non-padded patterns.
        is_padded = not any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces)
        filename = os.path.join(directory, namespace_filename)
        vocab.set_from_file(filename, is_padded, namespace=namespace)

    return vocab
Example #8
Source File: test_util.py From magnitude with MIT License | 5 votes |
def test_namespace_match(self):
    u"""Verify wildcard and exact-name behaviour of ``util.namespace_match``."""
    match = util.namespace_match

    # A leading ``*`` pattern matches the bare suffix and prefixed variants.
    for namespace in (u"tags", u"passage_tags", u"question_tags"):
        assert match(u"*tags", namespace)

    # Without a wildcard only the identical namespace matches.
    assert match(u"tokens", u"tokens")
    assert not match(u"tokens", u"stemmed_tokens")
Example #9
Source File: exvocab.py From semanticRetrievalMRS with MIT License | 5 votes |
def __missing__(self, key: str):
    """
    Supply the default value for a namespace ``key`` that has no entry yet.

    The value is created by ``self._padded_function`` unless ``key`` matches a
    pattern in ``self._non_padded_namespaces``, in which case
    ``self._non_padded_function`` is used. The value is stored under ``key``
    and returned.
    """
    needs_padding = not any(
        namespace_match(glob, key) for glob in self._non_padded_namespaces
    )
    if needs_padding:
        default_value = self._padded_function()
    else:
        default_value = self._non_padded_function()
    dict.__setitem__(self, key, default_value)
    return default_value
Example #10
Source File: vocabulary_multitask.py From scicite with Apache License 2.0 | 5 votes |
def __missing__(self, key: str):
    """
    Lazily create the mapping for namespace ``key``.

    ``self._non_padded_function`` builds the mapping if ``key`` matches any of
    ``self._non_padded_namespaces``; otherwise ``self._padded_function`` does.
    The new mapping is stored under ``key`` and returned.
    """
    # ``next`` stops at the first matching pattern, like ``any`` would.
    hit = next(
        (True for pattern in self._non_padded_namespaces if namespace_match(pattern, key)),
        False,
    )
    if hit:
        created = self._non_padded_function()
    else:
        created = self._padded_function()
    dict.__setitem__(self, key, created)
    return created
Example #11
Source File: vocabulary_multitask.py From scicite with Apache License 2.0 | 5 votes |
def from_files(cls, directory: str) -> 'Vocabulary':
    """
    Loads a ``Vocabulary`` that was serialized using ``save_to_files``.

    Parameters
    ----------
    directory : ``str``
        The directory containing the serialized vocabulary.

    Returns
    -------
    A ``VocabularyMultitask`` with one namespace loaded per vocabulary file
    found in ``directory``. Hidden files (names starting with ``.``) are
    ignored.
    """
    logger.info("Loading token dictionary from %s.", directory)
    with codecs.open(os.path.join(directory, NAMESPACE_PADDING_FILE), 'r', 'utf-8') as namespace_file:
        non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

    vocab = VocabularyMultitask(non_padded_namespaces=non_padded_namespaces)

    # Check every file in the directory.
    for namespace_filename in os.listdir(directory):
        if namespace_filename == NAMESPACE_PADDING_FILE:
            continue
        # Hidden files such as ``.DS_Store`` or lock files are not namespaces;
        # loading them would register a bogus namespace or fail to parse.
        if namespace_filename.startswith("."):
            continue
        namespace = namespace_filename.replace('.txt', '')
        # A namespace is padded unless it matches one of the non-padded patterns.
        is_padded = not any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces)
        filename = os.path.join(directory, namespace_filename)
        vocab.set_from_file(filename, is_padded, namespace=namespace)

    return vocab
Example #12
Source File: allennlp_bridge.py From vampire with Apache License 2.0 | 5 votes |
def from_files(cls, directory: str) -> 'Vocabulary':
    """
    Rebuild a vocabulary from a directory written by ``save_to_files``.

    Parameters
    ----------
    directory : ``str``
        The directory containing the serialized vocabulary.

    Returns
    -------
    An instance of ``cls`` whose ``serialization_dir`` is ``directory`` and
    which has one namespace loaded per non-hidden vocabulary file.
    """
    logger.info("Loading token dictionary from %s.", directory)

    padding_spec_path = os.path.join(directory, NAMESPACE_PADDING_FILE)
    with codecs.open(padding_spec_path, 'r', 'utf-8') as namespace_file:
        non_padded_namespaces = [line.strip() for line in namespace_file]

    vocab = cls(non_padded_namespaces=non_padded_namespaces)
    vocab.serialization_dir = directory  # pylint: disable=W0201

    for entry in os.listdir(directory):
        # Neither the padding spec itself nor hidden files hold a namespace.
        if entry == NAMESPACE_PADDING_FILE or entry.startswith("."):
            continue
        namespace = entry.replace('.txt', '')
        is_padded = not any(
            namespace_match(pattern, namespace) for pattern in non_padded_namespaces
        )
        vocab.set_from_file(os.path.join(directory, entry), is_padded, namespace=namespace)

    return vocab
Example #13
Source File: vocabulary.py From allennlp with Apache License 2.0 | 4 votes |
def from_files(
    cls,
    directory: str,
    padding_token: Optional[str] = DEFAULT_PADDING_TOKEN,
    oov_token: Optional[str] = DEFAULT_OOV_TOKEN,
) -> "Vocabulary":
    """
    Builds a `Vocabulary` from files written by `save_to_files`, or from a
    model archive file that contains them.

    # Parameters

    directory : `str`
        The directory or archive file containing the serialized vocabulary.
    padding_token : `Optional[str]`
        Padding token handed to the constructor; `None` falls back to
        `DEFAULT_PADDING_TOKEN`.
    oov_token : `Optional[str]`
        Out-of-vocabulary token handed to the constructor and to
        `set_from_file`; `None` falls back to `DEFAULT_OOV_TOKEN`.

    # Returns

    An instance of `cls` with one namespace loaded per non-hidden file.

    # Raises

    `ConfigurationError` if `directory` is not a directory and the path
    returned by `cached_path` is not one either.
    """
    logger.info("Loading token dictionary from %s.", directory)
    if padding_token is None:
        padding_token = DEFAULT_PADDING_TOKEN
    if oov_token is None:
        oov_token = DEFAULT_OOV_TOKEN

    if not os.path.isdir(directory):
        base_directory = cached_path(directory, extract_archive=True)
        # For convenience we prefer a 'vocabulary' subdirectory of the archive,
        # so that model archives can be used directly.
        candidates = (os.path.join(base_directory, "vocabulary"), base_directory)
        for candidate in candidates:
            if os.path.isdir(candidate):
                directory = candidate
                break
        else:
            raise ConfigurationError(f"{directory} is neither a directory nor an archive")

    # We use a lock file to avoid race conditions where multiple processes
    # might be reading/writing from/to the same vocab files at once.
    with FileLock(os.path.join(directory, ".lock")):
        padding_spec_path = os.path.join(directory, NAMESPACE_PADDING_FILE)
        with codecs.open(padding_spec_path, "r", "utf-8") as namespace_file:
            non_padded_namespaces = [line.strip() for line in namespace_file]

        vocab = cls(
            non_padded_namespaces=non_padded_namespaces,
            padding_token=padding_token,
            oov_token=oov_token,
        )

        for entry in os.listdir(directory):
            # Neither the padding spec nor hidden files (including the lock
            # file) hold a namespace.
            if entry == NAMESPACE_PADDING_FILE or entry.startswith("."):
                continue
            namespace = entry.replace(".txt", "")
            is_padded = not any(
                namespace_match(pattern, namespace) for pattern in non_padded_namespaces
            )
            vocab.set_from_file(
                os.path.join(directory, entry),
                is_padded,
                namespace=namespace,
                oov_token=oov_token,
            )

    return vocab