Python allennlp.common.util.namespace_match() Examples

The following are 13 code examples of allennlp.common.util.namespace_match(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module allennlp.common.util, or try the search function.
Example #1
Source File: exvocab.py    From combine-FEVER-NSMN with MIT License 6 votes vote down vote up
def initialize_dictionary(self, namespace: str, unk_num: int, mode: MappingMode):
        """
        Install the initial mapping for ``namespace`` according to ``mode``.

        A namespace matching any pattern in ``self._non_padded_namespaces``
        starts out as an empty ``dict``.  Otherwise:

        * ``MappingMode.token2index`` -- a ``RandomHashDict`` is created, the
          padding token is mapped to index 0 and ``add_unk_tokens()`` is called
          on it.
        * ``MappingMode.index2token`` -- a plain ``dict`` maps index 0 to the
          padding token and indices ``1..unk_num`` to placeholder OOV strings.

        Any other ``mode`` leaves the vocabulary untouched.
        """
        building_token2index = mode == MappingMode.token2index
        if not building_token2index and not mode == MappingMode.index2token:
            # Unrecognised mode: nothing to initialise.
            return

        non_padded = any(
            namespace_match(pattern, namespace) for pattern in self._non_padded_namespaces
        )

        if non_padded:
            mapping = {}
        elif building_token2index:
            mapping = RandomHashDict(unk_num=unk_num, oov_token=self.oov_token)
            mapping.update({self.padding_token: 0})
            mapping.add_unk_tokens()
        else:
            mapping = {0: self.padding_token}
            for i in range(unk_num):
                # Index 0 is taken by the padding token, so OOV slots start at 1.
                mapping[i + 1] = f"@@{self.oov_token}#{str(i)}@@"

        # Bypass any overridden __setitem__ and write straight into the dict.
        dict.__setitem__(self, namespace, mapping)
Example #2
Source File: exvocab.py    From semanticRetrievalMRS with MIT License 6 votes vote down vote up
def initialize_dictionary(self, namespace: str, unk_num: int, mode: MappingMode):
        """
        Create the starting dictionary stored under ``namespace``.

        For both supported modes, a namespace that matches one of the patterns
        in ``self._non_padded_namespaces`` gets an empty ``dict``.  A padded
        namespace gets either a ``RandomHashDict`` holding the padding token at
        index 0 (``token2index``; ``add_unk_tokens()`` is then invoked on it) or
        a ``dict`` holding the padding token at index 0 followed by ``unk_num``
        placeholder OOV entries (``index2token``).  Other modes are a no-op.
        """

        def _is_non_padded():
            # True as soon as one non-padded pattern matches the namespace.
            for pattern in self._non_padded_namespaces:
                if namespace_match(pattern, namespace):
                    return True
            return False

        if mode == MappingMode.token2index:
            if _is_non_padded():
                fresh = {}
            else:
                fresh = RandomHashDict(unk_num=unk_num, oov_token=self.oov_token)
                fresh.update({self.padding_token: 0})
                fresh.add_unk_tokens()
        elif mode == MappingMode.index2token:
            if _is_non_padded():
                fresh = {}
            else:
                fresh = {0: self.padding_token}
                # Placeholder OOV entries occupy the indices right after padding.
                fresh.update(
                    (i + 1, f"@@{self.oov_token}#{str(i)}@@") for i in range(unk_num)
                )
        else:
            return

        dict.__setitem__(self, namespace, fresh)
Example #3
Source File: vocabulary.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def __missing__(self, key: str):
        """
        Build, store and return the default value for a namespace not yet present.

        The value comes from ``self._non_padded_function`` when ``key`` matches
        any pattern in ``self._non_padded_namespaces``, and from
        ``self._padded_function`` otherwise.
        """
        is_non_padded = any(
            namespace_match(pattern, key) for pattern in self._non_padded_namespaces
        )
        factory = self._non_padded_function if is_non_padded else self._padded_function
        value = factory()
        # Store directly through dict so the new entry is kept for later lookups.
        dict.__setitem__(self, key, value)
        return value
Example #4
Source File: util_test.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def test_namespace_match(self):
        """Check wildcard and exact matching behaviour of ``util.namespace_match``."""
        # (pattern, namespace) pairs that are expected to match.
        matching_pairs = [
            ("*tags", "tags"),
            ("*tags", "passage_tags"),
            ("*tags", "question_tags"),
            ("tokens", "tokens"),
        ]
        for pattern, namespace in matching_pairs:
            assert util.namespace_match(pattern, namespace)

        # A pattern without a wildcard must not match a longer namespace.
        assert not util.namespace_match("tokens", "stemmed_tokens")
Example #5
Source File: exvocab.py    From combine-FEVER-NSMN with MIT License 5 votes vote down vote up
def __missing__(self, key: str):
        """
        Create the default entry for ``key``, store it, and return it.

        ``self._non_padded_function`` supplies the value if any pattern in
        ``self._non_padded_namespaces`` matches ``key``; otherwise
        ``self._padded_function`` does.
        """
        for pattern in self._non_padded_namespaces:
            if namespace_match(pattern, key):
                value = self._non_padded_function()
                break
        else:
            # No non-padded pattern matched: this namespace is padded.
            value = self._padded_function()
        dict.__setitem__(self, key, value)
        return value
Example #6
Source File: vocabulary.py    From magnitude with MIT License 5 votes vote down vote up
def __missing__(self, key):
        """
        Produce and remember the default value for a missing namespace ``key``.

        A key matching any pattern in ``self._non_padded_namespaces`` is
        initialised with ``self._non_padded_function()``; every other key is
        initialised with ``self._padded_function()``.
        """
        matched = False
        for pattern in self._non_padded_namespaces:
            if namespace_match(pattern, key):
                matched = True
                break
        value = self._non_padded_function() if matched else self._padded_function()
        dict.__setitem__(self, key, value)
        return value
Example #7
Source File: vocabulary.py    From magnitude with MIT License 5 votes vote down vote up
def from_files(cls, directory):
        u"""
        Loads a ``Vocabulary`` that was serialized using ``save_to_files``.

        Every entry in ``directory`` other than the namespace-padding file is
        treated as one namespace file.  Hidden entries (names starting with
        ``.``, such as ``.DS_Store`` or editor/lock files) are skipped so they
        are not loaded as namespaces.

        Parameters
        ----------
        directory : ``str``
            The directory containing the serialized vocabulary.

        Returns
        -------
        The ``Vocabulary`` populated from the files in ``directory``.
        """
        logger.info(u"Loading token dictionary from %s.", directory)
        with codecs.open(os.path.join(directory, NAMESPACE_PADDING_FILE), u'r', u'utf-8') as namespace_file:
            non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

        vocab = Vocabulary(non_padded_namespaces=non_padded_namespaces)

        # Check every file in the directory.
        for namespace_filename in os.listdir(directory):
            if namespace_filename == NAMESPACE_PADDING_FILE:
                continue
            if namespace_filename.startswith(u'.'):
                # Hidden files are not vocabulary namespaces.
                continue
            namespace = namespace_filename.replace(u'.txt', u'')
            # A namespace is padded unless it matches a non-padded pattern.
            is_padded = not any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces)
            filename = os.path.join(directory, namespace_filename)
            vocab.set_from_file(filename, is_padded, namespace=namespace)

        return vocab
Example #8
Source File: test_util.py    From magnitude with MIT License 5 votes vote down vote up
def test_namespace_match(self):
        u"""Check wildcard and exact matching behaviour of ``util.namespace_match``."""
        # Namespaces that the wildcard pattern is expected to accept.
        for namespace in (u"tags", u"passage_tags", u"question_tags"):
            assert util.namespace_match(u"*tags", namespace)

        # Without a wildcard only the identical namespace matches.
        exact_pattern = u"tokens"
        assert util.namespace_match(exact_pattern, u"tokens")
        assert not util.namespace_match(exact_pattern, u"stemmed_tokens")
Example #9
Source File: exvocab.py    From semanticRetrievalMRS with MIT License 5 votes vote down vote up
def __missing__(self, key: str):
        """
        Supply the default value for a namespace that has no entry yet.

        Keys that match none of ``self._non_padded_namespaces`` receive
        ``self._padded_function()``; keys that match at least one pattern
        receive ``self._non_padded_function()``.  The value is stored under
        ``key`` and returned.
        """
        patterns = self._non_padded_namespaces
        if not any(namespace_match(pattern, key) for pattern in patterns):
            value = self._padded_function()
        else:
            value = self._non_padded_function()
        dict.__setitem__(self, key, value)
        return value
Example #10
Source File: vocabulary_multitask.py    From scicite with Apache License 2.0 5 votes vote down vote up
def __missing__(self, key: str):
        """
        Initialise, store and return the mapping for a previously unseen ``key``.

        The non-padded factory is used when ``key`` matches a pattern in
        ``self._non_padded_namespaces``; the padded factory is used otherwise.
        """
        non_padded = any(
            namespace_match(pattern, key) for pattern in self._non_padded_namespaces
        )
        value = self._non_padded_function() if non_padded else self._padded_function()
        dict.__setitem__(self, key, value)
        return value
Example #11
Source File: vocabulary_multitask.py    From scicite with Apache License 2.0 5 votes vote down vote up
def from_files(cls, directory: str) -> 'Vocabulary':
        """
        Loads a ``Vocabulary`` that was serialized using ``save_to_files``.

        Every entry in ``directory`` other than the namespace-padding file is
        treated as one namespace file.  Hidden entries (names starting with
        ``.``, such as ``.DS_Store`` or editor/lock files) are skipped so they
        are not loaded as namespaces.

        Parameters
        ----------
        directory : ``str``
            The directory containing the serialized vocabulary.

        Returns
        -------
        The ``VocabularyMultitask`` populated from the files in ``directory``.
        """
        logger.info("Loading token dictionary from %s.", directory)
        with codecs.open(os.path.join(directory, NAMESPACE_PADDING_FILE), 'r', 'utf-8') as namespace_file:
            non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

        vocab = VocabularyMultitask(non_padded_namespaces=non_padded_namespaces)

        # Check every file in the directory.
        for namespace_filename in os.listdir(directory):
            if namespace_filename == NAMESPACE_PADDING_FILE:
                continue
            if namespace_filename.startswith('.'):
                # Hidden files are not vocabulary namespaces.
                continue
            namespace = namespace_filename.replace('.txt', '')
            # A namespace is padded unless it matches a non-padded pattern.
            is_padded = not any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces)
            filename = os.path.join(directory, namespace_filename)
            vocab.set_from_file(filename, is_padded, namespace=namespace)

        return vocab
Example #12
Source File: allennlp_bridge.py    From vampire with Apache License 2.0 5 votes vote down vote up
def from_files(cls, directory: str) -> 'Vocabulary':
        """
        Rebuild a ``Vocabulary`` from a directory written by ``save_to_files``.

        The namespace-padding file lists the non-padded namespace patterns; each
        remaining non-hidden entry in the directory is loaded as one namespace.
        The source directory is recorded on the result as ``serialization_dir``.

        Parameters
        ----------
        directory : ``str``
            The directory containing the serialized vocabulary.
        """
        logger.info("Loading token dictionary from %s.", directory)

        padding_file_path = os.path.join(directory, NAMESPACE_PADDING_FILE)
        with codecs.open(padding_file_path, 'r', 'utf-8') as namespace_file:
            non_padded_namespaces = [line.strip() for line in namespace_file]

        vocab = cls(non_padded_namespaces=non_padded_namespaces)
        vocab.serialization_dir = directory  # pylint: disable=W0201

        for entry in os.listdir(directory):
            # Skip the padding metadata file and any hidden file.
            if entry == NAMESPACE_PADDING_FILE or entry.startswith("."):
                continue
            namespace = entry.replace('.txt', '')
            # Padded unless the namespace matches one of the non-padded patterns.
            is_padded = not any(
                namespace_match(pattern, namespace) for pattern in non_padded_namespaces
            )
            vocab.set_from_file(os.path.join(directory, entry), is_padded, namespace=namespace)

        return vocab
Example #13
Source File: vocabulary.py    From allennlp with Apache License 2.0 4 votes vote down vote up
def from_files(
        cls,
        directory: str,
        padding_token: Optional[str] = DEFAULT_PADDING_TOKEN,
        oov_token: Optional[str] = DEFAULT_OOV_TOKEN,
    ) -> "Vocabulary":
        """
        Rebuild a `Vocabulary` that was written by `save_to_files` or that is stored
        inside a model archive file.

        # Parameters

        directory : `str`
            The directory or archive file containing the serialized vocabulary.
        padding_token : `Optional[str]`
            Padding token to use; `None` falls back to `DEFAULT_PADDING_TOKEN`.
        oov_token : `Optional[str]`
            Out-of-vocabulary token to use; `None` falls back to `DEFAULT_OOV_TOKEN`.

        # Returns

        The vocabulary instance populated from the namespace files.

        # Raises

        `ConfigurationError` if `directory` is not a directory and the path
        resolved from it is not a directory either.
        """
        logger.info("Loading token dictionary from %s.", directory)
        if padding_token is None:
            padding_token = DEFAULT_PADDING_TOKEN
        if oov_token is None:
            oov_token = DEFAULT_OOV_TOKEN

        if not os.path.isdir(directory):
            # Not a plain directory: resolve it through cached_path, asking for
            # archive extraction.
            base_directory = cached_path(directory, extract_archive=True)
            # Prefer a 'vocabulary' subdirectory so model archives can be used directly.
            vocab_subdir = os.path.join(base_directory, "vocabulary")
            if os.path.isdir(vocab_subdir):
                directory = vocab_subdir
            elif os.path.isdir(base_directory):
                directory = base_directory
            else:
                raise ConfigurationError(f"{directory} is neither a directory nor an archive")

        # The lock file guards against several processes reading/writing the
        # same vocab files at the same time.
        lock_path = os.path.join(directory, ".lock")
        padding_file_path = os.path.join(directory, NAMESPACE_PADDING_FILE)
        with FileLock(lock_path):
            with codecs.open(padding_file_path, "r", "utf-8") as namespace_file:
                non_padded_namespaces = [line.strip() for line in namespace_file]

            vocab = cls(
                non_padded_namespaces=non_padded_namespaces,
                padding_token=padding_token,
                oov_token=oov_token,
            )

            for entry in os.listdir(directory):
                # Skip the padding metadata file and hidden files (including the lock).
                if entry == NAMESPACE_PADDING_FILE or entry.startswith("."):
                    continue
                namespace = entry.replace(".txt", "")
                # Padded unless the namespace matches one of the non-padded patterns.
                is_padded = not any(
                    namespace_match(pattern, namespace) for pattern in non_padded_namespaces
                )
                vocab.set_from_file(
                    os.path.join(directory, entry),
                    is_padded,
                    namespace=namespace,
                    oov_token=oov_token,
                )

        return vocab