Python Examples of allennlp.common.util.namespace

Source File: exvocab.py From combine-FEVER-NSMN with MIT License

6 votes

def initialize_dictionary(self, namespace: str, unk_num: int, mode: MappingMode):
        if mode == MappingMode.token2index:
            if any(namespace_match(pattern, namespace) for pattern in self._non_padded_namespaces):
                dict.__setitem__(self, namespace, {})
            else:
                init_namespace_dictionary = RandomHashDict(unk_num=unk_num, oov_token=self.oov_token)
                init_namespace_dictionary.update({self.padding_token: 0})
                init_namespace_dictionary.add_unk_tokens()

                dict.__setitem__(self, namespace, init_namespace_dictionary)

        elif mode == MappingMode.index2token:
            if any(namespace_match(pattern, namespace) for pattern in self._non_padded_namespaces):
                dict.__setitem__(self, namespace, {})
            else:
                init_namespace_dictionary = {0: self.padding_token}
                for i in range(unk_num):
                    init_namespace_dictionary[len(init_namespace_dictionary)] = f"@@{self.oov_token}#{str(i)}@@"

                dict.__setitem__(self, namespace, init_namespace_dictionary)

Source File: exvocab.py From semanticRetrievalMRS with MIT License

6 votes

def initialize_dictionary(self, namespace: str, unk_num: int, mode: MappingMode):
        if mode == MappingMode.token2index:
            if any(namespace_match(pattern, namespace) for pattern in self._non_padded_namespaces):
                dict.__setitem__(self, namespace, {})
            else:
                init_namespace_dictionary = RandomHashDict(unk_num=unk_num, oov_token=self.oov_token)
                init_namespace_dictionary.update({self.padding_token: 0})
                init_namespace_dictionary.add_unk_tokens()

                dict.__setitem__(self, namespace, init_namespace_dictionary)

        elif mode == MappingMode.index2token:
            if any(namespace_match(pattern, namespace) for pattern in self._non_padded_namespaces):
                dict.__setitem__(self, namespace, {})
            else:
                init_namespace_dictionary = {0: self.padding_token}
                for i in range(unk_num):
                    init_namespace_dictionary[len(init_namespace_dictionary)] = f"@@{self.oov_token}#{str(i)}@@"

                dict.__setitem__(self, namespace, init_namespace_dictionary)

Source File: vocabulary.py From allennlp with Apache License 2.0

5 votes

def __missing__(self, key: str):
        if any(namespace_match(pattern, key) for pattern in self._non_padded_namespaces):
            value = self._non_padded_function()
        else:
            value = self._padded_function()
        dict.__setitem__(self, key, value)
        return value

Source File: util_test.py From allennlp with Apache License 2.0

5 votes

def test_namespace_match(self):
        assert util.namespace_match("*tags", "tags")
        assert util.namespace_match("*tags", "passage_tags")
        assert util.namespace_match("*tags", "question_tags")
        assert util.namespace_match("tokens", "tokens")
        assert not util.namespace_match("tokens", "stemmed_tokens")

Source File: exvocab.py From combine-FEVER-NSMN with MIT License

5 votes

def __missing__(self, key: str):
        if any(namespace_match(pattern, key) for pattern in self._non_padded_namespaces):
            value = self._non_padded_function()
        else:
            value = self._padded_function()
        dict.__setitem__(self, key, value)
        return value

Source File: vocabulary.py From magnitude with MIT License

5 votes

def __missing__(self, key     ):
        if any(namespace_match(pattern, key) for pattern in self._non_padded_namespaces):
            value = self._non_padded_function()
        else:
            value = self._padded_function()
        dict.__setitem__(self, key, value)
        return value

Source File: vocabulary.py From magnitude with MIT License

5 votes

def from_files(cls, directory     )                :
        u"""
        Loads a ``Vocabulary`` that was serialized using ``save_to_files``.

        Parameters
        ----------
        directory : ``str``
            The directory containing the serialized vocabulary.
        """
        logger.info(u"Loading token dictionary from %s.", directory)
        with codecs.open(os.path.join(directory, NAMESPACE_PADDING_FILE), u'r', u'utf-8') as namespace_file:
            non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

        vocab = Vocabulary(non_padded_namespaces=non_padded_namespaces)

        # Check every file in the directory.
        for namespace_filename in os.listdir(directory):
            if namespace_filename == NAMESPACE_PADDING_FILE:
                continue
            namespace = namespace_filename.replace(u'.txt', u'')
            if any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces):
                is_padded = False
            else:
                is_padded = True
            filename = os.path.join(directory, namespace_filename)
            vocab.set_from_file(filename, is_padded, namespace=namespace)

        return vocab

Source File: test_util.py From magnitude with MIT License

5 votes

def test_namespace_match(self):
        assert util.namespace_match(u"*tags", u"tags")
        assert util.namespace_match(u"*tags", u"passage_tags")
        assert util.namespace_match(u"*tags", u"question_tags")
        assert util.namespace_match(u"tokens", u"tokens")
        assert not util.namespace_match(u"tokens", u"stemmed_tokens")

Source File: exvocab.py From semanticRetrievalMRS with MIT License

5 votes

def __missing__(self, key: str):
        if any(namespace_match(pattern, key) for pattern in self._non_padded_namespaces):
            value = self._non_padded_function()
        else:
            value = self._padded_function()
        dict.__setitem__(self, key, value)
        return value

Source File: vocabulary_multitask.py From scicite with Apache License 2.0

5 votes

def __missing__(self, key: str):
        if any(namespace_match(pattern, key) for pattern in self._non_padded_namespaces):
            value = self._non_padded_function()
        else:
            value = self._padded_function()
        dict.__setitem__(self, key, value)
        return value

Source File: vocabulary_multitask.py From scicite with Apache License 2.0

5 votes

def from_files(cls, directory: str) -> 'Vocabulary':
        """
        Loads a ``Vocabulary`` that was serialized using ``save_to_files``.

        Parameters
        ----------
        directory : ``str``
            The directory containing the serialized vocabulary.
        """
        logger.info("Loading token dictionary from %s.", directory)
        with codecs.open(os.path.join(directory, NAMESPACE_PADDING_FILE), 'r', 'utf-8') as namespace_file:
            non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

        vocab = VocabularyMultitask(non_padded_namespaces=non_padded_namespaces)

        # Check every file in the directory.
        for namespace_filename in os.listdir(directory):
            if namespace_filename == NAMESPACE_PADDING_FILE:
                continue
            namespace = namespace_filename.replace('.txt', '')
            if any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces):
                is_padded = False
            else:
                is_padded = True
            filename = os.path.join(directory, namespace_filename)
            vocab.set_from_file(filename, is_padded, namespace=namespace)

        return vocab

Source File: allennlp_bridge.py From vampire with Apache License 2.0

5 votes

def from_files(cls, directory: str) -> 'Vocabulary':
        """
        Loads a ``Vocabulary`` that was serialized using ``save_to_files``.
        Parameters
        ----------
        directory : ``str``
            The directory containing the serialized vocabulary.
        """

        logger.info("Loading token dictionary from %s.", directory)
        with codecs.open(os.path.join(directory, NAMESPACE_PADDING_FILE), 'r', 'utf-8') as namespace_file:
            non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

        vocab = cls(non_padded_namespaces=non_padded_namespaces)
        vocab.serialization_dir = directory  # pylint: disable=W0201
        # Check every file in the directory.
        for namespace_filename in os.listdir(directory):
            if namespace_filename == NAMESPACE_PADDING_FILE:
                continue
            if namespace_filename.startswith("."):
                continue
            namespace = namespace_filename.replace('.txt', '')
            if any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces):
                is_padded = False
            else:
                is_padded = True
            filename = os.path.join(directory, namespace_filename)
            vocab.set_from_file(filename, is_padded, namespace=namespace)

        return vocab

Source File: vocabulary.py From allennlp with Apache License 2.0

4 votes

def from_files(
        cls,
        directory: str,
        padding_token: Optional[str] = DEFAULT_PADDING_TOKEN,
        oov_token: Optional[str] = DEFAULT_OOV_TOKEN,
    ) -> "Vocabulary":
        """
        Loads a `Vocabulary` that was serialized either using `save_to_files` or inside
        a model archive file.

        # Parameters

        directory : `str`
            The directory or archive file containing the serialized vocabulary.
        """
        logger.info("Loading token dictionary from %s.", directory)
        padding_token = padding_token if padding_token is not None else DEFAULT_PADDING_TOKEN
        oov_token = oov_token if oov_token is not None else DEFAULT_OOV_TOKEN

        if not os.path.isdir(directory):
            base_directory = cached_path(directory, extract_archive=True)
            # For convenience we'll check for a 'vocabulary' subdirectory of the archive.
            # That way you can use model archives directly.
            vocab_subdir = os.path.join(base_directory, "vocabulary")
            if os.path.isdir(vocab_subdir):
                directory = vocab_subdir
            elif os.path.isdir(base_directory):
                directory = base_directory
            else:
                raise ConfigurationError(f"{directory} is neither a directory nor an archive")

        # We use a lock file to avoid race conditions where multiple processes
        # might be reading/writing from/to the same vocab files at once.
        with FileLock(os.path.join(directory, ".lock")):
            with codecs.open(
                os.path.join(directory, NAMESPACE_PADDING_FILE), "r", "utf-8"
            ) as namespace_file:
                non_padded_namespaces = [namespace_str.strip() for namespace_str in namespace_file]

            vocab = cls(
                non_padded_namespaces=non_padded_namespaces,
                padding_token=padding_token,
                oov_token=oov_token,
            )

            # Check every file in the directory.
            for namespace_filename in os.listdir(directory):
                if namespace_filename == NAMESPACE_PADDING_FILE:
                    continue
                if namespace_filename.startswith("."):
                    continue
                namespace = namespace_filename.replace(".txt", "")
                if any(namespace_match(pattern, namespace) for pattern in non_padded_namespaces):
                    is_padded = False
                else:
                    is_padded = True
                filename = os.path.join(directory, namespace_filename)
                vocab.set_from_file(filename, is_padded, namespace=namespace, oov_token=oov_token)

        return vocab

Python allennlp.common.util.namespace_match() Examples