Python allennlp.data.token_indexers.SingleIdTokenIndexer() Examples

The following are 30 code examples of allennlp.data.token_indexers.SingleIdTokenIndexer(), collected from open-source projects. Each example notes its source file, originating project, and license. You may also want to check out the other functions and classes available in the allennlp.data.token_indexers module.
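Before the project examples, here is a minimal, self-contained sketch of the typical SingleIdTokenIndexer workflow: attach the indexer to a TextField, index the field against a Vocabulary, and convert it to padded tensors. This is an illustrative sketch assuming AllenNLP 1.x or later (the API used by the examples below); the token strings and vocabulary contents are arbitrary.

from allennlp.data import Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

# Map every token to a single id in the "tokens" namespace (the default).
indexer = SingleIdTokenIndexer(namespace="tokens")
field = TextField([Token(t) for t in ["a", "short", "sentence"]], {"tokens": indexer})

# Build a toy vocabulary and index the field against it.
vocab = Vocabulary()
for word in ["a", "short", "sentence"]:
    vocab.add_token_to_namespace(word, namespace="tokens")
field.index(vocab)

# Convert to padded tensors; the result is nested as {"tokens": {"tokens": tensor}}.
padding_lengths = field.get_padding_lengths()
tensors = field.as_tensor(padding_lengths)
print(tensors["tokens"]["tokens"])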
Example #1
Source File: text_classification_json.py    From allennlp with Apache License 2.0
def __init__(
        self,
        token_indexers: Dict[str, TokenIndexer] = None,
        tokenizer: Tokenizer = None,
        segment_sentences: bool = False,
        max_sequence_length: int = None,
        skip_label_indexing: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self._tokenizer = tokenizer or SpacyTokenizer()
        self._segment_sentences = segment_sentences
        self._max_sequence_length = max_sequence_length
        self._skip_label_indexing = skip_label_indexing
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        if self._segment_sentences:
            self._sentence_segmenter = SpacySentenceSplitter() 
Example #2
Source File: vocabulary_test.py    From allennlp with Apache License 2.0
def test_from_params_extend_config(self):

        vocab_dir = self.TEST_DIR / "vocab_save"
        original_vocab = Vocabulary(non_padded_namespaces=["tokens"])
        original_vocab.add_token_to_namespace("a", namespace="tokens")
        original_vocab.save_to_files(vocab_dir)

        text_field = TextField(
            [Token(t) for t in ["a", "b"]], {"tokens": SingleIdTokenIndexer("tokens")}
        )
        instances = Batch([Instance({"text": text_field})])

        # If you ask to extend vocab from `directory`, instances must be passed
        # in Vocabulary constructor, or else there is nothing to extend to.
        params = Params({"type": "extend", "directory": vocab_dir})
        with pytest.raises(ConfigurationError):
            _ = Vocabulary.from_params(params)

        # If you ask to extend vocab, `directory` key must be present in params,
        # or else there is nothing to extend from.
        params = Params({"type": "extend"})
        with pytest.raises(ConfigurationError):
            _ = Vocabulary.from_params(params, instances=instances) 
Example #3
Source File: conll2003.py    From allennlp with Apache License 2.0
def __init__(
        self,
        token_indexers: Dict[str, TokenIndexer] = None,
        tag_label: str = "ner",
        feature_labels: Sequence[str] = (),
        coding_scheme: str = "IOB1",
        label_namespace: str = "labels",
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        if tag_label is not None and tag_label not in self._VALID_LABELS:
            raise ConfigurationError("unknown tag label type: {}".format(tag_label))
        for label in feature_labels:
            if label not in self._VALID_LABELS:
                raise ConfigurationError("unknown feature label type: {}".format(label))
        if coding_scheme not in ("IOB1", "BIOUL"):
            raise ConfigurationError("unknown coding_scheme: {}".format(coding_scheme))

        self.tag_label = tag_label
        self.feature_labels = set(feature_labels)
        self.coding_scheme = coding_scheme
        self.label_namespace = label_namespace
        self._original_coding_scheme = "IOB1" 
Example #4
Source File: fever_reader_with_wn.py    From combine-FEVER-NSMN with MIT License
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 example_filter=None,
                 wn_p_dict=None, wn_feature_list=wn_persistent_api.default_fn_list,
                 max_l=None) -> None:

        super().__init__(lazy=lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer(namespace='tokens')}
        self._example_filter = example_filter
        self.wn_p_dict = wn_p_dict
        if wn_p_dict is None:
            raise ValueError("Need to specify WN feature dict for FEVER Reader.")
        self.wn_feature_list = wn_feature_list
        self.wn_feature_size = len(self.wn_feature_list) * 3
        self.max_l = max_l 
Example #5
Source File: sampler_test.py    From allennlp with Apache License 2.0
def setup_method(self):
        super().setup_method()
        self.token_indexers = {"tokens": SingleIdTokenIndexer()}
        self.vocab = Vocabulary()
        self.this_index = self.vocab.add_token_to_namespace("this")
        self.is_index = self.vocab.add_token_to_namespace("is")
        self.a_index = self.vocab.add_token_to_namespace("a")
        self.sentence_index = self.vocab.add_token_to_namespace("sentence")
        self.another_index = self.vocab.add_token_to_namespace("another")
        self.yet_index = self.vocab.add_token_to_namespace("yet")
        self.very_index = self.vocab.add_token_to_namespace("very")
        self.long_index = self.vocab.add_token_to_namespace("long")
        instances = [
            self.create_instance(["this", "is", "a", "sentence"]),
            self.create_instance(["this", "is", "another", "sentence"]),
            self.create_instance(["yet", "another", "sentence"]),
            self.create_instance(
                ["this", "is", "a", "very", "very", "very", "very", "long", "sentence"]
            ),
            self.create_instance(["sentence"]),
        ]

        self.instances = instances
        self.lazy_instances = LazyIterable(instances) 
Example #6
Source File: reader.py    From fever-naacl-2018 with Apache License 2.0
def __init__(self,
                 db: FeverDocDB,
                 sentence_level: bool = False,
                 wiki_tokenizer: Tokenizer = None,
                 claim_tokenizer: Tokenizer = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 filtering: str = None) -> None:
        self._sentence_level = sentence_level
        self._wiki_tokenizer = wiki_tokenizer or WordTokenizer()
        self._claim_tokenizer = claim_tokenizer or WordTokenizer()
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}

        self.db = db

        self.formatter = FEVERGoldFormatter(set(self.db.get_doc_ids()), FEVERLabelSchema(), filtering=filtering)
        self.reader = JSONLineReader() 
Example #7
Source File: single_id_token_indexer_test.py    From allennlp with Apache License 2.0
def test_count_vocab_items_with_non_default_feature_name(self):
        tokenizer = SpacyTokenizer(parse=True)
        tokens = tokenizer.tokenize("This is a sentence.")
        tokens = [Token("<S>")] + [t for t in tokens] + [Token("</S>")]
        indexer = SingleIdTokenIndexer(
            namespace="dep_labels", feature_name="dep_", default_value="NONE"
        )
        counter = defaultdict(lambda: defaultdict(int))
        for token in tokens:
            indexer.count_vocab_items(token, counter)

        assert counter["dep_labels"] == {
            "ROOT": 1,
            "nsubj": 1,
            "det": 1,
            "NONE": 2,
            "attr": 1,
            "punct": 1,
        } 
Example #8
Source File: datareader.py    From NLP_Toolkit with Apache License 2.0
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 delimeters: dict = SEQ_DELIMETERS,
                 skip_correct: bool = False,
                 skip_complex: int = 0,
                 lazy: bool = False,
                 max_len: int = None,
                 test_mode: bool = False,
                 tag_strategy: str = "keep_one",
                 tn_prob: float = 0,
                 tp_prob: float = 0,
                 broken_dot_strategy: str = "keep") -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._delimeters = delimeters
        self._max_len = max_len
        self._skip_correct = skip_correct
        self._skip_complex = skip_complex
        self._tag_strategy = tag_strategy
        self._broken_dot_strategy = broken_dot_strategy
        self._test_mode = test_mode
        self._tn_prob = tn_prob
        self._tp_prob = tp_prob 
Example #9
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_token_padding_lengths_are_computed_correctly(self):
        field = TextField(
            [Token(t) for t in ["A", "sentence"]],
            token_indexers={
                "field_with_dict": DictReturningTokenIndexer(token_min_padding_length=3),
                "words": SingleIdTokenIndexer("words", token_min_padding_length=3),
                "characters": TokenCharactersIndexer(
                    "characters", min_padding_length=1, token_min_padding_length=3
                ),
            },
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        assert padding_lengths == {
            "field_with_dict___token_ids": 5,
            "field_with_dict___additional_key": 3,
            "words___tokens": 3,
            "characters___token_characters": 3,
            "characters___num_token_characters": 8,
        }
        tensors = field.as_tensor(padding_lengths)
        assert tensors["field_with_dict"]["additional_key"].tolist()[-1] == 0
        assert tensors["words"]["tokens"].tolist()[-1] == 0
        assert tensors["characters"]["token_characters"].tolist()[-1] == [0] * 8 
Example #10
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_index_converts_field_correctly(self):
        vocab = Vocabulary()
        sentence_index = vocab.add_token_to_namespace("sentence", namespace="words")
        capital_a_index = vocab.add_token_to_namespace("A", namespace="words")
        capital_a_char_index = vocab.add_token_to_namespace("A", namespace="characters")
        s_index = vocab.add_token_to_namespace("s", namespace="characters")
        e_index = vocab.add_token_to_namespace("e", namespace="characters")
        n_index = vocab.add_token_to_namespace("n", namespace="characters")
        t_index = vocab.add_token_to_namespace("t", namespace="characters")
        c_index = vocab.add_token_to_namespace("c", namespace="characters")

        field = TextField(
            [Token(t) for t in ["A", "sentence"]],
            {"words": SingleIdTokenIndexer(namespace="words")},
        )
        field.index(vocab)

        assert field._indexed_tokens["words"]["tokens"] == [capital_a_index, sentence_index]

        field1 = TextField(
            [Token(t) for t in ["A", "sentence"]],
            {"characters": TokenCharactersIndexer(namespace="characters", min_padding_length=1)},
        )
        field1.index(vocab)
        assert field1._indexed_tokens["characters"]["token_characters"] == [
            [capital_a_char_index],
            [s_index, e_index, n_index, t_index, e_index, n_index, c_index, e_index],
        ]
        field2 = TextField(
            [Token(t) for t in ["A", "sentence"]],
            token_indexers={
                "words": SingleIdTokenIndexer(namespace="words"),
                "characters": TokenCharactersIndexer(namespace="characters", min_padding_length=1),
            },
        )
        field2.index(vocab)
        assert field2._indexed_tokens["words"]["tokens"] == [capital_a_index, sentence_index]
        assert field2._indexed_tokens["characters"]["token_characters"] == [
            [capital_a_char_index],
            [s_index, e_index, n_index, t_index, e_index, n_index, c_index, e_index],
        ] 
Example #11
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_as_tensor_handles_words(self):
        field = TextField(
            [Token(t) for t in ["This", "is", "a", "sentence", "."]],
            token_indexers={"words": SingleIdTokenIndexer("words")},
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        tensor_dict = field.as_tensor(padding_lengths)
        numpy.testing.assert_array_almost_equal(
            tensor_dict["words"]["tokens"].detach().cpu().numpy(), numpy.array([1, 1, 1, 2, 1])
        ) 
Example #12
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_padding_lengths_are_computed_correctly(self):
        field = TextField(
            [Token(t) for t in ["This", "is", "a", "sentence", "."]],
            token_indexers={"words": SingleIdTokenIndexer("words")},
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        assert padding_lengths == {"words___tokens": 5}

        field = TextField(
            [Token(t) for t in ["This", "is", "a", "sentence", "."]],
            token_indexers={
                "characters": TokenCharactersIndexer("characters", min_padding_length=1)
            },
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        assert padding_lengths == {
            "characters___token_characters": 5,
            "characters___num_token_characters": 8,
        }

        field = TextField(
            [Token(t) for t in ["This", "is", "a", "sentence", "."]],
            token_indexers={
                "characters": TokenCharactersIndexer("characters", min_padding_length=1),
                "words": SingleIdTokenIndexer("words"),
            },
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        assert padding_lengths == {
            "characters___token_characters": 5,
            "characters___num_token_characters": 8,
            "words___tokens": 5,
        } 
Example #13
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_get_padding_lengths_raises_if_no_indexed_tokens(self):

        field = TextField(
            [Token(t) for t in ["This", "is", "a", "sentence", "."]],
            token_indexers={"words": SingleIdTokenIndexer("words")},
        )
        with pytest.raises(ConfigurationError):
            field.get_padding_lengths() 
Example #14
Source File: dataset_reader.py    From ConvLab with MIT License
def __init__(self,
                 context_size: int = 0,
                 agent: str = None,
                 random_context_size: bool = True,
                 token_delimiter: str = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._context_size = context_size
        self._agent = agent 
        self._random_context_size = random_context_size
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._token_delimiter = token_delimiter 
Example #15
Source File: index_field_test.py    From allennlp with Apache License 2.0
def setup_method(self):
        super().setup_method()
        self.text = TextField(
            [Token(t) for t in ["here", "is", "a", "sentence", "."]],
            {"words": SingleIdTokenIndexer("words")},
        ) 
Example #16
Source File: span_field_test.py    From allennlp with Apache License 2.0
def setup_method(self):
        super().setup_method()
        self.indexers = {"words": SingleIdTokenIndexer("words")}
        self.text = TextField(
            [Token(t) for t in ["here", "is", "a", "sentence", "for", "spans", "."]], self.indexers
        ) 
Example #17
Source File: lazy_dataset_reader_test.py    From allennlp with Apache License 2.0
def setup_method(self):
        super().setup_method()
        token_indexer = {"tokens": SingleIdTokenIndexer()}

        field1 = TextField([Token(t) for t in ["this", "is", "a", "sentence", "."]], token_indexer)
        field2 = TextField(
            [Token(t) for t in ["this", "is", "a", "different", "sentence", "."]], token_indexer
        )
        field3 = TextField([Token(t) for t in ["here", "is", "a", "sentence", "."]], token_indexer)
        field4 = TextField([Token(t) for t in ["this", "is", "short"]], token_indexer)
        self.instances = [
            Instance({"text1": field1, "text2": field2}),
            Instance({"text1": field3, "text2": field4}),
        ] 
Example #18
Source File: ebmnlp.py    From scibert with Apache License 2.0
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 label_namespace: str = "labels") -> None:
        super().__init__()
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        self.label_namespace = label_namespace 
Example #19
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_as_tensor_handles_words_and_characters_with_longer_lengths(self):
        field = TextField(
            [Token(t) for t in ["a", "sentence", "."]],
            token_indexers={
                "words": SingleIdTokenIndexer("words"),
                "characters": TokenCharactersIndexer("characters", min_padding_length=1),
            },
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        padding_lengths["words___tokens"] = 5
        padding_lengths["characters___token_characters"] = 5
        padding_lengths["characters___num_token_characters"] = 10
        tensor_dict = field.as_tensor(padding_lengths)

        numpy.testing.assert_array_almost_equal(
            tensor_dict["words"]["tokens"].detach().cpu().numpy(), numpy.array([1, 2, 1, 0, 0])
        )
        numpy.testing.assert_array_almost_equal(
            tensor_dict["characters"]["token_characters"].detach().cpu().numpy(),
            numpy.array(
                [
                    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [3, 4, 5, 6, 4, 5, 7, 4, 0, 0],
                    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                ]
            ),
        ) 
Example #20
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_printing_doesnt_crash(self):
        field = TextField(
            [Token(t) for t in ["A", "sentence"]],
            {"words": SingleIdTokenIndexer(namespace="words")},
        )
        print(field) 
Example #21
Source File: text_field_test.py    From allennlp with Apache License 2.0
def test_token_indexer_returns_dict(self):
        field = TextField(
            [Token(t) for t in ["A", "sentence"]],
            token_indexers={
                "field_with_dict": DictReturningTokenIndexer(),
                "words": SingleIdTokenIndexer("words"),
                "characters": TokenCharactersIndexer("characters", min_padding_length=1),
            },
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        assert padding_lengths == {
            "field_with_dict___token_ids": 5,
            "field_with_dict___additional_key": 2,
            "words___tokens": 2,
            "characters___token_characters": 2,
            "characters___num_token_characters": 8,
        }
        padding_lengths["field_with_dict___token_ids"] = 7
        padding_lengths["field_with_dict___additional_key"] = 3
        padding_lengths["words___tokens"] = 4
        padding_lengths["characters___token_characters"] = 4
        tensors = field.as_tensor(padding_lengths)
        assert list(tensors["field_with_dict"]["token_ids"].shape) == [7]
        assert list(tensors["field_with_dict"]["additional_key"].shape) == [3]
        assert list(tensors["words"]["tokens"].shape) == [4]
        assert list(tensors["characters"]["token_characters"].shape) == [4, 8] 
Example #22
Source File: sequence_label_field_test.py    From allennlp with Apache License 2.0
def setup_method(self):
        super().setup_method()
        self.text = TextField(
            [Token(t) for t in ["here", "are", "some", "words", "."]],
            {"words": SingleIdTokenIndexer("words")},
        ) 
Example #23
Source File: entailment_tuple_reader.py    From scitail with Apache License 2.0
def __init__(self,
                 max_tokens: int, max_tuples: int,
                 tokenizer: Tokenizer = None,
                 token_indexers: Dict[str, TokenIndexer] = None) -> None:
        self._max_tokens = max_tokens
        self._max_tuples = max_tuples
        self._tokenizer = tokenizer or WordTokenizer()
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()} 
Example #24
Source File: universal_dependencies.py    From udify with MIT License
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()} 
Example #25
Source File: sigmorphon_2019_task_2.py    From udify with MIT License
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}

        self.label_to_dimension = {}
        for dimension, labels in unimorph_schema.items():
            for label in labels:
                self.label_to_dimension[label] = dimension 
Example #26
Source File: mesim_wn_simi_v1_2.py    From combine-FEVER-NSMN with MIT License
def __init__(self, model_path):
        # Prepare Data
        lazy = False
        token_indexers = {
            'tokens': SingleIdTokenIndexer(namespace='tokens'),  # raw token ids
            'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # ELMo character ids
        }

        p_dict = wn_persistent_api.persistence_load()

        dev_fever_data_reader = WNSIMIReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=420)

        vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
        vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')

        # Build Model
        # device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
        # device_num = -1 if device.type == 'cpu' else 0

        device = torch.device("cpu")
        device_num = -1 if device.type == 'cpu' else 0

        biterator = BasicIterator(batch_size=16)
        biterator.index_with(vocab)

        model = Model(rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                                   1024 + 450 + dev_fever_data_reader.wn_feature_size),
                      rnn_size_out=(450, 450),
                      weight=weight_dict['glove.840B.300d'],
                      vocab_size=vocab.get_vocab_size('tokens'),
                      mlp_d=900,
                      embedding_dim=300, max_l=400)

        model.display()
        model.to(device)
        model.load_state_dict(torch.load(model_path))

        self.model = model
        self.dev_fever_data_reader = dev_fever_data_reader
        self.device_num = device_num
        self.biterator = biterator 
Example #27
Source File: nsmn_sent_wise_v1_1.py    From combine-FEVER-NSMN with MIT License
def __init__(self, model_path):
        # Prepare Data
        lazy = False
        token_indexers = {
            'tokens': SingleIdTokenIndexer(namespace='tokens'),  # raw token ids
            'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # ELMo character ids
        }

        p_dict = wn_persistent_api.persistence_load()

        dev_fever_data_reader = WNSIMIReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=420)

        vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
        vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')

        # Build Model
        # device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
        # device_num = -1 if device.type == 'cpu' else 0

        device = torch.device("cpu")
        device_num = -1 if device.type == 'cpu' else 0

        biterator = BasicIterator(batch_size=16)
        biterator.index_with(vocab)

        model = Model(rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                                   1024 + 450 + dev_fever_data_reader.wn_feature_size),
                      rnn_size_out=(450, 450),
                      weight=weight_dict['glove.840B.300d'],
                      vocab_size=vocab.get_vocab_size('tokens'),
                      mlp_d=900,
                      embedding_dim=300, max_l=400)

        model.display()
        model.to(device)
        model.load_state_dict(torch.load(model_path))

        self.model = model
        self.dev_fever_data_reader = dev_fever_data_reader
        self.device_num = device_num
        self.biterator = biterator 
Example #28
Source File: fever_reader_with_wn_simi.py    From combine-FEVER-NSMN with MIT License
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 example_filter=None,
                 wn_p_dict=None, wn_feature_list=wn_persistent_api.default_fn_list,
                 max_l=None, num_encoding=True, shuffle_sentences=False, ablation=None) -> None:

        # expected ablation format, e.g. {'rm_wn': True, 'rm_simi': True}

        super().__init__(lazy=lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer(namespace='tokens')}
        self._example_filter = example_filter
        self.wn_p_dict = wn_p_dict
        if wn_p_dict is None:
            print("Need to specify WN feature dict for FEVER Reader. Or if we don't need that.")
        self.wn_feature_list = wn_feature_list
        num_encoding_dim = 5 if num_encoding else 0
        self.wn_feature_size = len(self.wn_feature_list) * 3 + num_encoding_dim + 1 if wn_p_dict is not None else 0
        self.max_l = max_l
        self.shuffle_sentences = shuffle_sentences
        self.ablation = ablation

        if self.ablation is not None and self.ablation['rm_wn']:
            self.wn_feature_size -= (len(self.wn_feature_list) * 3 + num_encoding_dim)
        elif self.ablation is not None and self.ablation['rm_simi']:
            self.wn_feature_size -= 1 
Example #29
Source File: fever_sselection_reader.py    From combine-FEVER-NSMN with MIT License
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 example_filter=None,
                 max_l=None) -> None:

        super().__init__(lazy=lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer(namespace='tokens')}
        self._example_filter = example_filter
        self.max_l = max_l 
Example #30
Source File: fever_reader.py    From combine-FEVER-NSMN with MIT License
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 example_filter=None,
                 max_l=None) -> None:

        super().__init__(lazy=lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer(namespace='tokens')}
        self._example_filter = example_filter
        self.max_l = max_l