Python allennlp.common.util.ensure_list() Examples
The following are 30 code examples of allennlp.common.util.ensure_list().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module allennlp.common.util, or try the search function.
Example #1
Source File: quora_paraphrase_test.py From magnitude with MIT License | 6 votes |
def test_read_from_file(self, lazy):
    # Reads the Quora paraphrase fixture and compares each instance with the
    # expected premise tokens, hypothesis tokens and label.
    reader = QuoraParaphraseDatasetReader(lazy=lazy)
    fixture_path = AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'quora_paraphrase.tsv'
    instances = ensure_list(reader.read(fixture_path))

    # Expected records, compared pairwise with the instances in the order read.
    expected_instances = [
        {u"premise": u"What should I do to avoid sleeping in class ?".split(),
         u"hypothesis": u"How do I not sleep in a boring class ?".split(),
         u"label": u"1"},
        {u"premise": u"Do women support each other more than men do ?".split(),
         u"hypothesis": u"Do women need more compliments than men ?".split(),
         u"label": u"0"},
        {u"premise": u"How can one root android devices ?".split(),
         u"hypothesis": u"How do I root an Android device ?".split(),
         u"label": u"1"},
    ]

    assert len(instances) == 3
    for actual, wanted in izip(instances, expected_instances):
        fields = actual.fields
        assert [t.text for t in fields[u"premise"].tokens] == wanted[u"premise"]
        assert [t.text for t in fields[u"hypothesis"].tokens] == wanted[u"hypothesis"]
        assert fields[u"label"].label == wanted[u"label"]
Example #2
Source File: imdb_test.py From topic-rnn with Apache License 2.0 | 6 votes |
def test_read_from_file(self):  # pylint: disable=R0201
    # Reads the IMDB dataset and compares instances 0, 1 and 7 with the
    # reference records stored on TestIMDBReader.
    reader = IMDBLanguageModelingReader()
    instances = ensure_list(reader.read(TestIMDBReader.DATASET_PATH))
    assert len(instances) == 10

    checked = (
        (0, TestIMDBReader.INSTANCE_0),
        (1, TestIMDBReader.INSTANCE_1),
        (7, TestIMDBReader.INSTANCE_7),
    )
    for position, reference in checked:
        fields = instances[position].fields
        assert [t.text for t in fields["source"].tokens] == reference["source"]
        assert [t.text for t in fields["target"].tokens] == reference["target"]
Example #3
Source File: conll2003_dataset_reader_test.py From magnitude with MIT License | 6 votes |
def test_read_from_file(self, lazy, coding_scheme):
    # Reads the CoNLL-2003 fixture and checks tokens and tags of the first
    # two instances; the expected tags depend on the coding scheme.
    conll_reader = Conll2003DatasetReader(lazy=lazy, coding_scheme=coding_scheme)
    fixture_path = unicode(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'conll2003.txt')
    instances = ensure_list(conll_reader.read(fixture_path))

    expected_labels = (
        [u'I-ORG', u'O', u'I-PER', u'O', u'O', u'I-LOC', u'O']
        if coding_scheme == u'IOB1'
        else [u'U-ORG', u'O', u'U-PER', u'O', u'O', u'U-LOC', u'O']
    )
    expected_sentences = [
        [u'U.N.', u'official', u'Ekeus', u'heads', u'for', u'Baghdad', u'.'],
        [u'AI2', u'engineer', u'Joel', u'lives', u'in', u'Seattle', u'.'],
    ]

    # Both sentences are expected to carry the same tag sequence.
    for position, sentence in enumerate(expected_sentences):
        fields = instances[position].fields
        tokens = [t.text for t in fields[u'tokens'].tokens]
        assert tokens == sentence
        assert fields[u"tags"].labels == expected_labels
Example #4
Source File: sequence_tagging_test.py From magnitude with MIT License | 6 votes |
def test_brown_corpus_format(self):
    # Reads the Brown-corpus style fixture using "/" as the word/tag
    # delimiter and checks tokens and tags of all four instances.
    reader = SequenceTaggingDatasetReader(word_tag_delimiter=u'/')
    instances = ensure_list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'brown_corpus.txt'))
    assert len(instances) == 4

    # The four expected sentences differ only in their first word.
    for position, subject in enumerate([u"cats", u"dogs", u"snakes", u"birds"]):
        fields = instances[position].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [subject, u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
Example #5
Source File: sequence_tagging_test.py From magnitude with MIT License | 6 votes |
def test_default_format(self, lazy):
    # Reads the sequence-tagging fixture with the reader's default settings
    # and checks tokens and tags of all four instances.
    reader = SequenceTaggingDatasetReader(lazy=lazy)
    instances = ensure_list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'sequence_tagging.tsv'))
    assert len(instances) == 4

    # The four expected sentences differ only in their first word.
    for position, subject in enumerate([u"cats", u"dogs", u"snakes", u"birds"]):
        fields = instances[position].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [subject, u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
Example #6
Source File: stanford_sentiment_tree_bank_test.py From magnitude with MIT License | 6 votes |
def test_2_class(self):
    # Reads the SST fixture with granularity "2-class" and checks the
    # tokens and label of both resulting instances.
    reader = StanfordSentimentTreeBankDatasetReader(granularity=u"2-class")
    instances = ensure_list(reader.read(self.sst_path))

    expected_instances = [
        {u"tokens": [u"The", u"actors", u"are", u"fantastic", u"."], u"label": u"1"},
        {u"tokens": [u"It", u"was", u"terrible", u"."], u"label": u"0"},
    ]

    assert len(instances) == 2
    for position, wanted in enumerate(expected_instances):
        fields = instances[position].fields
        assert [t.text for t in fields[u"tokens"].tokens] == wanted[u"tokens"]
        assert fields[u"label"].label == wanted[u"label"]
Example #7
Source File: stanford_sentiment_tree_bank_test.py From magnitude with MIT License | 6 votes |
def test_3_class(self):
    # Reads the SST fixture with granularity "3-class" and checks the
    # tokens and label of all three resulting instances.
    reader = StanfordSentimentTreeBankDatasetReader(granularity=u"3-class")
    instances = ensure_list(reader.read(self.sst_path))

    expected_instances = [
        {u"tokens": [u"The", u"actors", u"are", u"fantastic", u"."], u"label": u"2"},
        {u"tokens": [u"It", u"was", u"terrible", u"."], u"label": u"0"},
        {u"tokens": [u"Chomp", u"chomp", u"!"], u"label": u"1"},
    ]

    assert len(instances) == 3
    for position, wanted in enumerate(expected_instances):
        fields = instances[position].fields
        assert [t.text for t in fields[u"tokens"].tokens] == wanted[u"tokens"]
        assert fields[u"label"].label == wanted[u"label"]
Example #8
Source File: stanford_sentiment_tree_bank_test.py From magnitude with MIT License | 6 votes |
def test_use_subtrees(self):
    # Reads the SST fixture with use_subtrees=True, checks the total
    # instance count and the tokens/label of the first three instances.
    reader = StanfordSentimentTreeBankDatasetReader(use_subtrees=True)
    instances = ensure_list(reader.read(self.sst_path))

    leading_instances = [
        {u"tokens": [u"The", u"actors", u"are", u"fantastic", u"."], u"label": u"4"},
        {u"tokens": [u"The", u"actors"], u"label": u"2"},
        {u"tokens": [u"The"], u"label": u"2"},
    ]

    assert len(instances) == 21
    for position, wanted in enumerate(leading_instances):
        fields = instances[position].fields
        assert [t.text for t in fields[u"tokens"].tokens] == wanted[u"tokens"]
        assert fields[u"label"].label == wanted[u"label"]
Example #9
Source File: stanford_sentiment_tree_bank_test.py From magnitude with MIT License | 6 votes |
def test_read_from_file(self, lazy):
    # Reads the SST fixture with the reader's default granularity and
    # checks the tokens and label of all three instances.
    reader = StanfordSentimentTreeBankDatasetReader(lazy=lazy)
    instances = ensure_list(reader.read(self.sst_path))

    expected_instances = [
        {u"tokens": [u"The", u"actors", u"are", u"fantastic", u"."], u"label": u"4"},
        {u"tokens": [u"It", u"was", u"terrible", u"."], u"label": u"0"},
        {u"tokens": [u"Chomp", u"chomp", u"!"], u"label": u"2"},
    ]

    assert len(instances) == 3
    for position, wanted in enumerate(expected_instances):
        fields = instances[position].fields
        assert [t.text for t in fields[u"tokens"].tokens] == wanted[u"tokens"]
        assert fields[u"label"].label == wanted[u"label"]
Example #10
Source File: language_modeling_dataset_test.py From magnitude with MIT License | 6 votes |
def test_read_from_file(self, lazy):
    # Reads the language-modeling fixture with tokens_per_instance=3 and
    # checks the input/output token windows of every instance.
    reader = LanguageModelingReader(tokens_per_instance=3, lazy=lazy)
    fixture_path = AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'language_modeling.txt'
    instances = ensure_list(reader.read(fixture_path))

    # The last potential instance is left out, which is ok, because we don't have an end token
    # in here, anyway.
    assert len(instances) == 5

    # (input tokens, output tokens) per instance, in the order read.
    expected_windows = [
        ([u"This", u"is", u"a"], [u"is", u"a", u"sentence"]),
        ([u"sentence", u"for", u"language"], [u"for", u"language", u"modelling"]),
        ([u"modelling", u".", u"Here"], [u".", u"Here", u"'s"]),
        ([u"'s", u"another", u"one"], [u"another", u"one", u"for"]),
        ([u"for", u"extra", u"language"], [u"extra", u"language", u"modelling"]),
    ]
    for position, (input_words, output_words) in enumerate(expected_windows):
        assert [t.text for t in instances[position].fields[u"input_tokens"].tokens] == input_words
        assert [t.text for t in instances[position].fields[u"output_tokens"].tokens] == output_words
Example #11
Source File: test_tacred_reader.py From kb with Apache License 2.0 | 6 votes |
def test_tacred_dataset_reader(self):
    # Reads the TACRED fixture and checks the instance count plus the
    # leading/trailing tokens and the label of the first instance.
    reader = get_reader()
    instances = ensure_list(reader.read('tests/fixtures/tacred/LDC2018T24.json'))

    # Check number of instances is correct
    self.assertEqual(len(instances), 3)

    # Check that first instance's tokens are correct
    first_instance_tokens = [x.text for x in instances[0]['tokens']]
    leading = first_instance_tokens[:6]
    self.assertListEqual(leading, ['[CLS]', 'douglas', 'flint', '[SEP]', 'chairman', '[SEP]'])
    trailing = first_instance_tokens[-6:]
    self.assertListEqual(trailing, ['a', 'govern', '##ment', '[UNK]', '.', '[SEP]'])

    # Check that first instances label is correct
    actual_label = instances[0]['label_ids'].label
    wanted_label = LABEL_MAP['per:title']
    self.assertEqual(actual_label, wanted_label)
Example #12
Source File: test_tacred_reader.py From kb with Apache License 2.0 | 6 votes |
def test_entity_mask(self):
    # For each entity-masking mode, re-reads the TACRED fixture and checks
    # the tokens found at positions 14 and 17 of the first instance.
    reader = get_reader()

    # (masking mode, expected token at index 14, expected token at index 17)
    scenarios = [
        # Check 'mask' mode has expected behavior
        ('mask', '[MASK]', '[MASK]'),
        # Check 'type/role' mode has expected behavior
        ('type/role', '[s-person]', '[o-title]'),
    ]
    for mode, wanted_subject, wanted_object in scenarios:
        reader.entity_masking = mode
        instances = ensure_list(reader.read('tests/fixtures/tacred/LDC2018T24.json'))
        first_instance_tokens = [x.text for x in instances[0]['tokens']]
        self.assertEqual(first_instance_tokens[14], wanted_subject)
        self.assertEqual(first_instance_tokens[17], wanted_object)
Example #13
Source File: test_dataset_reader_main.py From scicite with Apache License 2.0 | 6 votes |
def test_read_from_file(self):
    # Runs the three ACL-ARC readers over their fixtures and spot-checks
    # one instance from each.

    # Main citation reader: first four citation tokens of instance 0.
    citation_instances = ensure_list(AclarcDatasetReader().read('tests/fixtures/aclarc-train.jsonl'))
    wanted = {"citation_text": ['Typical', 'examples', 'are', 'Bulgarian']}
    assert len(citation_instances) == 10
    fields = citation_instances[0].fields
    assert isinstance(citation_instances, list)
    assert [t.text for t in fields['citation_text'].tokens][:4] == wanted['citation_text']

    # Section-title reader: first two citation tokens and section label of instance 1.
    section_instances = ensure_list(
        AclSectionTitleDatasetReader().read('tests/fixtures/aclarc-section-title.jsonl')
    )
    wanted = {"section_name": 'related work', "citation_text": ['With', 'C99']}
    assert len(section_instances) == 10
    fields = section_instances[1].fields
    assert isinstance(section_instances, list)
    assert [t.text for t in fields['citation_text'].tokens][:2] == wanted['citation_text']
    assert fields['section_label'].label == wanted['section_name']

    # Cite-worthiness reader: is_citation label of instance 1.
    worthiness_instances = ensure_list(
        AclCiteWorthinessDatasetReader().read('tests/fixtures/aclarc-cite-worthiness.jsonl')
    )
    wanted = {"is_citation": 'False'}
    fields = worthiness_instances[1].fields
    assert isinstance(worthiness_instances, list)
    assert fields['is_citation'].label == wanted['is_citation']
Example #14
Source File: sequence_tagging_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_brown_corpus_format(self):
    # Reads the Brown-corpus style fixture using "/" as the word/tag
    # delimiter and checks tokens and tags of all four instances.
    reader = SequenceTaggingDatasetReader(word_tag_delimiter="/")
    instances = ensure_list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / "data" / "brown_corpus.txt"))
    assert len(instances) == 4

    # The four expected sentences differ only in their first word.
    for position, subject in enumerate(["cats", "dogs", "snakes", "birds"]):
        fields = instances[position].fields
        assert [t.text for t in fields["tokens"].tokens] == [subject, "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
Example #15
Source File: semisupervised_text_classification_json_test.py From vampire with Apache License 2.0 | 6 votes |
def test_read_from_file_and_truncates_properly(self):
    # Reads the IMDB fixture with max_sequence_length=5 and checks that each
    # instance holds the expected five tokens and label.
    reader = SemiSupervisedTextClassificationJsonReader(max_sequence_length=5)
    fixture_path = self.FIXTURES_ROOT / "imdb" / "train.jsonl"
    instances = ensure_list(reader.read(fixture_path))

    expected_instances = [
        {"tokens": ['...', 'And', 'I', 'never', 'thought'], "label": "neg"},
        {"tokens": ['The', 'fight', 'scenes', 'were', 'great'], "label": "pos"},
        {"tokens": ['The', 'only', 'way', 'this', 'is'], "label": "neg"},
    ]

    assert len(instances) == 3
    for position, wanted in enumerate(expected_instances):
        fields = instances[position].fields
        assert [t.text for t in fields["tokens"].tokens] == wanted["tokens"]
        assert fields["label"].label == wanted["label"]
Example #16
Source File: seq2seq_test.py From magnitude with MIT License | 6 votes |
def test_default_format(self, lazy):
    # Reads the seq2seq copy fixture with default settings and checks that
    # source and target tokens of each instance match the expected sentence.
    reader = Seq2SeqDatasetReader(lazy=lazy)
    fixture_path = unicode(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'seq2seq_copy.tsv')
    instances = ensure_list(reader.read(fixture_path))
    assert len(instances) == 3

    # Source and target are expected to be identical for every instance.
    expected_sentences = [
        [u"@start@", u"this", u"is", u"a", u"sentence", u"@end@"],
        [u"@start@", u"this", u"is", u"another", u"@end@"],
        [u"@start@", u"all", u"these", u"sentences", u"should", u"get", u"copied", u"@end@"],
    ]
    for position, sentence in enumerate(expected_sentences):
        fields = instances[position].fields
        assert [t.text for t in fields[u"source_tokens"].tokens] == sentence
        assert [t.text for t in fields[u"target_tokens"].tokens] == sentence
Example #17
Source File: text_classification_json_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_read_from_file_ag_news_corpus_and_truncates_properly(self, lazy):
    # Reads the AG News fixture with max_sequence_length=5 and checks that
    # each instance holds the expected five tokens and label.
    reader = TextClassificationJsonReader(lazy=lazy, max_sequence_length=5)
    fixture_path = (
        AllenNlpTestCase.FIXTURES_ROOT
        / "data"
        / "text_classification_json"
        / "ag_news_corpus.jsonl"
    )
    instances = ensure_list(reader.read(fixture_path))

    expected_instances = [
        {"tokens": ["Memphis", "Rout", "Still", "Stings", "for"], "label": "2"},
        {"tokens": ["AP", "-", "Eli", "Manning", "has"], "label": "2"},
        {"tokens": ["A", "conference", "dedicated", "to", "online"], "label": "4"},
    ]

    assert len(instances) == 3
    for position, wanted in enumerate(expected_instances):
        fields = instances[position].fields
        assert [t.text for t in fields["tokens"].tokens] == wanted["tokens"]
        assert fields["label"].label == wanted["label"]
Example #18
Source File: srl_dataset_reader_test.py From magnitude with MIT License | 5 votes |
def test_srl_reader_can_filter_by_domain(self):
    # Reads the conll_2012 fixture directory with domain_identifier
    # "subdomain2" and checks how many instances come back.
    conll_reader = SrlReader(domain_identifier=u"subdomain2")
    fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / u'conll_2012'
    filtered = ensure_list(conll_reader.read(fixture_dir))
    # If we'd included the folder, we'd have 9 instances.
    assert len(filtered) == 2
Example #19
Source File: copynet_test.py From nlp-models with MIT License | 5 votes |
def setUp(self):
    # Builds the reader from the experiment config, reads the copy-over
    # fixture, and builds a vocabulary from the instances as returned by
    # the reader (not from the list stored on self.instances).
    super(TestCopyNetReader, self).setUp()
    experiment = Params.from_file("nlpete/tests/fixtures/copynet/experiment.json")
    self.reader = DatasetReader.from_params(experiment["dataset_reader"])
    raw_instances = self.reader.read("nlpete/tests/fixtures/copynet/copyover.tsv")
    self.instances = ensure_list(raw_instances)
    self.vocab = Vocabulary.from_params(
        params=experiment["vocabulary"],
        instances=raw_instances,
    )
Example #20
Source File: nl2bash_test.py From nlp-models with MIT License | 5 votes |
def setUp(self):
    # Creates the NL2Bash reader and stores the instances read from the
    # training fixture as a list on the test case.
    super(TestNL2BashReader, self).setUp()
    self.reader = NL2BashDatasetReader("target_tokens")
    self.instances = ensure_list(
        self.reader.read("nlpete/tests/fixtures/nl2bash/train.tsv")
    )
Example #21
Source File: prolocal_dataset_reader_test.py From propara with Apache License 2.0 | 5 votes |
def test_read_from_file(self):
    # Reads the ProLocal toy fixture and checks tokens, span indicators,
    # state-change type and state-change tags of the first two instances.
    sc_reader = ProLocalDatasetReader()
    instances = ensure_list(sc_reader.read('tests/fixtures/prolocal_toy_data.tsv'))

    # read first instance
    first = instances[0].fields
    correct_tokens = ["Green", "plants", "absorb", "water", "from", "the", "soil"]
    read_tokens = [t.text for t in first["tokens"].tokens]
    assert correct_tokens == read_tokens
    assert first["entity_span"].labels == [0, 0, 0, 1, 0, 0, 0]
    assert first["verb_span"].labels == [0, 0, 1, 0, 0, 0, 0]
    assert first["state_change_type_labels"].label == 'MOVE'
    assert first["state_change_tags"].labels == [
        'B-LOC-TO', 'I-LOC-TO', 'O', 'O', 'O', 'B-LOC-FROM', 'I-LOC-FROM'
    ]

    # read second instance
    second = instances[1].fields
    print(second)
    read_tokens = [t.text for t in second["tokens"].tokens]
    assert read_tokens == ["Rocks", "in", "the", "shore", "break"]
    assert second["entity_span"].labels == [1, 0, 0, 0, 0]
    assert second["verb_span"].labels == [0, 0, 0, 0, 1]
    assert second["state_change_type_labels"].label == 'DESTROY'
    assert second["state_change_tags"].labels == ['O', 'O', 'B-LOC-FROM', 'I-LOC-FROM', 'O']
Example #22
Source File: text_classification_json_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_set_skip_indexing_true(self, lazy):
    # With skip_label_indexing=True, integer labels are read as-is, and
    # reading the imdb_corpus fixture is expected to raise ValueError.
    reader = TextClassificationJsonReader(lazy=lazy, skip_label_indexing=True)
    fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / "data" / "text_classification_json"
    instances = ensure_list(reader.read(fixture_dir / "integer_labels.jsonl"))

    expected_instances = [
        {"tokens": ["This", "text", "has", "label", "0"], "label": 0},
        {"tokens": ["This", "text", "has", "label", "1"], "label": 1},
    ]

    assert len(instances) == 2
    for position, wanted in enumerate(expected_instances):
        fields = instances[position].fields
        assert [t.text for t in fields["tokens"].tokens] == wanted["tokens"]
        assert fields["label"].label == wanted["label"]

    with pytest.raises(ValueError) as exec_info:
        ensure_list(reader.read(fixture_dir / "imdb_corpus.jsonl"))
    # Checked after the block: a statement following the raising call inside
    # the `with` body would never execute.
    assert str(exec_info.value) == "Labels must be integers if skip_label_indexing is True."
Example #23
Source File: test_kg_probe_reader.py From kb with Apache License 2.0 | 5 votes |
def test_kg_probe_reader(self):
    # Reads the KG-probe fixture and checks, for both instances, the masked
    # token sequence and the matching mask-indicator array.
    reader = get_reader()
    instances = ensure_list(reader.read('tests/fixtures/kg_probe/file1.txt'))

    # Check instances are correct length
    self.assertEqual(len(instances), 2)

    # Check masking is performed properly
    # Each entry: (expected tokens, expected mask bits aligned with those tokens).
    expectations = [
        (['[CLS]', '[MASK]', '[MASK]', '[UNK]', 'quick', '##est', '.', '[SEP]'],
         [0,1,1,0,0,0,0,0]),
        (['[CLS]', 'the', 'brown', 'fox', 'jumped', 'over', 'the', '[MASK]', '[MASK]',
          '[MASK]', '[MASK]', '.', '[SEP]'],
         [0,0,0,0,0,0,0,1,1,1,1,0,0]),
    ]
    for position, (wanted_tokens, wanted_bits) in enumerate(expectations):
        actual_tokens = [x.text for x in instances[position]['tokens'].tokens]
        self.assertListEqual(wanted_tokens, actual_tokens)
        wanted_mask = np.array(wanted_bits, dtype=np.uint8)
        actual_mask = instances[position]['mask_indicator'].array
        assert np.allclose(wanted_mask, actual_mask)
Example #24
Source File: test_ultra_fine_reader.py From kb with Apache License 2.0 | 5 votes |
def test_ultra_fine_reader_entity_markers(self):
    # Reads the ultra-fine fixture in "entity_markers" mode and checks the
    # (token, segment id) pairs of the first instance and where index_a points.
    reader = get_reader("entity_markers")
    instances = ensure_list(reader.read('tests/fixtures/evaluation/ultra_fine/train.json'))

    # Check number of instances is correct
    self.assertEqual(len(instances), 2)

    # Check that first instance's tokens are correct
    first = instances[0]
    tokens_0 = [x.text for x in first['tokens']]
    segments_0 = list(first['segment_ids'].array)
    actual = list(zip(tokens_0, segments_0))

    # Every expected token is paired with segment id 0.
    wanted_tokens = ['[CLS]', 'the', 'british', 'information', 'commissioner', "'s",
                     'office', 'invites', '[e1start]', 'web', 'users', '[e1end]', 'to',
                     'locate', 'its', 'add', '##ress', 'using', 'google', '[UNK]', '.',
                     '[SEP]']
    expected = [(token, 0) for token in wanted_tokens]
    self.assertListEqual(actual, expected)
    self.assertEqual(actual[first['index_a'].label], ('[e1start]', 0))
Example #25
Source File: test_dataset_reader_main.py From scicite with Apache License 2.0 | 5 votes |
def test_read_from_file_scicite(self):
    # Runs the three SciCite readers over their fixtures and spot-checks
    # the first instance from each.

    # Main reader: first four citation tokens and the label of instance 0.
    citation_instances = ensure_list(SciciteDatasetReader().read('tests/fixtures/scicite-train.jsonl'))
    wanted = {"citation_text": ['These', 'results', 'are', 'in']}
    assert len(citation_instances) == 10
    fields = citation_instances[0].fields
    assert isinstance(citation_instances, list)
    assert [t.text for t in fields['citation_text'].tokens][:4] == wanted['citation_text']
    print(fields.keys())
    assert fields['labels'].label == "result"

    # Section-title reader: first two citation tokens and the section label.
    section_instances = ensure_list(
        SciciteSectitleDatasetReader().read('tests/fixtures/scicite-section-title.jsonl')
    )
    wanted = {"section_name": 'introduction', "citation_text": ['SVM', 'and']}
    assert len(section_instances) == 10
    fields = section_instances[0].fields
    assert isinstance(section_instances, list)
    assert [t.text for t in fields['citation_text'].tokens][:2] == wanted['citation_text']
    assert fields['section_label'].label == wanted['section_name']
    assert 'is_citation' not in fields

    # Cite-worthiness reader: is_citation label, and no section_name field.
    worthiness_instances = ensure_list(
        SciCiteWorthinessDataReader().read('tests/fixtures/scicite-cite-worthiness.jsonl')
    )
    wanted = {"is_citation": 'True'}
    fields = worthiness_instances[0].fields
    assert isinstance(worthiness_instances, list)
    assert fields['is_citation'].label == wanted['is_citation']
    assert 'section_name' not in fields.keys()
Example #26
Source File: semisupervised_text_classification_json_test.py From vampire with Apache License 2.0 | 5 votes |
def test_samples_properly(self):
    # With sample=1 and all seeds fixed to 5, reading the IMDB fixture is
    # expected to yield exactly one specific (truncated) instance.
    reader = SemiSupervisedTextClassificationJsonReader(sample=1, max_sequence_length=5)
    fixture_path = self.FIXTURES_ROOT / "imdb" / "train.jsonl"

    # Seed the environment before reading.
    seeds = Params({"random_seed": 5, "numpy_seed": 5, "pytorch_seed": 5})
    prepare_environment(seeds)

    instances = ensure_list(reader.read(fixture_path))
    wanted = {"tokens": ['The', 'fight', 'scenes', 'were', 'great'], "label": "pos"}
    assert len(instances) == 1
    fields = instances[0].fields
    assert [t.text for t in fields["tokens"].tokens] == wanted["tokens"]
    assert fields["label"].label == wanted["label"]
Example #27
Source File: semisupervised_text_classification_json_test.py From vampire with Apache License 2.0 | 5 votes |
def test_ignores_label_properly(self):
    # With ignore_labels=True, none of the three instances read from the
    # labeled IMDB fixture should carry a 'label' field.
    imdb_labeled_path = self.FIXTURES_ROOT / "imdb" / "train.jsonl"
    reader = SemiSupervisedTextClassificationJsonReader(ignore_labels=True)
    instances = ensure_list(reader.read(imdb_labeled_path))
    labels = [instance.fields.get('label') for instance in instances]
    assert labels == [None] * 3
Example #28
Source File: seq2seq_test.py From magnitude with MIT License | 5 votes |
def test_source_add_start_token(self):
    # With source_add_start_token=False the source tokens of the first
    # instance lack "@start@", while the target tokens still include it.
    reader = Seq2SeqDatasetReader(source_add_start_token=False)
    fixture_path = unicode(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'seq2seq_copy.tsv')
    instances = ensure_list(reader.read(fixture_path))

    assert len(instances) == 3
    first = instances[0].fields
    source_words = [t.text for t in first[u"source_tokens"].tokens]
    assert source_words == [u"this", u"is", u"a", u"sentence", u"@end@"]
    target_words = [t.text for t in first[u"target_tokens"].tokens]
    assert target_words == [u"@start@", u"this", u"is", u"a", u"sentence", u"@end@"]
Example #29
Source File: snli_reader_test.py From magnitude with MIT License | 5 votes |
def test_read_from_file(self, lazy):
    # Reads the SNLI fixture and checks premise tokens, hypothesis tokens
    # and label of all three instances.
    reader = SnliReader(lazy=lazy)
    instances = ensure_list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'snli.jsonl'))

    # All three expected records share the same premise tokens.
    shared_premise = [u"A", u"person", u"on", u"a", u"horse", u"jumps", u"over", u"a",
                      u"broken", u"down", u"airplane", u"."]
    expected_instances = [
        {u"premise": shared_premise,
         u"hypothesis": [u"A", u"person", u"is", u"training", u"his", u"horse", u"for",
                         u"a", u"competition", u"."],
         u"label": u"neutral"},
        {u"premise": shared_premise,
         u"hypothesis": [u"A", u"person", u"is", u"at", u"a", u"diner", u",", u"ordering",
                         u"an", u"omelette", u"."],
         u"label": u"contradiction"},
        {u"premise": shared_premise,
         u"hypothesis": [u"A", u"person", u"is", u"outdoors", u",", u"on", u"a", u"horse",
                         u"."],
         u"label": u"entailment"},
    ]

    assert len(instances) == 3
    for position, wanted in enumerate(expected_instances):
        fields = instances[position].fields
        assert [t.text for t in fields[u"premise"].tokens] == wanted[u"premise"]
        assert [t.text for t in fields[u"hypothesis"].tokens] == wanted[u"hypothesis"]
        assert fields[u"label"].label == wanted[u"label"]
Example #30
Source File: triviaqa_test.py From magnitude with MIT License | 5 votes |
def test_read(self, lazy):
    # Builds a TriviaQaReader from params pointing at the sample tarball,
    # reads 'web-train.json', and spot-checks question/passage tokens and
    # span indices of all three instances.
    params = Params({
        u'base_tarball_path': unicode(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'triviaqa-sample.tgz'),
        u'lazy': lazy
    })
    reader = TriviaQaReader.from_params(params)
    instances = ensure_list(reader.read(u'web-train.json'))
    assert len(instances) == 3

    url = u"http://www.nobelprize.org/nobel_prizes/literature/laureates/1930/"
    # Each entry: (first 3 question tokens, first 3 passage tokens,
    #              last 3 passage tokens, span_start index, span_end index)
    expectations = [
        ([u"Which", u"American", u"-"], [u"The", u"Nobel", u"Prize"],
         [u"<", url, u">"], 12, 13),
        ([u"Which", u"American", u"-"], [u"Why", u"Do", u"n’t"],
         [u"adults", u",", u"and"], 38, 39),
        ([u"Where", u"in", u"England"], [u"Judi", u"Dench", u"-"],
         [u")", u"(", u"special"], 16, 16),
    ]
    for position, (question_head, passage_head, passage_tail, start, end) in enumerate(expectations):
        fields = instances[position].fields
        assert [t.text for t in fields[u"question"].tokens[:3]] == question_head
        assert [t.text for t in fields[u"passage"].tokens[:3]] == passage_head
        assert [t.text for t in fields[u"passage"].tokens[-3:]] == passage_tail
        assert fields[u"span_start"].sequence_index == start
        assert fields[u"span_end"].sequence_index == end