Python torchtext.data.RawField() Examples
The following are 6 code examples of torchtext.data.RawField(). You can go to the original project or source file by following the links above each example. You may also want to check out all other available functions and classes of the module torchtext.data.
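Before diving into the examples, here is a minimal sketch of the behaviour they rely on: RawField applies no tokenization, builds no vocabulary, and hands batches back unchanged. The names used (ID, batch) are illustrative, and the sketch assumes an older torchtext release where torchtext.data exposes RawField directly (on 0.9+ the same class lives in torchtext.legacy.data).

from torchtext import data  # on torchtext >= 0.9: from torchtext.legacy import data

# RawField does no tokenization, numericalization, or padding; process()
# returns the batch as-is (after an optional postprocessing hook, if one was given).
ID = data.RawField()
batch = ['a-1', 'a-2', 'a-3']
assert ID.process(batch) == batch  # the data passes through untouched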
Example #1
Source File: test_field.py From decaNLP with BSD 3-Clause "New" or "Revised" License
def test_process(self):
    raw_field = data.RawField()
    field = data.Field(sequential=True, use_vocab=False, batch_first=True)

    # Test tensor-like batch data which is accepted by both RawField and Field
    batch = [[1, 2, 3], [2, 3, 4]]
    batch_tensor = torch.LongTensor(batch)

    raw_field_processed = raw_field.process(batch)
    field_processed = field.process(batch, device=-1, train=False)

    assert raw_field_processed == batch
    assert field_processed.data.equal(batch_tensor)

    # Test non-tensor data which is only accepted by RawField
    any_obj = [object() for _ in range(5)]

    raw_field_processed = raw_field.process(any_obj)
    assert any_obj == raw_field_processed

    with pytest.raises(TypeError):
        field.process(any_obj)
Example #2
Source File: test_field.py From text with BSD 3-Clause "New" or "Revised" License
def test_process(self):
    raw_field = data.RawField()
    field = data.Field(sequential=True, use_vocab=False, batch_first=True)

    # Test tensor-like batch data which is accepted by both RawField and Field
    batch = [[1, 2, 3], [2, 3, 4]]
    batch_tensor = torch.LongTensor(batch)

    raw_field_processed = raw_field.process(batch)
    field_processed = field.process(batch)

    assert raw_field_processed == batch
    assert field_processed.data.equal(batch_tensor)

    # Test non-tensor data which is only accepted by RawField
    any_obj = [object() for _ in range(5)]

    raw_field_processed = raw_field.process(any_obj)
    assert any_obj == raw_field_processed

    with pytest.raises(TypeError):
        field.process(any_obj)
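Examples #1 and #2 are the same unit test at two points in the library's history: the decaNLP copy calls field.process(batch, device=-1, train=False), the older torchtext signature in which device=-1 selected the CPU and train toggled train-time behaviour, while the copy from the text repository drops both arguments to match later releases, where Field.process takes only an optional device keyword.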
Example #3
Source File: semantic_similar_data.py From glyce with Apache License 2.0
def __init__(self, args):
    self.RAW = data.RawField()
    self.RAW.is_target = False
    tokenize = lambda x: list(x)
    self.TEXT = data.Field(batch_first=True, tokenize=tokenize)
    self.LABEL = data.Field(sequential=False, unk_token=None)

    self.train, self.dev, self.test = data.TabularDataset.splits(
        path='/data/nfsdata/nlp/datasets/sentence_pair/bq_corpus_torch10',
        train='BQ_train.json',
        validation='BQ_dev.json',
        test='BQ_test.json',
        format='json',
        fields={"gold_label": ("label", self.LABEL),
                "sentence1": ("q1", self.TEXT),
                "sentence2": ("q2", self.TEXT),
                "ID": ("id", self.RAW)})

    self.TEXT.build_vocab(self.train, self.dev, self.test, vectors=Vectors("BQ300", args.data))
    self.LABEL.build_vocab(self.train)

    sort_key = lambda x: data.interleave_keys(len(x.q1), len(x.q2))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    self.train_iter = data.BucketIterator(self.train, batch_size=args.batch_size, device=device,
                                          sort_key=sort_key, sort=True)
    self.dev_iter = data.BucketIterator(self.dev, batch_size=args.batch_size, device=device,
                                        sort_key=sort_key, sort=True)
    self.test_iter = data.BucketIterator(self.test, batch_size=args.batch_size, device=device,
                                         sort_key=sort_key, sort=True)
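Here the RawField carries each example's ID through batching without being numericalized, so batch.id stays a plain Python list while batch.q1 and batch.q2 come back as tensors. The sketch below reproduces that pattern on a tiny in-memory dataset; the field names and sample values are illustrative, and it again assumes the pre-0.9 torchtext.data API (or torchtext.legacy.data).

from torchtext import data  # or: from torchtext.legacy import data

# The same pattern as above, minus the JSON files: one tokenized Field
# and one RawField that carries the example ID through batching untouched.
RAW = data.RawField()
RAW.is_target = False
TEXT = data.Field(batch_first=True, tokenize=lambda x: list(x))

fields = [('id', RAW), ('q1', TEXT)]
examples = [data.Example.fromlist(['bq-1', 'abc'], fields),
            data.Example.fromlist(['bq-2', 'de'], fields)]
dataset = data.Dataset(examples, fields)
TEXT.build_vocab(dataset)

batch = next(iter(data.Iterator(dataset, batch_size=2, train=False, sort=False)))
print(batch.q1)  # LongTensor built by TEXT (numericalized and padded)
print(batch.id)  # plain list of ID strings, passed through by the RawField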
Example #4
Source File: wikiqa.py From sentence-similarity with MIT License
def __init__(self, path, format, fields, skip_header=True, **kwargs):
    super(WikiQA, self).__init__(path, format, fields, skip_header, **kwargs)

    # We want to keep a raw copy of the sentence for some models and for debugging
    RAW_TEXT_FIELD = RawField()
    for ex in self.examples:
        raw_sentence_a, raw_sentence_b = ex.sentence_a[:], ex.sentence_b[:]
        setattr(ex, 'raw_sentence_a', raw_sentence_a)
        setattr(ex, 'raw_sentence_b', raw_sentence_b)

    self.fields['raw_sentence_a'] = RAW_TEXT_FIELD
    self.fields['raw_sentence_b'] = RAW_TEXT_FIELD
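A note on the design choice here: in older torchtext a Batch sets one attribute per entry in dataset.fields, so registering RAW_TEXT_FIELD under the new names means every batch also exposes raw_sentence_a and raw_sentence_b as plain, un-numericalized token lists. The [:] slices take shallow copies, so later changes to the processed fields leave the raw versions intact.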
Example #5
Source File: wikiqa.py From sentence-similarity with MIT License
def iters(cls, batch_size=64, device=-1, shuffle=True, vectors='glove.840B.300d'):
    cls.TEXT = Field(sequential=True, tokenize='spacy', lower=True, batch_first=True)
    cls.LABEL = Field(sequential=False, use_vocab=False, batch_first=True,
                      tensor_type=torch.FloatTensor,
                      postprocessing=Pipeline(get_class_probs))
    cls.ID = RawField()

    train, val, test = cls.splits(cls.TEXT, cls.LABEL, cls.ID)

    cls.TEXT.build_vocab(train, vectors=vectors)

    return BucketIterator.splits((train, val, test), batch_size=batch_size,
                                 shuffle=shuffle, repeat=False, device=device)
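Like Example #1, this snippet targets an older torchtext release: the tensor_type keyword on Field was later renamed to dtype, and device=-1 as a CPU marker gave way to torch.device objects (or None), so running it against a more recent copy of the legacy API would require updating those two arguments.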
Example #6
Source File: sick.py From sentence-similarity with MIT License
def __init__(self, path, format, fields, skip_header=True, **kwargs):
    super(SICK, self).__init__(path, format, fields, skip_header, **kwargs)

    # We want to keep a raw copy of the sentence for some models and for debugging
    RAW_TEXT_FIELD = RawField()
    for ex in self.examples:
        raw_sentence_a, raw_sentence_b = ex.sentence_a[:], ex.sentence_b[:]
        setattr(ex, 'raw_sentence_a', raw_sentence_a)
        setattr(ex, 'raw_sentence_b', raw_sentence_b)

    self.fields['raw_sentence_a'] = RAW_TEXT_FIELD
    self.fields['raw_sentence_b'] = RAW_TEXT_FIELD