Python allennlp.nn.util.batch_tensor_dicts() Examples

The following are 7 code examples of allennlp.nn.util.batch_tensor_dicts(). You can go to the original project or source file by following the links above each example. You may also want to check out all the other available functions and classes of the allennlp.nn.util module.
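Before the examples, here is a minimal sketch (not taken from any of the projects below) of what batch_tensor_dicts() does: given a list of dictionaries that map the same keys to tensors, it stacks the tensors under each key along a new batch dimension.

import torch
from allennlp.nn import util

# Two instances, each represented as a dict of tensors with matching keys.
tensor_dicts = [
    {"tokens": torch.tensor([1, 2, 3])},
    {"tokens": torch.tensor([4, 5, 6])},
]

batched = util.batch_tensor_dicts(tensor_dicts)
# batched["tokens"] has shape (2, 3): the two instances stacked along dim 0.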
Example #1
Source File: text_field.py    From allennlp with Apache License 2.0
def batch_tensors(self, tensor_list: List[TextFieldTensors]) -> TextFieldTensors:
        # This is creating a dict of {token_indexer_name: {token_indexer_outputs: batched_tensor}}
        # for each token indexer used to index this field.
        indexer_lists: Dict[str, List[Dict[str, torch.Tensor]]] = defaultdict(list)
        for tensor_dict in tensor_list:
            for indexer_name, indexer_output in tensor_dict.items():
                indexer_lists[indexer_name].append(indexer_output)
        batched_tensors = {
            # NOTE(mattg): if an indexer has its own nested structure, rather than one tensor per
            # argument, then this will break.  If that ever happens, we should move this to an
            # `indexer.batch_tensors` method, with this logic as the default implementation in the
            # base class.
            indexer_name: util.batch_tensor_dicts(indexer_outputs)
            for indexer_name, indexer_outputs in indexer_lists.items()
        }
        return batched_tensors 
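As a hedged illustration of the nested structure this method handles (the "tokens" indexer name and "token_ids" key are invented for the sketch), the batching boils down to one batch_tensor_dicts() call per indexer:

import torch
from allennlp.nn import util

# Hypothetical TextFieldTensors for two instances with a single indexer.
tensor_list = [
    {"tokens": {"token_ids": torch.tensor([1, 2])}},
    {"tokens": {"token_ids": torch.tensor([3, 4])}},
]

# Mirror the method above: batch each indexer's outputs separately.
batched = {
    indexer_name: util.batch_tensor_dicts([t[indexer_name] for t in tensor_list])
    for indexer_name in tensor_list[0]
}
# batched == {"tokens": {"token_ids": tensor([[1, 2], [3, 4]])}}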
Example #2
Source File: bert_field.py    From HGL-pytorch with MIT License
def batch_tensors(self, tensor_list: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        # pylint: disable=no-self-use
        # This is creating a dict of {token_indexer_key: batch_tensor} for each token indexer used
        # to index this field.
        return util.batch_tensor_dicts(tensor_list) 
Example #3
Source File: text_field.py    From magnitude with MIT License
def batch_tensors(self, tensor_list: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        # pylint: disable=no-self-use
        # This is creating a dict of {token_indexer_key: batch_tensor} for each token indexer used
        # to index this field.
        return util.batch_tensor_dicts(tensor_list) 
Example #4
Source File: knowledge_graph_field.py    From magnitude with MIT License
def batch_tensors(self, tensor_list: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        # pylint: disable=no-self-use
        batched_text = nn_util.batch_tensor_dicts(tensor[u'text'] for tensor in tensor_list)  # type: ignore
        batched_linking = torch.stack([tensor[u'linking'] for tensor in tensor_list])
        return {u'text': batched_text, u'linking': batched_linking}

    # Below here we have feature extractor functions.  To keep a consistent API for easy logic
    # above, some of these functions have unused arguments.
    # pylint: disable=unused-argument,no-self-use

    # These feature extractors are generally pretty specific to the logical form language and
    # problem setting in WikiTableQuestions.  This whole notion of feature extraction should
    # eventually be made more general (or just removed, if we can replace it with CNN features...).
    # For the feature functions used in the original parser written in PNP, see here:
    # https://github.com/allenai/pnp/blob/wikitables2/src/main/scala/org/allenai/wikitables/SemanticParserFeatureGenerator.scala

    # One notable difference between how the features work here and how they worked in PNP is that
    # we're using the table text when computing string matches, while PNP used the _entity name_.
    # It turns out that the entity name is derived from the table text, so this should be roughly
    # equivalent, except in the case of some numbers.  If there are cells with different text that
    # normalize to the same name, you could get `_2` or similar appended to the name, so the way we
    # do it here should just be better.  But it's a possible minor source of variation from the
    # original parser.

    # Another difference between these features and the PNP features is that the span overlap used
    # a weighting scheme to downweight matches on frequent words (like "the"), and the lemma
    # overlap feature value was calculated a little differently.  I'm guessing that doesn't make a
    # huge difference... 
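To make the two-part batching in Example #4 concrete, here is a small sketch with invented entity counts and feature sizes; the "tokens" indexer key is likewise hypothetical:

import torch
from allennlp.nn import util as nn_util

# Two instances; each carries indexer outputs for its entities' text plus a
# precomputed linking-feature tensor of shape
# (num_entities, num_question_tokens, num_features).
tensor_list = [
    {"text": {"tokens": torch.tensor([[1, 2], [3, 4]])},
     "linking": torch.zeros(2, 6, 10)},
    {"text": {"tokens": torch.tensor([[5, 6], [7, 8]])},
     "linking": torch.ones(2, 6, 10)},
]

batched_text = nn_util.batch_tensor_dicts([t["text"] for t in tensor_list])
batched_linking = torch.stack([t["linking"] for t in tensor_list])
# batched_text["tokens"] has shape (2, 2, 2); batched_linking has shape (2, 2, 6, 10).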
Example #5
Source File: bert_field.py    From r2c with MIT License
def batch_tensors(self, tensor_list: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        # pylint: disable=no-self-use
        # This is creating a dict of {token_indexer_key: batch_tensor} for each token indexer used
        # to index this field.
        return util.batch_tensor_dicts(tensor_list) 
Example #6
Source File: knowledge_graph_field.py    From gtos with MIT License
def batch_tensors(self, tensor_list: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        # pylint: disable=no-self-use
        batched_text = nn_util.batch_tensor_dicts(tensor['text'] for tensor in tensor_list)  # type: ignore
        batched_linking = torch.stack([tensor['linking'] for tensor in tensor_list])
        return {'text': batched_text, 'linking': batched_linking}

    # Below here we have feature extractor functions.  To keep a consistent API for easy logic
    # above, some of these functions have unused arguments.
    # pylint: disable=unused-argument,no-self-use

    # These feature extractors are generally pretty specific to the logical form language and
    # problem setting in WikiTableQuestions.  This whole notion of feature extraction should
    # eventually be made more general (or just removed, if we can replace it with CNN features...).
    # For the feature functions used in the original parser written in PNP, see here:
    # https://github.com/allenai/pnp/blob/wikitables2/src/main/scala/org/allenai/wikitables/SemanticParserFeatureGenerator.scala

    # One notable difference between how the features work here and how they worked in PNP is that
    # we're using the table text when computing string matches, while PNP used the _entity name_.
    # It turns out that the entity name is derived from the table text, so this should be roughly
    # equivalent, except in the case of some numbers.  If there are cells with different text that
    # normalize to the same name, you could get `_2` or similar appended to the name, so the way we
    # do it here should just be better.  But it's a possible minor source of variation from the
    # original parser.

    # Another difference between these features and the PNP features is that the span overlap used
    # a weighting scheme to downweight matches on frequent words (like "the"), and the lemma
    # overlap feature value was calculated a little differently.  I'm guessing that doesn't make a
    # huge difference... 
Example #7
Source File: knowledge_graph_field.py    From stog with MIT License
def batch_tensors(self, tensor_list: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        # pylint: disable=no-self-use
        batched_text = nn_util.batch_tensor_dicts(tensor['text'] for tensor in tensor_list)  # type: ignore
        batched_linking = torch.stack([tensor['linking'] for tensor in tensor_list])
        return {'text': batched_text, 'linking': batched_linking}

    # Below here we have feature extractor functions.  To keep a consistent API for easy logic
    # above, some of these functions have unused arguments.
    # pylint: disable=unused-argument,no-self-use

    # These feature extractors are generally pretty specific to the logical form language and
    # problem setting in WikiTableQuestions.  This whole notion of feature extraction should
    # eventually be made more general (or just removed, if we can replace it with CNN features...).
    # For the feature functions used in the original parser written in PNP, see here:
    # https://github.com/allenai/pnp/blob/wikitables2/src/main/scala/org/allenai/wikitables/SemanticParserFeatureGenerator.scala

    # One notable difference between how the features work here and how they worked in PNP is that
    # we're using the table text when computing string matches, while PNP used the _entity name_.
    # It turns out that the entity name is derived from the table text, so this should be roughly
    # equivalent, except in the case of some numbers.  If there are cells with different text that
    # normalize to the same name, you could get `_2` or similar appended to the name, so the way we
    # do it here should just be better.  But it's a possible minor source of variation from the
    # original parser.

    # Another difference between these features and the PNP features is that the span overlap used
    # a weighting scheme to downweight matches on frequent words (like "the"), and the lemma
    # overlap feature value was calculated a little differently.  I'm guessing that doesn't make a
    # huge difference...