Python allennlp.nn.InitializerApplicator() Examples
The following are 30 code examples of allennlp.nn.InitializerApplicator(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module allennlp.nn, or try the search function.
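All of the examples below share the same basic pattern: build an InitializerApplicator from a list of (parameter-name regex, initializer) pairs, then call it on a torch.nn.Module so that every parameter whose name matches a regex is initialized in place. Here is a minimal sketch of that pattern; it assumes an AllenNLP version (such as 0.9.x or 1.x) where the constructor accepts the list of pairs positionally, as the examples on this page do.

import torch
from allennlp.common import Params
from allennlp.nn import InitializerApplicator
from allennlp.nn.initializers import Initializer

# A toy module whose parameters we want to initialize.
model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))

# Map parameter-name regexes to Initializers; parameters whose names match
# a regex are initialized by the corresponding initializer.
constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
applicator = InitializerApplicator([(".*", constant_init)])

# Calling the applicator on a module applies the matching initializers in place.
applicator(model)

The same idea appears in the model constructors below, where initializer(self) is typically the last line of __init__.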
Example #1
Source File: bert_text_classifier.py From scibert with Apache License 2.0 | 6 votes |
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             verbose_metrics: bool = False,
             dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             ) -> None:
    super(TextClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.dropout = torch.nn.Dropout(dropout)
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.classifier_feedforward = torch.nn.Linear(self.text_field_embedder.get_output_dim(), self.num_classes)

    self.label_accuracy = CategoricalAccuracy()
    self.label_f1_metrics = {}
    self.verbose_metrics = verbose_metrics
    for i in range(self.num_classes):
        self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="labels")] = F1Measure(positive_label=i)
    self.loss = torch.nn.CrossEntropyLoss()

    initializer(self)
Example #2
Source File: maxout_test.py From magnitude with MIT License | 6 votes |
def test_forward_gives_correct_output(self):
    params = Params({
        u'input_dim': 2,
        u'output_dims': 3,
        u'pool_sizes': 4,
        u'dropout': 0.0,
        u'num_layers': 2
    })
    maxout = Maxout.from_params(params)
    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(u".*", constant_init)])
    initializer(maxout)
    input_tensor = torch.FloatTensor([[-3, 1]])
    output = maxout(input_tensor).data.numpy()
    assert output.shape == (1, 3)
    # This output was checked by hand
    # The output of the first maxout layer is [-1, -1, -1], since the
    # matrix multiply gives us [-2]*12. Reshaping and maxing
    # produces [-2, -2, -2] and the bias increments these values.
    # The second layer output is [-2, -2, -2], since the matrix
    # matrix multiply gives us [-3]*12. Reshaping and maxing
    # produces [-3, -3, -3] and the bias increments these values.
    assert_almost_equal(output, [[-2, -2, -2]])
Example #3
Source File: simple_tagger.py From magnitude with MIT License | 6 votes |
def __init__(self, vocab, text_field_embedder, encoder,
             initializer=InitializerApplicator(), regularizer=None):
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(u"labels")
    self.encoder = encoder
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_classes))
    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           u"text field embedding dim", u"encoder input dim")
    self.metrics = {
        u"accuracy": CategoricalAccuracy(),
        u"accuracy3": CategoricalAccuracy(top_k=3)
    }
    initializer(self)

#overrides
Example #4
Source File: maxout_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_forward_gives_correct_output(self):
    params = Params(
        {"input_dim": 2, "output_dims": 3, "pool_sizes": 4, "dropout": 0.0, "num_layers": 2}
    )
    maxout = Maxout.from_params(params)
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(maxout)
    input_tensor = torch.FloatTensor([[-3, 1]])
    output = maxout(input_tensor).data.numpy()
    assert output.shape == (1, 3)
    # This output was checked by hand
    # The output of the first maxout layer is [-1, -1, -1], since the
    # matrix multiply gives us [-2]*12. Reshaping and maxing
    # produces [-2, -2, -2] and the bias increments these values.
    # The second layer output is [-2, -2, -2], since the matrix
    # matrix multiply gives us [-3]*12. Reshaping and maxing
    # produces [-3, -3, -3] and the bias increments these values.
    assert_almost_equal(output, [[-2, -2, -2]])
Example #5
Source File: custom_composed_seq2seq.py From summarus with Apache License 2.0 | 5 votes |
def __init__(self,
             vocab: Vocabulary,
             source_text_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             decoder: SeqDecoder,
             tied_source_embedder_key: Optional[str] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CustomComposedSeq2Seq, self).__init__(vocab, regularizer)

    self._source_text_embedder = source_text_embedder
    self._encoder = encoder
    self._decoder = decoder

    if self._encoder.get_output_dim() != self._decoder.get_output_dim():
        raise ConfigurationError(f"Encoder output dimension {self._encoder.get_output_dim()} should be"
                                 f" equal to decoder dimension {self._decoder.get_output_dim()}.")
    if tied_source_embedder_key:
        if not isinstance(self._source_text_embedder, BasicTextFieldEmbedder):
            raise ConfigurationError("Unable to tie embeddings,"
                                     "Source text embedder is not an instance of `BasicTextFieldEmbedder`.")
        source_embedder = self._source_text_embedder._token_embedders[tied_source_embedder_key]
        if not isinstance(source_embedder, Embedding):
            raise ConfigurationError("Unable to tie embeddings,"
                                     "Selected source embedder is not an instance of `Embedding`.")
        if source_embedder.get_output_dim() != self._decoder.target_embedder.get_output_dim():
            raise ConfigurationError(f"Output Dimensions mismatch between"
                                     f"source embedder and target embedder.")
        self._source_text_embedder._token_embedders[tied_source_embedder_key] = self._decoder.target_embedder

    initializer(self)
Example #6
Source File: regularizers_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_frozen_params(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    # freeze the parameters of the first linear
    for name, param in model.named_parameters():
        if re.search(r"0.*$", name):
            param.requires_grad = False
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 55 because of bias (5*10 + 5)
    assert value.data.numpy() == 55
Example #7
Source File: regularizers_test.py From magnitude with MIT License | 5 votes |
def test_regularizer_applicator_respects_regex_matching(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    initializer = InitializerApplicator([(u".*", lambda tensor: constant_(tensor, 1.))])
    initializer(model)
    value = RegularizerApplicator([(u"weight", L2Regularizer(0.5)),
                                   (u"bias", L1Regularizer(1.0))])(model)
    assert value.data.numpy() == 65.0
Example #8
Source File: slqa_h.py From SLQA with Apache License 2.0 | 5 votes |
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             projected_layer: Seq2SeqEncoder,
             flow_layer: Seq2SeqEncoder,
             contextual_passage: Seq2SeqEncoder,
             contextual_question: Seq2SeqEncoder,
             dropout: float = 0.2,
             regularizer: Optional[RegularizerApplicator] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             ):
    super(MultiGranularityHierarchicalAttentionFusionNetworks, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._encoding_dim = self._phrase_layer.get_output_dim()
    self.projected_layer = torch.nn.Linear(self._encoding_dim + 1024, self._encoding_dim)
    self.fuse = FusionLayer(self._encoding_dim)
    self.projected_lstm = projected_layer
    self.flow = flow_layer
    self.contextual_layer_p = contextual_passage
    self.contextual_layer_q = contextual_question
    self.linear_self_align = torch.nn.Linear(self._encoding_dim, 1)
    self.bilinear_layer_s = BilinearSeqAtt(self._encoding_dim, self._encoding_dim)
    self.bilinear_layer_e = BilinearSeqAtt(self._encoding_dim, self._encoding_dim)
    self.yesno_predictor = torch.nn.Linear(self._encoding_dim, 3)
    self.relu = torch.nn.ReLU()

    self._max_span_length = 30

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._official_f1 = Average()
    self._variational_dropout = InputVariationalDropout(dropout)

    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
Example #9
Source File: regularizers_test.py From magnitude with MIT License | 5 votes |
def test_l2_regularization(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    initializer = InitializerApplicator([(u".*", lambda tensor: constant_(tensor, 0.5))])
    initializer(model)
    value = RegularizerApplicator([(u"", L2Regularizer(1.0))])(model)
    assert value.data.numpy() == 28.75
Example #10
Source File: regularizers_test.py From magnitude with MIT License | 5 votes |
def test_l1_regularization(self):
    model = torch.nn.Sequential(
        torch.nn.Linear(5, 10),
        torch.nn.Linear(10, 5)
    )
    initializer = InitializerApplicator([(u".*", lambda tensor: constant_(tensor, -1))])
    initializer(model)
    value = RegularizerApplicator([(u"", L1Regularizer(1.0))])(model)
    # 115 because of biases.
    assert value.data.numpy() == 115.0
Example #11
Source File: model.py From ConvLab with MIT License | 5 votes |
def __init__(self, vocab: Vocabulary,
             input_dim: int,
             num_classes: int,
             label_namespace: str = "labels",
             feedforward: Optional[FeedForward] = None,
             dropout: Optional[float] = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.input_dim = input_dim
    self.num_classes = num_classes
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward

    if self._feedforward is not None:
        self.projection_layer = Linear(feedforward.get_output_dim(), self.num_classes)
    else:
        self.projection_layer = Linear(self.input_dim, self.num_classes)

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
        "accuracy5": CategoricalAccuracy(top_k=5)
    }
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
Example #12
Source File: multiple_correct_mcq_multee_esim.py From multee with Apache License 2.0 | 5 votes |
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             final_feedforward: FeedForward,
             coverage_loss: CoverageLoss,
             similarity_function: SimilarityFunction = DotProductSimilarity(),
             dropout: float = 0.5,
             contextualize_pair_comparators: bool = False,
             pair_context_encoder: Seq2SeqEncoder = None,
             pair_feedforward: FeedForward = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab=vocab,
                     text_field_embedder=text_field_embedder,
                     encoder=encoder,
                     similarity_function=similarity_function,
                     projection_feedforward=projection_feedforward,
                     inference_encoder=inference_encoder,
                     output_feedforward=output_feedforward,
                     output_logit=output_logit,
                     final_feedforward=final_feedforward,
                     coverage_loss=coverage_loss,
                     contextualize_pair_comparators=contextualize_pair_comparators,
                     pair_context_encoder=pair_context_encoder,
                     pair_feedforward=pair_feedforward,
                     dropout=dropout,
                     initializer=initializer,
                     regularizer=regularizer)
    self._ignore_index = -1
    self._answer_loss = torch.nn.CrossEntropyLoss(ignore_index=self._ignore_index)
    self._coverage_loss = coverage_loss

    self._accuracy = CategoricalAccuracy()
    self._entailment_f1 = F1Measure(self._label2idx["entailment"])
Example #13
Source File: single_correct_mcq_multee_esim.py From multee with Apache License 2.0 | 5 votes |
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             final_feedforward: FeedForward,
             coverage_loss: CoverageLoss,
             similarity_function: SimilarityFunction = DotProductSimilarity(),
             dropout: float = 0.5,
             contextualize_pair_comparators: bool = False,
             pair_context_encoder: Seq2SeqEncoder = None,
             pair_feedforward: FeedForward = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    # Need to send it verbatim because otherwise FromParams doesn't work appropriately.
    super().__init__(vocab=vocab,
                     text_field_embedder=text_field_embedder,
                     encoder=encoder,
                     similarity_function=similarity_function,
                     projection_feedforward=projection_feedforward,
                     inference_encoder=inference_encoder,
                     output_feedforward=output_feedforward,
                     output_logit=output_logit,
                     final_feedforward=final_feedforward,
                     contextualize_pair_comparators=contextualize_pair_comparators,
                     coverage_loss=coverage_loss,
                     pair_context_encoder=pair_context_encoder,
                     pair_feedforward=pair_feedforward,
                     dropout=dropout,
                     initializer=initializer,
                     regularizer=regularizer)
    self._answer_loss = torch.nn.CrossEntropyLoss()

    self._accuracy = CategoricalAccuracy()
Example #14
Source File: semantic_role_labeler.py From magnitude with MIT License | 5 votes |
def __init__(self, vocab, text_field_embedder, encoder, binary_feature_dim,
             embedding_dropout=0.0, initializer=InitializerApplicator(),
             regularizer=None, label_smoothing=None):
    super(SemanticRoleLabeler, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(u"labels")

    # For the span based evaluation, we don't want to consider labels
    # for verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=u"labels", ignore_classes=[u"V"])

    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing

    check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                           encoder.get_input_dim(),
                           u"text embedding dim + verb indicator embedding dim",
                           u"encoder input dim")
    initializer(self)
Example #15
Source File: augmented_lstm_test.py From magnitude with MIT License | 5 votes |
def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
    augmented_lstm = AugmentedLstm(10, 11)
    pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)
    # Initialize all weights to be == 1.
    initializer = InitializerApplicator([(u".*", lambda tensor: torch.nn.init.constant_(tensor, 1.))])
    initializer(augmented_lstm)
    initializer(pytorch_lstm)

    initial_state = torch.zeros([1, 5, 11])
    initial_memory = torch.zeros([1, 5, 11])

    # Use bigger numbers to avoid floating point instability.
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(self.random_tensor * 5.,
                                                                self.sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)

    augmented_output, augmented_state = augmented_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output, pytorch_state = pytorch_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
    augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    numpy.testing.assert_array_almost_equal(pytorch_output_sequence.data.numpy(),
                                            augmented_output_sequence.data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[0].data.numpy(),
                                            augmented_state[0].data.numpy(), decimal=4)
    numpy.testing.assert_array_almost_equal(pytorch_state[1].data.numpy(),
                                            augmented_state[1].data.numpy(), decimal=4)
Example #16
Source File: token_characters_encoder_test.py From magnitude with MIT License | 5 votes |
def setUp(self):
    super(TestTokenCharactersEncoder, self).setUp()
    self.vocab = Vocabulary()
    self.vocab.add_token_to_namespace(u"1", u"token_characters")
    self.vocab.add_token_to_namespace(u"2", u"token_characters")
    self.vocab.add_token_to_namespace(u"3", u"token_characters")
    self.vocab.add_token_to_namespace(u"4", u"token_characters")
    params = Params({
        u"embedding": {
            u"embedding_dim": 2,
            u"vocab_namespace": u"token_characters"
        },
        u"encoder": {
            u"type": u"cnn",
            u"embedding_dim": 2,
            u"num_filters": 4,
            u"ngram_filter_sizes": [1, 2],
            u"output_dim": 3
        }
    })
    self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab, params=deepcopy(params))
    self.embedding = Embedding.from_params(vocab=self.vocab, params=params[u"embedding"])
    self.inner_encoder = Seq2VecEncoder.from_params(params[u"encoder"])
    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(u".*", constant_init)])
    initializer(self.encoder)
    initializer(self.embedding)
    initializer(self.inner_encoder)
Example #17
Source File: cnn_encoder_test.py From magnitude with MIT License | 5 votes |
def test_forward_does_correct_computation(self):
    encoder = CnnEncoder(embedding_dim=2, num_filters=1, ngram_filter_sizes=(1, 2))
    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(u".*", constant_init)])
    initializer(encoder)
    input_tensor = torch.FloatTensor([[[.7, .8], [.1, 1.5]]])
    encoder_output = encoder(input_tensor, None)
    assert_almost_equal(encoder_output.data.numpy(),
                        numpy.asarray([[1.6 + 1.0, 3.1 + 1.0]]),
                        decimal=6)
Example #18
Source File: pretrained_model_initializer_test.py From allennlp with Apache License 2.0 | 5 votes |
def _get_applicator(
    self,
    regex: str,
    weights_file_path: str,
    parameter_name_overrides: Optional[Dict[str, str]] = None,
) -> InitializerApplicator:
    initializer = PretrainedModelInitializer(weights_file_path, parameter_name_overrides)
    return InitializerApplicator([(regex, initializer)])
Example #19
Source File: regularizers_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_l2_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
    assert value.data.numpy() == 28.75
Example #20
Source File: regularizers_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_l1_regularization(self):
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)
    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 115 because of biases.
    assert value.data.numpy() == 115.0
Example #21
Source File: cnn_encoder_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_forward_does_correct_computation(self):
    encoder = CnnEncoder(embedding_dim=2, num_filters=1, ngram_filter_sizes=(1, 2))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(encoder)
    input_tensor = torch.FloatTensor([[[0.7, 0.8], [0.1, 1.5]]])
    encoder_output = encoder(input_tensor, None)
    assert_almost_equal(
        encoder_output.data.numpy(), numpy.asarray([[1.6 + 1.0, 3.1 + 1.0]]), decimal=6
    )
Example #22
Source File: token_characters_encoder_test.py From allennlp with Apache License 2.0 | 5 votes |
def setup_method(self):
    super().setup_method()
    self.vocab = Vocabulary()
    self.vocab.add_token_to_namespace("1", "token_characters")
    self.vocab.add_token_to_namespace("2", "token_characters")
    self.vocab.add_token_to_namespace("3", "token_characters")
    self.vocab.add_token_to_namespace("4", "token_characters")
    params = Params(
        {
            "embedding": {"embedding_dim": 2, "vocab_namespace": "token_characters"},
            "encoder": {
                "type": "cnn",
                "embedding_dim": 2,
                "num_filters": 4,
                "ngram_filter_sizes": [1, 2],
                "output_dim": 3,
            },
        }
    )
    self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab, params=deepcopy(params))
    self.embedding = Embedding.from_params(vocab=self.vocab, params=params["embedding"])
    self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(self.encoder)
    initializer(self.embedding)
    initializer(self.inner_encoder)
Example #23
Source File: feedforward_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_forward_gives_correct_output(self):
    params = Params({"input_dim": 2, "hidden_dims": 3, "activations": "relu", "num_layers": 2})
    feedforward = FeedForward.from_params(params)

    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(feedforward)

    input_tensor = torch.FloatTensor([[-3, 1]])
    output = feedforward(input_tensor).data.numpy()
    assert output.shape == (1, 3)
    # This output was checked by hand - ReLU makes output after first hidden layer [0, 0, 0],
    # which then gets a bias added in the second layer to be [1, 1, 1].
    assert_almost_equal(output, [[1, 1, 1]])
Example #24
Source File: augmented_lstm_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
    augmented_lstm = AugmentedLstm(10, 11)
    pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)
    # Initialize all weights to be == 1.
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(augmented_lstm)
    initializer(pytorch_lstm)

    initial_state = torch.zeros([1, 5, 11])
    initial_memory = torch.zeros([1, 5, 11])

    # Use bigger numbers to avoid floating point instability.
    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(
        self.random_tensor * 5.0, self.sequence_lengths
    )
    lstm_input = pack_padded_sequence(
        sorted_tensor, sorted_sequence.data.tolist(), batch_first=True
    )

    augmented_output, augmented_state = augmented_lstm(
        lstm_input, (initial_state, initial_memory)
    )
    pytorch_output, pytorch_state = pytorch_lstm(lstm_input, (initial_state, initial_memory))
    pytorch_output_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
    augmented_output_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    numpy.testing.assert_array_almost_equal(
        pytorch_output_sequence.data.numpy(), augmented_output_sequence.data.numpy(), decimal=4
    )
    numpy.testing.assert_array_almost_equal(
        pytorch_state[0].data.numpy(), augmented_state[0].data.numpy(), decimal=4
    )
    numpy.testing.assert_array_almost_equal(
        pytorch_state[1].data.numpy(), augmented_state[1].data.numpy(), decimal=4
    )
Example #25
Source File: rnet.py From R-net with MIT License | 5 votes |
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             question_encoder: Seq2SeqEncoder,
             passage_encoder: Seq2SeqEncoder,
             pair_encoder: AttentionEncoder,
             self_encoder: AttentionEncoder,
             output_layer: QAOutputLayer,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             share_encoder: bool = False):
    super().__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.question_encoder = question_encoder
    self.passage_encoder = passage_encoder
    self.pair_encoder = pair_encoder
    self.self_encoder = self_encoder
    self.output_layer = output_layer

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    self.share_encoder = share_encoder
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
Example #26
Source File: classifier.py From vampire with Apache License 2.0 | 5 votes |
def __init__(self,
             vocab: Vocabulary,
             input_embedder: TextFieldEmbedder,
             encoder: Encoder = None,
             dropout: float = None,
             initializer: InitializerApplicator = InitializerApplicator()
             ) -> None:
    """
    Parameters
    ----------
    vocab: `Vocabulary`
        vocab to use
    input_embedder: `TextFieldEmbedder`
        generic embedder of tokens
    encoder: `Encoder`, optional (default = None)
        Seq2Vec or Seq2Seq Encoder wrapper. If no encoder is provided,
        assume that the input is a bag of word counts, for linear classification.
    dropout: `float`, optional (default = None)
        if set, will apply dropout to output of encoder.
    initializer: `InitializerApplicator`
        generic initializer
    """
    super().__init__(vocab)
    self._input_embedder = input_embedder
    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = None
    self._encoder = encoder
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    if self._encoder:
        self._clf_input_dim = self._encoder.get_output_dim()
    else:
        self._clf_input_dim = self._input_embedder.get_output_dim()
    self._classification_layer = torch.nn.Linear(self._clf_input_dim, self._num_labels)
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
Example #27
Source File: simple_tagger.py From HIT-SCIR-CoNLL2019 with Apache License 2.0 | 5 votes |
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             calculate_span_f1: bool = None,
             label_encoding: Optional[str] = None,
             label_namespace: str = "labels",
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_classes))

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")

    # We keep calculate_span_f1 as a constructor argument for API consistency with
    # the CrfTagger, even it is redundant in this class
    # (label_encoding serves the same purpose).
    if calculate_span_f1 and not label_encoding:
        raise ConfigurationError("calculate_span_f1 is True, but "
                                 "no label_encoding was specified.")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }

    if calculate_span_f1 or label_encoding:
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)
    else:
        self._f1_metric = None

    initializer(self)
Example #28
Source File: seq2labels_model.py From NLP_Toolkit with Apache License 2.0 | 5 votes |
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             predictor_dropout=0.0,
             labels_namespace: str = "labels",
             detect_namespace: str = "d_tags",
             verbose_metrics: bool = False,
             label_smoothing: float = 0.0,
             confidence: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(Seq2Labels, self).__init__(vocab, regularizer)

    self.label_namespaces = [labels_namespace, detect_namespace]
    self.text_field_embedder = text_field_embedder
    self.num_labels_classes = self.vocab.get_vocab_size(labels_namespace)
    self.num_detect_classes = self.vocab.get_vocab_size(detect_namespace)
    self.label_smoothing = label_smoothing
    self.confidence = confidence
    self.incorr_index = self.vocab.get_token_index("INCORRECT", namespace=detect_namespace)

    self._verbose_metrics = verbose_metrics

    self.predictor_dropout = TimeDistributed(torch.nn.Dropout(predictor_dropout))

    self.tag_labels_projection_layer = TimeDistributed(
        Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_labels_classes))
    self.tag_detect_projection_layer = TimeDistributed(
        Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_detect_classes))

    self.metrics = {"accuracy": CategoricalAccuracy()}

    initializer(self)
Example #29
Source File: pico_crf_tagger.py From scibert with Apache License 2.0 | 5 votes |
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             include_start_end_transitions: bool = True,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = 'labels'
    self.num_tags = self.vocab.get_vocab_size(self.label_namespace)

    # encode text
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.dropout = torch.nn.Dropout(dropout) if dropout else None

    # crf
    output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
    self.crf = ConditionalRandomField(self.num_tags, constraints=None,
                                      include_start_end_transitions=include_start_end_transitions)

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    for index, label in self.vocab.get_index_to_token_vocabulary(self.label_namespace).items():
        self.metrics['F1_' + label] = F1Measure(positive_label=index)

    initializer(self)
Example #30
Source File: text_classifier.py From scibert with Apache License 2.0 | 5 votes |
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2SeqEncoder,
             classifier_feedforward: FeedForward,
             verbose_metrics: False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             ) -> None:
    super(TextClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    self.prediction_layer = torch.nn.Linear(self.classifier_feedforward.get_output_dim(), self.num_classes)

    self.label_accuracy = CategoricalAccuracy()
    self.label_f1_metrics = {}
    self.verbose_metrics = verbose_metrics
    for i in range(self.num_classes):
        self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="labels")] = F1Measure(positive_label=i)
    self.loss = torch.nn.CrossEntropyLoss()

    self.pool = lambda text, mask: util.get_final_encoder_states(text, mask, bidirectional=True)

    initializer(self)