Python bert.modeling.BertModel() Examples

The following are 30 code examples of bert.modeling.BertModel(), drawn from open-source projects; the source file and project are noted above each example. You may also want to check out all available functions/classes of the module bert.modeling.
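Before the project-specific examples, here is a minimal sketch of the underlying API, assuming TensorFlow 1.x and that the original google-research/bert modeling module is importable; the sequence length and config values are illustrative only.

import tensorflow as tf
from bert import modeling

# Illustrative configuration; in practice this is usually loaded with
# modeling.BertConfig.from_json_file("bert_config.json").
bert_config = modeling.BertConfig(
    vocab_size=30522,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12)

input_ids = tf.placeholder(tf.int32, shape=[None, 128], name="input_ids")
input_mask = tf.placeholder(tf.int32, shape=[None, 128], name="input_mask")
segment_ids = tf.placeholder(tf.int32, shape=[None, 128], name="segment_ids")

model = modeling.BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,
    token_type_ids=segment_ids,
    use_one_hot_embeddings=False)

sequence_output = model.get_sequence_output()  # [batch_size, seq_length, hidden_size]
pooled_output = model.get_pooled_output()      # [batch_size, hidden_size]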
Example #1
Source File: bert_utils.py    From language with Apache License 2.0
def get_bert_embeddings(input_ids,
                        bert_config,
                        input_mask=None,
                        token_type_ids=None,
                        is_training=False,
                        use_one_hot_embeddings=False,
                        scope=None):
  """Returns embeddings for BERT."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=token_type_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope=scope)
  return model.get_sequence_output() 
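A hedged illustration of how this helper might be called; the `bert_config` and int32 id tensors are assumed to be built as in the minimal sketch above, and the shapes are placeholders.

embeddings = get_bert_embeddings(
    input_ids=input_ids,
    bert_config=bert_config,
    input_mask=input_mask,
    token_type_ids=segment_ids)
# embeddings has shape [batch_size, seq_length, hidden_size]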
Example #2
Source File: train_shallow_layer.py    From sqlova with Apache License 2.0
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config 
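A hypothetical call of this loader; it assumes a module-level `device` (as in the sqlova training scripts) plus the PyTorch `BertConfig`, `BertModel`, and `tokenization` imports used above, and both the directory and the bert_type string are placeholders.

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_bert, tokenizer, bert_config = get_bert(
    BERT_PT_PATH="./model",
    bert_type="uncased_L-12_H-768_A-12",
    do_lower_case=True,
    no_pretraining=False)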
Example #3
Source File: train.py    From sqlova with Apache License 2.0
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config 
Example #4
Source File: train_decoder_layer.py    From sqlova with Apache License 2.0
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config 
Example #5
Source File: sentiment_modeling.py    From SpanABSA with Apache License 2.0
def __init__(self, config, use_crf=False):
        super(BertForBIOAspectExtraction, self).__init__()
        self.bert = BertModel(config)
        self.use_crf = use_crf
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.affine = nn.Linear(config.hidden_size, 3)
        if self.use_crf:
            self.crf = ConditionalRandomField(3)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
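The same constructor pattern (a shared BertModel backbone, a small task head, and a local init_weights applied to the new layers) recurs in the following SpanABSA examples. A purely illustrative instantiation, assuming the PyTorch-style BertConfig used by that repository and a hypothetical config path:

config = BertConfig.from_json_file("bert_config.json")
model = BertForBIOAspectExtraction(config, use_crf=True)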
Example #6
Source File: sentiment_modeling.py    From SpanABSA with Apache License 2.0
def __init__(self, config, use_crf=False):
        super(BertForBIOAspectClassification, self).__init__()
        self.bert = BertModel(config)
        self.use_crf = use_crf
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.affine = nn.Linear(config.hidden_size, 5)
        if self.use_crf:
            self.crf = ConditionalRandomField(5)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
Example #7
Source File: sentiment_modeling.py    From SpanABSA with Apache License 2.0
def __init__(self, config):
        super(BertForSpanAspectExtraction, self).__init__()
        self.bert = BertModel(config)
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.qa_outputs = nn.Linear(config.hidden_size, 2)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
Example #8
Source File: sentiment_modeling.py    From SpanABSA with Apache License 2.0
def __init__(self, config):
        super(BertForSpanAspectClassification, self).__init__()
        self.bert = BertModel(config)
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()
        self.affine = nn.Linear(config.hidden_size, 1)
        self.classifier = nn.Linear(config.hidden_size, 5)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
Example #9
Source File: sentiment_modeling.py    From SpanABSA with Apache License 2.0
def __init__(self, config):
        super(BertForCollapsedSpanAspectExtractionAndClassification, self).__init__()
        self.bert = BertModel(config)
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.neu_outputs = nn.Linear(config.hidden_size, 2)
        self.pos_outputs = nn.Linear(config.hidden_size, 2)
        self.neg_outputs = nn.Linear(config.hidden_size, 2)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
Example #10
Source File: sentiment_modeling.py    From SpanABSA with Apache License 2.0
def __init__(self, config, use_crf=False):
        super(BertForCollapsedBIOAspectExtractionAndClassification, self).__init__()
        self.bert = BertModel(config)
        self.use_crf = use_crf
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.affine = nn.Linear(config.hidden_size, 7)
        if self.use_crf:
            self.crf = ConditionalRandomField(7)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
Example #11
Source File: sentiment_modeling.py    From SpanABSA with Apache License 2.0
def __init__(self, config, use_crf=False):
        super(BertForJointBIOExtractAndClassification, self).__init__()
        self.bert = BertModel(config)
        self.use_crf = use_crf
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.bio_affine = nn.Linear(config.hidden_size, 3)
        self.cls_affine = nn.Linear(config.hidden_size, 5)
        if self.use_crf:
            self.cls_crf = ConditionalRandomField(5)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
Example #12
Source File: transfer_bert_model.py    From delta with Apache License 2.0
def transfer_bert_model(bert_model_dir, output_bert_model):
  graph = tf.Graph()
  max_seq_len = 512
  num_labels = 2
  use_one_hot_embeddings = False
  with graph.as_default():
    with tf.Session() as sess:
      input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
      input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
      segment_ids = tf.placeholder(tf.int32, (None, None), 'segment_ids')

      bert_config = modeling.BertConfig.from_json_file(os.path.join(bert_model_dir, 'bert_config.json'))
      model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)
      all_encoder_layers = model.get_all_encoder_layers()
      input_x_bert_cls = model.get_pooled_output()
      for idx, layer in enumerate(all_encoder_layers):
        layer = tf.identity(layer, "encoder_layers_" + str(idx))
        print("layer:", layer)
      input_x_bert_cls = tf.identity(input_x_bert_cls, "input_x_bert_cls")
      print("input_x_bert_cls", input_x_bert_cls)
      saver = tf.train.Saver()

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      saver.restore(sess, bert_model_dir + "/bert_model.ckpt")
      saver.save(sess, output_bert_model) 
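A hypothetical invocation; both the checkpoint directory and the output prefix are placeholders, and the directory is expected to contain bert_config.json and bert_model.ckpt as in a standard released BERT checkpoint.

transfer_bert_model(
    bert_model_dir="./uncased_L-12_H-768_A-12",
    output_bert_model="./exported/bert_transferred")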
Example #13
Source File: coherence_eval.py    From language with Apache License 2.0
def create_cpc_model_and_placeholders(num_choices):
  """Build model and placeholders."""

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config)
  is_training = False
  use_one_hot_embeddings = False
  seq_length = 512

  Placeholders = namedtuple("Placeholders", [
      "input_ids", "input_mask", "segment_ids", "labels", "label_types",
      "softmax_mask"
  ])

  input_ids = tf.placeholder(dtype=tf.int32, shape=[None, seq_length])
  input_mask = tf.placeholder(dtype=tf.int32, shape=[None, seq_length])
  segment_ids = tf.placeholder(dtype=tf.int32, shape=[None, seq_length])
  labels = tf.placeholder(dtype=tf.int32, shape=[None, 8])
  label_types = tf.placeholder(dtype=tf.int32, shape=[None, 8])
  softmax_mask = tf.placeholder(dtype=tf.bool, shape=[None])
  placeholders = Placeholders(input_ids, input_mask, segment_ids, labels,
                              label_types, softmax_mask)

  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  logits, probabilities = create_cpc_model(model, num_choices, False,
                                           softmax_mask)

  Model = namedtuple("Model", ["logits", "probabilities"])
  model = Model(logits, probabilities)

  return placeholders, model 
Example #14
Source File: run_mrqa.py    From language with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask,
                 use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # Get the logits for the start and end predictions.
  final_hidden = model.get_sequence_output()

  final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
  batch_size = final_hidden_shape[0]
  seq_length = final_hidden_shape[1]
  hidden_size = final_hidden_shape[2]

  output_weights = tf.get_variable(
      "cls/nq/output_weights", [2, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "cls/nq/output_bias", [2], initializer=tf.zeros_initializer())

  final_hidden_matrix = tf.reshape(final_hidden,
                                   [batch_size * seq_length, hidden_size])
  logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)

  logits = tf.reshape(logits, [batch_size, seq_length, 2])
  logits = tf.transpose(logits, [2, 0, 1])

  unstacked_logits = tf.unstack(logits, axis=0)

  (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])
  return (start_logits, end_logits) 
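The start/end logits returned here are typically turned into a training loss with a cross-entropy over token positions, as in the standard BERT SQuAD recipe. A hedged sketch, assuming start_positions and end_positions are int32 tensors of shape [batch_size] and seq_length is the padded sequence length:

def span_loss(logits, positions, seq_length):
  # Cross-entropy between the predicted position distribution and the gold position.
  one_hot_positions = tf.one_hot(positions, depth=seq_length, dtype=tf.float32)
  log_probs = tf.nn.log_softmax(logits, axis=-1)
  return -tf.reduce_mean(tf.reduce_sum(one_hot_positions * log_probs, axis=-1))

# total_loss = (span_loss(start_logits, start_positions, seq_length) +
#               span_loss(end_logits, end_positions, seq_length)) / 2.0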
Example #15
Source File: model_fns.py    From language with Apache License 2.0
def shared_qry_encoder_v2(qry_input_ids, qry_input_mask, is_training,
                          use_one_hot_embeddings, bert_config, qa_config):
  """Embed query into a BOW and shared dense representation."""
  qry_model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=qry_input_ids,
      input_mask=qry_input_mask,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope="bert")
  qry_seq_emb, _ = _get_bert_embeddings(
      qry_model, [qa_config.qry_num_layers - 2], "concat", name="qry")
  word_emb_table = qry_model.get_embedding_table()

  return qry_seq_emb, word_emb_table 
Example #16
Source File: run_dual_encoder.py    From language with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 image_vector, use_one_hot_embeddings, scope):
  """Creates a model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope=scope)

  if FLAGS.ignore_image:
    logit = tf.layers.dense(
        model.get_pooled_output(), 1, activation=tf.tanh,
        kernel_initializer=
        modeling.create_initializer(bert_config.initializer_range))
    logit = tf.squeeze(logit, axis=1)
  else:
    logit = tf.einsum("ij,ij->i", tf.layers.dense(
        image_vector,
        bert_config.hidden_size,
        activation=tf.tanh,
        kernel_initializer=
        modeling.create_initializer(bert_config.initializer_range)),
                      model.get_pooled_output(),
                      name="inner")

  return tf.stack([-logit, logit], axis=1) 
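The einsum "ij,ij->i" above is just a batched dot product between the projected image vector and the pooled text representation. A hedged equivalent written out explicitly, reusing the names from the function above:

projected_image = tf.layers.dense(
    image_vector,
    bert_config.hidden_size,
    activation=tf.tanh,
    kernel_initializer=modeling.create_initializer(bert_config.initializer_range))
# Row-wise dot product: multiply elementwise, then sum over the hidden dimension.
logit = tf.reduce_sum(projected_image * model.get_pooled_output(), axis=1)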
Example #17
Source File: run_bert_open_qa_eval.py    From XQA with MIT License
def create_predict_model(bert_config, input_ids, input_mask,
                 segment_ids, use_one_hot_embeddings):
  """Creates a classification model."""
  all_logits = []
  input_ids_shape = modeling.get_shape_list(input_ids, expected_rank=2)
  batch_size = input_ids_shape[0]
  seq_length = input_ids_shape[1]

  model = modeling.BertModel(
      config=bert_config,
      is_training=False,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope="bert")
  final_hidden = model.get_sequence_output()

  final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
  hidden_size = final_hidden_shape[2]

  output_weights = tf.get_variable(
      "cls/open_qa/output_weights", [2, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))
  output_bias = tf.get_variable(
      "cls/open_qa/output_bias", [2], initializer=tf.zeros_initializer())

  final_hidden_matrix = tf.reshape(final_hidden,
                                   [batch_size * seq_length, hidden_size])
  logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)

  logits = tf.reshape(logits, [batch_size, seq_length, 2])
  logits = tf.transpose(logits, [2, 0, 1])
  unstacked_logits = tf.unstack(logits, axis=0)
  (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

  return (start_logits, end_logits) 
Example #18
Source File: run_squad.py    From MAX-Question-Answering with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    final_hidden = model.get_sequence_output()

    final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
    batch_size = final_hidden_shape[0]
    seq_length = final_hidden_shape[1]
    hidden_size = final_hidden_shape[2]

    output_weights = tf.get_variable(
        "cls/squad/output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())

    final_hidden_matrix = tf.reshape(final_hidden,
                                     [batch_size * seq_length, hidden_size])
    logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    logits = tf.reshape(logits, [batch_size, seq_length, 2])
    logits = tf.transpose(logits, [2, 0, 1])

    unstacked_logits = tf.unstack(logits, axis=0)

    (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

    return (start_logits, end_logits) 
Example #19
Source File: bert_sim.py    From chinese-bert-similarity with MIT License
def _init_graph(self):
        """
        init bert graph
        """
        try:
            from bert import modeling
            bert_config = modeling.BertConfig.from_json_file(os.path.join(self.bert_sim_dir, 'bert_config.json'))
            self.model = modeling.BertModel(config=bert_config,
                                            is_training=False,
                                            input_ids=self.input_ids,
                                            input_mask=self.input_mask,
                                            token_type_ids=self.input_type_ids,
                                            use_one_hot_embeddings=False)

            # get output weights and output bias
            ckpt = self.tf.train.get_checkpoint_state(self.bert_sim_dir).all_model_checkpoint_paths[-1]
            reader = self.tf.train.NewCheckpointReader(ckpt)
            output_weights = reader.get_tensor('output_weights')
            output_bias = reader.get_tensor('output_bias')

            # get result op
            output_layer = self.model.get_pooled_output()
            logits = self.tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = self.tf.nn.bias_add(logits, output_bias)
            self.probabilities = self.tf.nn.softmax(logits, axis=-1)

            sess_config = self.tf.ConfigProto(allow_soft_placement=True)
            sess_config.gpu_options.allow_growth = True

            graph = self.probabilities.graph
            saver = self.tf.train.Saver()
            self.sess = self.tf.Session(config=sess_config, graph=graph)
            self.sess.run(self.tf.global_variables_initializer())
            self.tf.reset_default_graph()
            saver.restore(self.sess, ckpt)

        except Exception as e:
            self.logger.error(e) 
Example #20
Source File: train.py    From chinese-bert-similarity with MIT License
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)
  output_layer = model.get_pooled_output()
  # output_layer = [tf.squeeze(model.all_encoder_layers[i][:, 0:1, :], axis=1) for i in range(-4, 0, 1)]

  output_layer = tf.concat(output_layer, axis=1)

  hidden_size = output_layer.shape[-1].value
  output_layer = tf.layers.dropout(output_layer, rate=0.8)
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities) 
Example #21
Source File: BERT_NER.py    From bert-chinese-ner with MIT License
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02)
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, 11])
        # mask = tf.cast(input_mask,tf.float32)
        # loss = tf.contrib.seq2seq.sequence_loss(logits,labels,mask)
        # return (loss, logits, predict)
        ##########################################################################
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predict = tf.argmax(probabilities,axis=-1)
        return (loss, per_example_loss, logits,predict)
        ########################################################################## 
Example #22
Source File: run_sequence_labeling.py    From pynlp with MIT License
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02)
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, num_labels])
        # mask = tf.cast(input_mask,tf.float32)
        # loss = tf.contrib.seq2seq.sequence_loss(logits,labels,mask)
        # return (loss, logits, predict)
        ##########################################################################
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predicts = tf.argmax(probabilities, axis=-1)
        return (loss, per_example_loss, logits, predicts)
        ########################################################################## 
Example #23
Source File: bert_lstm_ner.py    From pynlp with MIT License
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    Create the model.
    :param bert_config: BERT configuration
    :param is_training:
    :param input_ids: token ids of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: label ids
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data and get the corresponding character embeddings
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )
    # Get the embeddings for the input data, shape [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # vector of size [batch_size] holding the sequence length of each example in the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size, cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst 
Example #24
Source File: train_bert_ner.py    From FoolNLTK with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, num_labels, use_one_hot_embeddings):
  model = modeling.BertModel(
    config=bert_config,
    is_training=is_training,
    input_ids=input_ids,
    input_mask=input_mask,
    token_type_ids=segment_ids,
    use_one_hot_embeddings=use_one_hot_embeddings
  )

  output_layer = model.get_sequence_output()

  hidden_size = output_layer.shape[-1].value

  output_weight = tf.get_variable(
    "output_weights", [num_labels, hidden_size],
    initializer=tf.truncated_normal_initializer(stddev=0.02)
  )
  output_bias = tf.get_variable(
    "output_bias", [num_labels], initializer=tf.zeros_initializer()
  )
  if is_training:
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
  output_layer = tf.reshape(output_layer, [-1, hidden_size])
  logits = tf.matmul(output_layer, output_weight, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)
  print("logits====>", logits)
  return logits
  ########################################################################## 
Example #25
Source File: run_sequence_labeling.py    From BERT-for-Sequence-Labeling-and-Text-Classification with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02)
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, num_labels])
        # mask = tf.cast(input_mask,tf.float32)
        # loss = tf.contrib.seq2seq.sequence_loss(logits,labels,mask)
        # return (loss, logits, predict)
        ##########################################################################
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predicts = tf.argmax(probabilities, axis=-1)
        return (loss, per_example_loss, logits, predicts)
        ########################################################################## 
Example #26
Source File: run_binary_coherence.py    From language with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  num_labels = 2  # This is hardcoded for binary classification

  with tf.variable_scope("cls/seq_relationship"):
    output_weights = tf.get_variable(
        "output_weights",
        shape=[num_labels, hidden_size],
        initializer=modeling.create_initializer(bert_config.initializer_range))
    output_bias = tf.get_variable(
        "output_bias", shape=[num_labels], initializer=tf.zeros_initializer())

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    probabilities = tf.nn.softmax(logits, axis=-1)
    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, logits, probabilities) 
Example #27
Source File: run_ner.py    From KBQA-BERT with MIT License
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    Create the model.
    :param bert_config: BERT configuration
    :param is_training:
    :param input_ids: token ids of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: label ids
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data and get the corresponding character embeddings
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )
    # Get the embeddings for the input data, shape [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # vector of size [batch_size] holding the sequence length of each example in the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size, cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst 
Example #28
Source File: run_classifier_distillation.py    From language with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    per_example_loss = -tf.reduce_sum(labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities) 
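Unlike the standard classifier, this distillation variant applies no one-hot conversion: labels is expected to already be a soft probability distribution over the classes (for example, a teacher model's softmax output). A purely illustrative feed value:

import numpy as np

# Soft teacher targets for a 2-class problem, shape [batch_size, num_labels]; these
# play the role of `labels` in the loss above (no tf.one_hot is applied).
teacher_logits = np.array([[2.0, -1.0], [0.5, 1.5]], dtype=np.float32)
soft_labels = np.exp(teacher_logits) / np.exp(teacher_logits).sum(axis=1, keepdims=True)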
Example #29
Source File: run_bert_boolq_diff.py    From language with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 gt_probs, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    per_example_loss = -tf.reduce_sum(gt_probs * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities) 
Example #30
Source File: run_squad.py    From language with Apache License 2.0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  final_hidden = model.get_sequence_output()

  final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
  batch_size = final_hidden_shape[0]
  seq_length = final_hidden_shape[1]
  hidden_size = final_hidden_shape[2]

  output_weights = tf.get_variable(
      "cls/squad/output_weights", [2, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())

  final_hidden_matrix = tf.reshape(final_hidden,
                                   [batch_size * seq_length, hidden_size])
  logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)

  logits = tf.reshape(logits, [batch_size, seq_length, 2])
  logits = tf.transpose(logits, [2, 0, 1])

  unstacked_logits = tf.unstack(logits, axis=0)

  (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

  return (start_logits, end_logits)