Python bert.modeling.BertModel() Examples
The following are 30 code examples of bert.modeling.BertModel(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bert.modeling, or try the search function.
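Before the individual examples, here is a minimal sketch of how modeling.BertModel is typically constructed in the TensorFlow 1.x codebases these projects build on. The placeholder names, sequence length of 128, and config path are illustrative assumptions; only the constructor arguments and the get_sequence_output()/get_pooled_output() accessors are taken from the examples below.

import tensorflow as tf
from bert import modeling

# Illustrative placeholders; the names and the fixed length of 128 are
# assumptions, not taken from any single example on this page.
input_ids = tf.placeholder(tf.int32, shape=[None, 128], name="input_ids")
input_mask = tf.placeholder(tf.int32, shape=[None, 128], name="input_mask")
segment_ids = tf.placeholder(tf.int32, shape=[None, 128], name="segment_ids")

# Hypothetical path to a released BERT config file.
bert_config = modeling.BertConfig.from_json_file("bert_config.json")

model = modeling.BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,
    token_type_ids=segment_ids,
    use_one_hot_embeddings=False)

# Per-token representations: [batch_size, seq_length, hidden_size].
sequence_output = model.get_sequence_output()
# Pooled [CLS] representation: [batch_size, hidden_size].
pooled_output = model.get_pooled_output()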
Example #1
Source File: bert_utils.py From language with Apache License 2.0 | 6 votes |
def get_bert_embeddings(input_ids,
                        bert_config,
                        input_mask=None,
                        token_type_ids=None,
                        is_training=False,
                        use_one_hot_embeddings=False,
                        scope=None):
  """Returns embeddings for BERT."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=token_type_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope=scope)
  return model.get_sequence_output()
Example #2
Source File: train_shallow_layer.py From sqlova with Apache License 2.0 | 6 votes |
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    if no_pretraining:
        pass
    else:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
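For context, get_bert returns the PyTorch BertModel together with its tokenizer and config, and it relies on a module-level device. A hypothetical call might look like the following; the path and bert_type value are placeholders rather than values taken from the sqlova project:

import torch

# `device` mirrors the module-level global that get_bert() expects to exist
# in the sqlova training scripts; the values below are hypothetical.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_bert, tokenizer, bert_config = get_bert(
    BERT_PT_PATH='./model_bert',            # placeholder directory
    bert_type='uncased_L-12_H-768_A-12',    # placeholder bert_type
    do_lower_case=True,
    no_pretraining=False)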
Example #3
Source File: train.py From sqlova with Apache License 2.0 | 6 votes |
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    if no_pretraining:
        pass
    else:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
Example #4
Source File: train_decoder_layer.py From sqlova with Apache License 2.0 | 6 votes |
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    if no_pretraining:
        pass
    else:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
Example #5
Source File: sentiment_modeling.py From SpanABSA with Apache License 2.0 | 6 votes |
def __init__(self, config, use_crf=False):
    super(BertForBIOAspectExtraction, self).__init__()
    self.bert = BertModel(config)
    self.use_crf = use_crf
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.affine = nn.Linear(config.hidden_size, 3)
    if self.use_crf:
        self.crf = ConditionalRandomField(3)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()
    self.apply(init_weights)
Example #6
Source File: sentiment_modeling.py From SpanABSA with Apache License 2.0 | 6 votes |
def __init__(self, config, use_crf=False):
    super(BertForBIOAspectClassification, self).__init__()
    self.bert = BertModel(config)
    self.use_crf = use_crf
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.affine = nn.Linear(config.hidden_size, 5)
    if self.use_crf:
        self.crf = ConditionalRandomField(5)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()
    self.apply(init_weights)
Example #7
Source File: sentiment_modeling.py From SpanABSA with Apache License 2.0 | 6 votes |
def __init__(self, config):
    super(BertForSpanAspectExtraction, self).__init__()
    self.bert = BertModel(config)
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()
    self.apply(init_weights)
Example #8
Source File: sentiment_modeling.py From SpanABSA with Apache License 2.0 | 6 votes |
def __init__(self, config):
    super(BertForSpanAspectClassification, self).__init__()
    self.bert = BertModel(config)
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.activation = nn.Tanh()
    self.affine = nn.Linear(config.hidden_size, 1)
    self.classifier = nn.Linear(config.hidden_size, 5)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()
    self.apply(init_weights)
Example #9
Source File: sentiment_modeling.py From SpanABSA with Apache License 2.0 | 6 votes |
def __init__(self, config):
    super(BertForCollapsedSpanAspectExtractionAndClassification, self).__init__()
    self.bert = BertModel(config)
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.neu_outputs = nn.Linear(config.hidden_size, 2)
    self.pos_outputs = nn.Linear(config.hidden_size, 2)
    self.neg_outputs = nn.Linear(config.hidden_size, 2)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()
    self.apply(init_weights)
Example #10
Source File: sentiment_modeling.py From SpanABSA with Apache License 2.0 | 6 votes |
def __init__(self, config, use_crf=False):
    super(BertForCollapsedBIOAspectExtractionAndClassification, self).__init__()
    self.bert = BertModel(config)
    self.use_crf = use_crf
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.affine = nn.Linear(config.hidden_size, 7)
    if self.use_crf:
        self.crf = ConditionalRandomField(7)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()
    self.apply(init_weights)
Example #11
Source File: sentiment_modeling.py From SpanABSA with Apache License 2.0 | 6 votes |
def __init__(self, config, use_crf=False):
    super(BertForJointBIOExtractAndClassification, self).__init__()
    self.bert = BertModel(config)
    self.use_crf = use_crf
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.bio_affine = nn.Linear(config.hidden_size, 3)
    self.cls_affine = nn.Linear(config.hidden_size, 5)
    if self.use_crf:
        self.cls_crf = ConditionalRandomField(5)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()
    self.apply(init_weights)
Example #12
Source File: transfer_bert_model.py From delta with Apache License 2.0 | 5 votes |
def transfer_bert_model(bert_model_dir, output_bert_model):
  graph = tf.Graph()
  max_seq_len = 512
  num_labels = 2
  use_one_hot_embeddings = False
  with graph.as_default():
    with tf.Session() as sess:
      input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
      input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
      segment_ids = tf.placeholder(tf.int32, (None, None), 'segment_ids')
      bert_config = modeling.BertConfig.from_json_file(
          os.path.join(bert_model_dir, 'bert_config.json'))
      model = modeling.BertModel(
          config=bert_config,
          is_training=False,
          input_ids=input_ids,
          input_mask=input_mask,
          token_type_ids=segment_ids,
          use_one_hot_embeddings=use_one_hot_embeddings)
      all_encoder_layers = model.get_all_encoder_layers()
      input_x_bert_cls = model.get_pooled_output()
      for idx, layer in enumerate(all_encoder_layers):
        layer = tf.identity(layer, "encoder_layers_" + str(idx))
        print("layer:", layer)
      input_x_bert_cls = tf.identity(input_x_bert_cls, "input_x_bert_cls")
      print("input_x_bert_cls", input_x_bert_cls)
      saver = tf.train.Saver()

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      saver.restore(sess, bert_model_dir + "/bert_model.ckpt")
      saver.save(sess, output_bert_model)
Example #13
Source File: coherence_eval.py From language with Apache License 2.0 | 5 votes |
def create_cpc_model_and_placeholders(num_choices):
  """Build model and placeholders."""

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config)
  is_training = False
  use_one_hot_embeddings = False
  seq_length = 512

  Placeholders = namedtuple("Placeholders", [
      "input_ids", "input_mask", "segment_ids", "labels", "label_types",
      "softmax_mask"
  ])
  input_ids = tf.placeholder(dtype=tf.int32, shape=[None, seq_length])
  input_mask = tf.placeholder(dtype=tf.int32, shape=[None, seq_length])
  segment_ids = tf.placeholder(dtype=tf.int32, shape=[None, seq_length])
  labels = tf.placeholder(dtype=tf.int32, shape=[None, 8])
  label_types = tf.placeholder(dtype=tf.int32, shape=[None, 8])
  softmax_mask = tf.placeholder(dtype=tf.bool, shape=[None])
  placeholders = Placeholders(input_ids, input_mask, segment_ids, labels,
                              label_types, softmax_mask)

  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  logits, probabilities = create_cpc_model(model, num_choices, False,
                                           softmax_mask)

  Model = namedtuple("Model", ["logits", "probabilities"])
  model = Model(logits, probabilities)

  return placeholders, model
Example #14
Source File: run_mrqa.py From language with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask,
                 use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # Get the logits for the start and end predictions.
  final_hidden = model.get_sequence_output()

  final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
  batch_size = final_hidden_shape[0]
  seq_length = final_hidden_shape[1]
  hidden_size = final_hidden_shape[2]

  output_weights = tf.get_variable(
      "cls/nq/output_weights", [2, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "cls/nq/output_bias", [2], initializer=tf.zeros_initializer())

  final_hidden_matrix = tf.reshape(final_hidden,
                                   [batch_size * seq_length, hidden_size])
  logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)

  logits = tf.reshape(logits, [batch_size, seq_length, 2])
  logits = tf.transpose(logits, [2, 0, 1])

  unstacked_logits = tf.unstack(logits, axis=0)

  (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

  return (start_logits, end_logits)
Example #15
Source File: model_fns.py From language with Apache License 2.0 | 5 votes |
def shared_qry_encoder_v2(qry_input_ids, qry_input_mask, is_training,
                          use_one_hot_embeddings, bert_config, qa_config):
  """Embed query into a BOW and shared dense representation."""
  qry_model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=qry_input_ids,
      input_mask=qry_input_mask,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope="bert")
  qry_seq_emb, _ = _get_bert_embeddings(
      qry_model, [qa_config.qry_num_layers - 2], "concat", name="qry")
  word_emb_table = qry_model.get_embedding_table()

  return qry_seq_emb, word_emb_table
Example #16
Source File: run_dual_encoder.py From language with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 image_vector, use_one_hot_embeddings, scope):
  """Creates a model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope=scope)

  if FLAGS.ignore_image:
    logit = tf.layers.dense(
        model.get_pooled_output(),
        1,
        activation=tf.tanh,
        kernel_initializer=
        modeling.create_initializer(bert_config.initializer_range))
    logit = tf.squeeze(logit, axis=1)
  else:
    logit = tf.einsum(
        "ij,ij->i",
        tf.layers.dense(
            image_vector,
            bert_config.hidden_size,
            activation=tf.tanh,
            kernel_initializer=
            modeling.create_initializer(bert_config.initializer_range)),
        model.get_pooled_output(),
        name="inner")

  return tf.stack([-logit, logit], axis=1)
Example #17
Source File: run_bert_open_qa_eval.py From XQA with MIT License | 5 votes |
def create_predict_model(bert_config, input_ids, input_mask, segment_ids,
                         use_one_hot_embeddings):
  """Creates a classification model."""
  all_logits = []
  input_ids_shape = modeling.get_shape_list(input_ids, expected_rank=2)
  batch_size = input_ids_shape[0]
  seq_length = input_ids_shape[1]

  model = modeling.BertModel(
      config=bert_config,
      is_training=False,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      scope="bert")

  final_hidden = model.get_sequence_output()

  final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
  hidden_size = final_hidden_shape[2]

  output_weights = tf.get_variable(
      "cls/open_qa/output_weights", [2, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "cls/open_qa/output_bias", [2], initializer=tf.zeros_initializer())

  final_hidden_matrix = tf.reshape(final_hidden,
                                   [batch_size * seq_length, hidden_size])
  logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)

  logits = tf.reshape(logits, [batch_size, seq_length, 2])
  logits = tf.transpose(logits, [2, 0, 1])

  unstacked_logits = tf.unstack(logits, axis=0)

  (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

  return (start_logits, end_logits)
Example #18
Source File: run_squad.py From MAX-Question-Answering with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  final_hidden = model.get_sequence_output()

  final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
  batch_size = final_hidden_shape[0]
  seq_length = final_hidden_shape[1]
  hidden_size = final_hidden_shape[2]

  output_weights = tf.get_variable(
      "cls/squad/output_weights", [2, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())

  final_hidden_matrix = tf.reshape(final_hidden,
                                   [batch_size * seq_length, hidden_size])
  logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)

  logits = tf.reshape(logits, [batch_size, seq_length, 2])
  logits = tf.transpose(logits, [2, 0, 1])

  unstacked_logits = tf.unstack(logits, axis=0)

  (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

  return (start_logits, end_logits)
Example #19
Source File: bert_sim.py From chinese-bert-similarity with MIT License | 5 votes |
def _init_graph(self):
    """ init bert graph """
    try:
        from bert import modeling
        bert_config = modeling.BertConfig.from_json_file(
            os.path.join(self.bert_sim_dir, 'bert_config.json'))
        self.model = modeling.BertModel(config=bert_config,
                                        is_training=False,
                                        input_ids=self.input_ids,
                                        input_mask=self.input_mask,
                                        token_type_ids=self.input_type_ids,
                                        use_one_hot_embeddings=False)

        # get output weights and output bias
        ckpt = self.tf.train.get_checkpoint_state(self.bert_sim_dir).all_model_checkpoint_paths[-1]
        reader = self.tf.train.NewCheckpointReader(ckpt)
        output_weights = reader.get_tensor('output_weights')
        output_bias = reader.get_tensor('output_bias')

        # get result op
        output_layer = self.model.get_pooled_output()
        logits = self.tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = self.tf.nn.bias_add(logits, output_bias)
        self.probabilities = self.tf.nn.softmax(logits, axis=-1)

        sess_config = self.tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        graph = self.probabilities.graph
        saver = self.tf.train.Saver()
        self.sess = self.tf.Session(config=sess_config, graph=graph)
        self.sess.run(self.tf.global_variables_initializer())
        self.tf.reset_default_graph()
        saver.restore(self.sess, ckpt)
    except Exception as e:
        self.logger.error(e)
Example #20
Source File: train.py From chinese-bert-similarity with MIT License | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    output_layer = model.get_pooled_output()
    # output_layer = [tf.squeeze(model.all_encoder_layers[i][:, 0:1, :], axis=1) for i in range(-4, 0, 1)]
    output_layer = tf.concat(output_layer, axis=1)
    hidden_size = output_layer.shape[-1].value
    output_layer = tf.layers.dropout(output_layer, rate=0.8)

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)
Example #21
Source File: BERT_NER.py From bert-chinese-ner with MIT License | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02)
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, 11])
        # mask = tf.cast(input_mask,tf.float32)
        # loss = tf.contrib.seq2seq.sequence_loss(logits,labels,mask)
        # return (loss, logits, predict)
        ##########################################################################
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predict = tf.argmax(probabilities, axis=-1)
        return (loss, per_example_loss, logits, predict)
        ##########################################################################
Example #22
Source File: run_sequence_labeling.py From pynlp with MIT License | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02)
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, num_labels])
        # mask = tf.cast(input_mask,tf.float32)
        # loss = tf.contrib.seq2seq.sequence_loss(logits,labels,mask)
        # return (loss, logits, predict)
        ##########################################################################
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predicts = tf.argmax(probabilities, axis=-1)
        return (loss, per_example_loss, logits, predicts)
        ##########################################################################
Example #23
Source File: bert_lstm_ner.py From pynlp with MIT License | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    Creates the X model.
    :param bert_config: BERT configuration
    :param is_training:
    :param input_ids: index representation of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: index representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the corresponding character embeddings.
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    # Get the corresponding embeddings; the input is [batch_size, seq_length, embedding_size].
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # vector of size [batch_size] holding the sequence lengths of the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size,
                          cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.droupout_rate, initializers=initializers,
                          num_labels=num_labels, seq_length=max_seq_length,
                          labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst
Example #24
Source File: train_bert_ner.py From FoolNLTK with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    output_layer = model.get_sequence_output()
    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02)
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )

    if is_training:
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    output_layer = tf.reshape(output_layer, [-1, hidden_size])
    logits = tf.matmul(output_layer, output_weight, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    print("logits====>", logits)
    return logits
    ##########################################################################
Example #25
Source File: run_sequence_labeling.py From BERT-for-Sequence-Labeling-and-Text-Classification with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    output_layer = model.get_sequence_output()

    hidden_size = output_layer.shape[-1].value

    output_weight = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02)
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )
    with tf.variable_scope("loss"):
        if is_training:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        output_layer = tf.reshape(output_layer, [-1, hidden_size])
        logits = tf.matmul(output_layer, output_weight, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        logits = tf.reshape(logits, [-1, FLAGS.max_seq_length, num_labels])
        # mask = tf.cast(input_mask,tf.float32)
        # loss = tf.contrib.seq2seq.sequence_loss(logits,labels,mask)
        # return (loss, logits, predict)
        ##########################################################################
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_sum(per_example_loss)
        probabilities = tf.nn.softmax(logits, axis=-1)
        predicts = tf.argmax(probabilities, axis=-1)
        return (loss, per_example_loss, logits, predicts)
        ##########################################################################
Example #26
Source File: run_binary_coherence.py From language with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  output_layer = model.get_pooled_output()
  hidden_size = output_layer.shape[-1].value
  num_labels = 2  # This is hardcoded for binary classification

  with tf.variable_scope("cls/seq_relationship"):
    output_weights = tf.get_variable(
        "output_weights",
        shape=[num_labels, hidden_size],
        initializer=modeling.create_initializer(bert_config.initializer_range))
    output_bias = tf.get_variable(
        "output_bias", shape=[num_labels], initializer=tf.zeros_initializer())

  logits = tf.matmul(output_layer, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)
  log_probs = tf.nn.log_softmax(logits, axis=-1)
  probabilities = tf.nn.softmax(logits, axis=-1)

  labels = tf.reshape(labels, [-1])

  one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
  per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
  loss = tf.reduce_mean(per_example_loss)

  return (loss, per_example_loss, logits, probabilities)
Example #27
Source File: run_ner.py From KBQA-BERT with MIT License | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    Creates the X model.
    :param bert_config: BERT configuration
    :param is_training:
    :param input_ids: index representation of the input data
    :param input_mask:
    :param segment_ids:
    :param labels: index representation of the labels
    :param num_labels: number of label classes
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the corresponding character embeddings.
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    # Get the corresponding embeddings; the input is [batch_size, seq_length, embedding_size].
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # vector of size [batch_size] holding the sequence lengths of the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size,
                          cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.droupout_rate, initializers=initializers,
                          num_labels=num_labels, seq_length=max_seq_length,
                          labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst
Example #28
Source File: run_classifier_distillation.py From language with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    per_example_loss = -tf.reduce_sum(labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
Example #29
Source File: run_bert_boolq_diff.py From language with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 gt_probs, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()

  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    per_example_loss = -tf.reduce_sum(gt_probs * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
Example #30
Source File: run_squad.py From language with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  final_hidden = model.get_sequence_output()

  final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3)
  batch_size = final_hidden_shape[0]
  seq_length = final_hidden_shape[1]
  hidden_size = final_hidden_shape[2]

  output_weights = tf.get_variable(
      "cls/squad/output_weights", [2, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "cls/squad/output_bias", [2], initializer=tf.zeros_initializer())

  final_hidden_matrix = tf.reshape(final_hidden,
                                   [batch_size * seq_length, hidden_size])
  logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
  logits = tf.nn.bias_add(logits, output_bias)

  logits = tf.reshape(logits, [batch_size, seq_length, 2])
  logits = tf.transpose(logits, [2, 0, 1])

  unstacked_logits = tf.unstack(logits, axis=0)

  (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

  return (start_logits, end_logits)