Python tflearn.data_utils.pad_sequences() Examples
The following are 9 code examples of tflearn.data_utils.pad_sequences().
Example #1
Source File: From Text-Pairs-Relation-Classification with Apache License 2.0 | 6 votes |
def pad_data(data, pad_seq_len):
    """
    Pad both token-index sequences of the research data and one-hot encode its labels.

    Args:
        data: The research data; its `front_tokenindex`, `behind_tokenindex`
            and `labels` attributes are read
        pad_seq_len: The max sentence length, passed as `maxlen` to `pad_sequences`
    Returns:
        data_front: The padded front data
        data_behind: The padded behind data
        onehot_labels: The one-hot labels (built with `nb_classes=2`)
    """
    def _pad(token_ids):
        # Both sides of the pair share the same padding settings.
        return pad_sequences(token_ids, maxlen=pad_seq_len, value=0.)

    padded_front = _pad(data.front_tokenindex)
    padded_behind = _pad(data.behind_tokenindex)
    labels_onehot = to_categorical(data.labels, nb_classes=2)
    return padded_front, padded_behind, labels_onehot
Example #2
Source File: From Hierarchical-Multi-Label-Text-Classification with Apache License 2.0 | 6 votes |
def pad_data(data, pad_seq_len):
    """
    Pad the abstract token-index sequences and hand back the labels unchanged.

    Args:
        data: The research data; its `abstract_tokenindex`, `onehot_labels`
            and `onehot_labels_tuple` attributes are read
        pad_seq_len: The max sentence length, passed as `maxlen` to `pad_sequences`
    Returns:
        pad_seq: The padded data
        labels: The data labels (`data.onehot_labels`)
        labels_tuple: `data.onehot_labels_tuple`, returned as-is
    """
    padded_abstracts = pad_sequences(
        data.abstract_tokenindex,
        maxlen=pad_seq_len,
        value=0.,
    )
    # The label containers are passed through without modification.
    return padded_abstracts, data.onehot_labels, data.onehot_labels_tuple
Example #3
Source File: From Multi-Label-Text-Classification with Apache License 2.0 | 5 votes |
def pad_data(data, pad_seq_len):
    """
    Pad the token-index sequences of the research data and return them with the labels.

    Args:
        data: The research data; its `tokenindex` and `onehot_labels`
            attributes are read
        pad_seq_len: The max sentence length, passed as `maxlen` to `pad_sequences`
    Returns:
        pad_seq: The padded data
        labels: The data labels (`data.onehot_labels`, returned as-is)
    """
    padded_tokens = pad_sequences(
        data.tokenindex,
        maxlen=pad_seq_len,
        value=0.,
    )
    return padded_tokens, data.onehot_labels
Example #4
Source File: From text_classification with MIT License | 5 votes |
def test_pad():
    """Run `pad_sequences` on one hard-coded sentence and print the result.

    The sentence is split on single spaces into word tokens, wrapped in two
    levels of list, and passed to `pad_sequences` with `maxlen=100` and a
    pad value of `0.`.
    """
    sentence = 'w18476 w4454 w1674 w6 w25 w474 w1333 w1467 w863 w6 w4430 w11 w813 w4463 w863 w6 w4430 w111'
    tokens = sentence.split(" ")
    padded = pad_sequences([[tokens]], maxlen=100, value=0.)
    print("trainX:", padded)
Example #5
Source File: From text_classification with MIT License | 5 votes |
def load_data_multilabel(traning_data_path, vocab_word2index, vocab_label2index, sentence_len, training_portion=0.95):
    """
    Convert a "__label__"-delimited text file into padded index data.

    Every line is split on "__label__": the first piece is the space-separated
    input text, the remaining pieces are the labels of that line. Words are
    looked up in `vocab_word2index` (falling back to `UNK_ID`), labels in
    `vocab_label2index`, and the label indexes are turned into one vector per
    line by `transform_multilabel_as_multihot`. Lines are shuffled before the
    split into training and validation data.

    :param traning_data_path: path of the UTF-8 encoded data file
    :param vocab_word2index: dict mapping a word to its index
    :param vocab_label2index: dict mapping a label to its index
    :param sentence_len: passed as `maxlen` to `pad_sequences`
    :param training_portion: fraction of the lines used for the training split
    :return: `(train, test)`, each an `(X, Y)` pair
    """
    # Local import so the block does not depend on a file-level import.
    import codecs

    # The context manager closes the handle even if reading raises.
    with codecs.open(traning_data_path, mode='r', encoding='utf-8') as file_object:
        lines = file_object.readlines()
    random.shuffle(lines)

    label_size = len(vocab_label2index)
    X = []
    Y = []
    for i, line in enumerate(lines):
        raw_list = line.strip().split("__label__")

        # Piece 0 is the input text; drop empty tokens left by repeated spaces.
        input_list = raw_list[0].strip().split(" ")
        input_list = [word.strip().replace(" ", "") for word in input_list if word != '']
        x = [vocab_word2index.get(word, UNK_ID) for word in input_list]

        # Every remaining piece is one label of this line.
        label_list = raw_list[1:]
        label_list = [label.strip().replace(" ", "") for label in label_list if label != '']
        label_list = [vocab_label2index[label] for label in label_list]
        y = transform_multilabel_as_multihot(label_list, label_size)

        X.append(x)
        Y.append(y)
        if i < 10:
            print(i, "line:", line)

    X = pad_sequences(X, maxlen=sentence_len, value=0.)  # padding to max length

    number_examples = len(lines)
    training_number = int(training_portion * number_examples)
    train = (X[0:training_number], Y[0:training_number])
    valid_number = min(1000, number_examples - training_number)
    # NOTE(review): the validation slice starts at training_number + 1, so the
    # example at index training_number ends up in neither split. Kept exactly
    # as in the original; confirm whether skipping it is intended.
    test = (X[training_number + 1:training_number + valid_number + 1],
            Y[training_number + 1:training_number + valid_number + 1])
    return train, test
Example #6
Source File: From text_classification with MIT License | 5 votes |
def test_pad():
    """Run `pad_sequences` on one hard-coded sentence and print the result.

    The sentence is split on single spaces into word tokens, wrapped in two
    levels of list, and passed to `pad_sequences` with `maxlen=100` and a
    pad value of `0.`.
    """
    sentence = 'w18476 w4454 w1674 w6 w25 w474 w1333 w1467 w863 w6 w4430 w11 w813 w4463 w863 w6 w4430 w111'
    tokens = sentence.split(" ")
    padded = pad_sequences([[tokens]], maxlen=100, value=0.)
    print("trainX:", padded)
Example #7
Source File: From text_classification with MIT License | 5 votes |
def test_pad():
    """Run `pad_sequences` on one hard-coded sentence and print the result.

    The sentence is split on single spaces into word tokens, wrapped in two
    levels of list, and passed to `pad_sequences` with `maxlen=100` and a
    pad value of `0.`.
    """
    sentence = 'w18476 w4454 w1674 w6 w25 w474 w1333 w1467 w863 w6 w4430 w11 w813 w4463 w863 w6 w4430 w111'
    tokens = sentence.split(" ")
    padded = pad_sequences([[tokens]], maxlen=100, value=0.)
    print("trainX:", padded)
Example #8
Source File: From text_classification with MIT License | 5 votes |
def test_pad():
    """Run `pad_sequences` on one hard-coded sentence and print the result.

    The sentence is split on single spaces into word tokens, wrapped in two
    levels of list, and passed to `pad_sequences` with `maxlen=100` and a
    pad value of `0.`.
    """
    sentence = 'w18476 w4454 w1674 w6 w25 w474 w1333 w1467 w863 w6 w4430 w11 w813 w4463 w863 w6 w4430 w111'
    tokens = sentence.split(" ")
    padded = pad_sequences([[tokens]], maxlen=100, value=0.)
    print("trainX:", padded)
Example #9
Source File: From text_classification with MIT License | 4 votes |
def main(_):
    """
    Predict labels for the questions in `FLAGS.predict_source_file` and append them to a file.

    Steps: load the word and label vocabularies, load and pad the questions,
    restore the fastText model from the latest checkpoint under
    `FLAGS.ckpt_dir`, then run the model one question at a time and write each
    question id with its predicted labels to the target file. Returns early
    (writing nothing) when no checkpoint is found.

    :param _: unused positional argument
    """
    # Local import so the block does not depend on a file-level import.
    import codecs

    # 1.load data with vocabulary of words and labels
    vocabulary_word2index, vocabulary_index2word = create_voabulary()
    vocab_size = len(vocabulary_word2index)
    print("vocab_size:", vocab_size)
    vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label()
    questionid_question_lists = load_final_test_data(FLAGS.predict_source_file)  # TODO
    test = load_data_predict(vocabulary_word2index, vocabulary_word2index_label, questionid_question_lists)  # TODO
    testX = []
    question_id_list = []
    # Unpack directly in the loop header instead of binding the builtin name `tuple`.
    for question_id, question_string_list in test:
        question_id_list.append(question_id)
        testX.append(question_string_list)

    # 2.Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
    print("end padding...")

    # 3.create session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # 4.Instantiate Model
        fast_text = fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps,
                             FLAGS.decay_rate, FLAGS.num_sampled, FLAGS.sentence_len, vocab_size,
                             FLAGS.embed_size, FLAGS.is_training)
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint.going to stop")
            return

        # 5.feed data, to get logits
        number_of_training_data = len(testX2)
        print("number_of_training_data:", number_of_training_data)
        batch_size = 1
        index = 0
        # NOTE(review): the path argument of this open call and the tensor
        # fetched by sess.run below were lost in the garbled source.
        # `FLAGS.predict_target_file` and `fast_text.logits` are reconstructions;
        # confirm both against the flag definitions and the fastText model class.
        # The `with` block closes the output file even if a prediction step raises.
        with codecs.open(FLAGS.predict_target_file, 'a', 'utf8') as predict_target_file_f:
            for start, end in zip(range(0, number_of_training_data, batch_size),
                                  range(batch_size, number_of_training_data + 1, batch_size)):
                logits = sess.run(fast_text.logits,
                                  feed_dict={fast_text.sentence: testX2[start:end]})  # 'shape of logits:', ( 1, 1999)
                # 6. get lable using logtis
                predicted_labels = get_label_using_logits(logits[0], vocabulary_index2word_label)
                # 7. write question id and labels to file system.
                write_question_id_with_labels(question_id_list[index], predicted_labels, predict_target_file_f)
                index = index + 1

# get label using logits