Python tflearn.data_utils.pad_sequences() Examples
The following are 6 code examples of tflearn.data_utils.pad_sequences(), taken from open-source projects. The source file and license are listed above each example.
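For orientation, here is a minimal sketch of the call itself (a made-up example, assuming only that tflearn is installed): pad_sequences takes a list of integer sequences and returns a 2-D numpy array in which every row has length maxlen, with value used for padding.

from tflearn.data_utils import pad_sequences

# Two toy sequences of unequal length; 0. is the padding value.
sequences = [[3, 7, 12], [5, 9]]
padded = pad_sequences(sequences, maxlen=4, value=0.)
print(padded.shape)  # (2, 4)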
Example #1
Source File: data_helpers.py From Text-Pairs-Relation-Classification with Apache License 2.0
def pad_data(data, pad_seq_len):
    """
    Padding each sentence of research data according to the max sentence length.
    Return the padded data and data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of research data
    Returns:
        data_front: The padded front data
        data_behind: The padded behind data
        onehot_labels: The one-hot labels
    """
    data_front = pad_sequences(data.front_tokenindex, maxlen=pad_seq_len, value=0.)
    data_behind = pad_sequences(data.behind_tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels = to_categorical(data.labels, nb_classes=2)
    return data_front, data_behind, onehot_labels
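A hedged usage sketch follows; the SimpleNamespace object is a hypothetical stand-in for the project's real data object, and pad_sequences and to_categorical are imported from tflearn.data_utils as in the source file.

from types import SimpleNamespace

# Hypothetical stand-in for the project's data object.
data = SimpleNamespace(
    front_tokenindex=[[1, 2, 3], [4, 5]],
    behind_tokenindex=[[6, 7], [8, 9, 10]],
    labels=[0, 1],
)
data_front, data_behind, onehot_labels = pad_data(data, pad_seq_len=5)
# data_front and data_behind are (2, 5) arrays; onehot_labels is (2, 2).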
Example #2
Source File: data_helpers.py From Hierarchical-Multi-Label-Text-Classification with Apache License 2.0
def pad_data(data, pad_seq_len):
    """
    Padding each sentence of research data according to the max sentence length.
    Return the padded data and data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of research data
    Returns:
        pad_seq: The padded data
        labels: The data labels
    """
    abstract_pad_seq = pad_sequences(data.abstract_tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels_list = data.onehot_labels
    onehot_labels_list_tuple = data.onehot_labels_tuple
    return abstract_pad_seq, onehot_labels_list, onehot_labels_list_tuple
Example #3
Source File: data_helpers.py From Multi-Label-Text-Classification with Apache License 2.0
def pad_data(data, pad_seq_len):
    """
    Padding each sentence of research data according to the max sentence length.
    Return the padded data and data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of research data
    Returns:
        pad_seq: The padded data
        labels: The data labels
    """
    pad_seq = pad_sequences(data.tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels = data.onehot_labels
    return pad_seq, onehot_labels
Example #4
Source File: data_util_zhihu.py From text_classification with MIT License
def test_pad():
    trainX = 'w18476 w4454 w1674 w6 w25 w474 w1333 w1467 w863 w6 w4430 w11 w813 w4463 w863 w6 w4430 w111'
    trainX = trainX.split(" ")
    trainX = pad_sequences([[trainX]], maxlen=100, value=0.)
    print("trainX:", trainX)
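Note that this test wraps the split token list an extra level deep and feeds raw token strings; pad_sequences normally receives a list of integer-index sequences. A hedged sketch of the more typical call (the integer indices below are made up):

from tflearn.data_utils import pad_sequences

# Hypothetical integer indices standing in for the 'w...' tokens above.
trainX = [[18476, 4454, 1674, 6, 25, 474, 1333, 1467, 863]]
trainX = pad_sequences(trainX, maxlen=100, value=0.)
print("trainX shape:", trainX.shape)  # (1, 100)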
Example #5
Source File: data_util.py From text_classification with MIT License
def load_data_multilabel(traning_data_path, vocab_word2index, vocab_label2index, sentence_len, training_portion=0.95):
    """
    convert data as indexes using word2index dicts.
    :param traning_data_path:
    :param vocab_word2index:
    :param vocab_label2index:
    :return:
    """
    file_object = codecs.open(traning_data_path, mode='r', encoding='utf-8')
    lines = file_object.readlines()
    random.shuffle(lines)
    label_size = len(vocab_label2index)
    X = []
    Y = []
    for i, line in enumerate(lines):
        raw_list = line.strip().split("__label__")
        input_list = raw_list[0].strip().split(" ")
        input_list = [x.strip().replace(" ", "") for x in input_list if x != '']
        x = [vocab_word2index.get(x, UNK_ID) for x in input_list]
        label_list = raw_list[1:]
        label_list = [l.strip().replace(" ", "") for l in label_list if l != '']
        label_list = [vocab_label2index[label] for label in label_list]
        y = transform_multilabel_as_multihot(label_list, label_size)
        X.append(x)
        Y.append(y)
        if i < 10:
            print(i, "line:", line)
    X = pad_sequences(X, maxlen=sentence_len, value=0.)  # padding to max length
    number_examples = len(lines)
    training_number = int(training_portion * number_examples)
    train = (X[0:training_number], Y[0:training_number])
    valid_number = min(1000, number_examples - training_number)
    test = (X[training_number + 1:training_number + valid_number + 1],
            Y[training_number + 1:training_number + valid_number + 1])
    return train, test
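transform_multilabel_as_multihot is a project helper that is not reproduced on this page; the sketch below is only a plausible reconstruction, inferred from how it is called above (turn a list of label indices into a fixed-length multi-hot vector).

import numpy as np

def transform_multilabel_as_multihot(label_list, label_size):
    # Hypothetical reconstruction: one slot per label, 1.0 where the label is present.
    result = np.zeros(label_size)
    result[label_list] = 1.0
    return result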
Example #6
Source File: p5_fastTextB_predict_multilabel.py From text_classification with MIT License
def main(_):
    # 1. load data with vocabulary of words and labels
    vocabulary_word2index, vocabulary_index2word = create_voabulary()
    vocab_size = len(vocabulary_word2index)
    print("vocab_size:", vocab_size)
    vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label()
    questionid_question_lists = load_final_test_data(FLAGS.predict_source_file)  # TODO
    test = load_data_predict(vocabulary_word2index, vocabulary_word2index_label, questionid_question_lists)  # TODO
    testX = []
    question_id_list = []
    for tuple in test:
        question_id, question_string_list = tuple
        question_id_list.append(question_id)
        testX.append(question_string_list)
    # 2. Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
    print("end padding...")
    # 3. create session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # 4. Instantiate Model
        fast_text = fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps,
                             FLAGS.decay_rate, FLAGS.num_sampled, FLAGS.sentence_len, vocab_size,
                             FLAGS.embed_size, FLAGS.is_training)
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint. Going to stop")
            return
        # 5. feed data, to get logits
        number_of_training_data = len(testX2)
        print("number_of_training_data:", number_of_training_data)
        batch_size = 1
        index = 0
        predict_target_file_f = codecs.open(FLAGS.predict_target_file, 'a', 'utf8')
        for start, end in zip(range(0, number_of_training_data, batch_size),
                              range(batch_size, number_of_training_data + 1, batch_size)):
            # shape of logits: (1, 1999)
            logits = sess.run(fast_text.logits, feed_dict={fast_text.sentence: testX2[start:end]})
            # 6. get label using logits
            predicted_labels = get_label_using_logits(logits[0], vocabulary_index2word_label)
            # 7. write question id and labels to file system.
            write_question_id_with_labels(question_id_list[index], predicted_labels, predict_target_file_f)
            index = index + 1
        predict_target_file_f.close()
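get_label_using_logits and write_question_id_with_labels are project helpers that are not reproduced on this page; the sketch below is only a plausible reconstruction of the former, inferred from how it is used above (take the top-scoring logit indices and map them back to label strings).

import numpy as np

def get_label_using_logits(logits, vocabulary_index2word_label, top_number=5):
    # Hypothetical reconstruction: indices of the top_number largest logits,
    # translated through the index-to-label vocabulary.
    index_list = np.argsort(logits)[-top_number:][::-1]
    return [vocabulary_index2word_label[i] for i in index_list]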