Python tflearn.data_utils.pad_sequences() Examples

The following are 6 code examples of tflearn.data_utils.pad_sequences(), drawn from open-source projects; the source file and project for each are noted above the example. You may also want to check out all available functions/classes of the module tflearn.data_utils.
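Before the project examples, here is a minimal standalone sketch of what pad_sequences() does (the sequences and maxlen below are made up for illustration): shorter sequences are padded with value and longer ones truncated, both at the end under tflearn's defaults.

from tflearn.data_utils import pad_sequences

# Three toy sequences of different lengths, padded/truncated to maxlen=4.
seqs = [[1, 2, 3], [4, 5], [6, 7, 8, 9, 10]]
padded = pad_sequences(seqs, maxlen=4, value=0.)
print(padded)
# [[1 2 3 0]
#  [4 5 0 0]
#  [6 7 8 9]]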
Example #1
Source File: data_helpers.py    From Text-Pairs-Relation-Classification with Apache License 2.0
def pad_data(data, pad_seq_len):
    """
    Padding each sentence of research data according to the max sentence length.
    Return the padded data and data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of research data
    Returns:
        data_front: The padded front data
        data_behind: The padded behind data
        onehot_labels: The one-hot labels
    """
    data_front = pad_sequences(data.front_tokenindex, maxlen=pad_seq_len, value=0.)
    data_behind = pad_sequences(data.behind_tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels = to_categorical(data.labels, nb_classes=2)
    return data_front, data_behind, onehot_labels 
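A hypothetical call for reference: the data object below is a stand-in carrying the front_tokenindex / behind_tokenindex / labels attributes the function assumes, not the project's actual data class.

from types import SimpleNamespace

data = SimpleNamespace(
    front_tokenindex=[[3, 7, 2], [5, 1]],
    behind_tokenindex=[[4, 4], [9, 8, 6, 2]],
    labels=[0, 1],
)
data_front, data_behind, onehot_labels = pad_data(data, pad_seq_len=4)
print(data_front.shape, data_behind.shape)  # (2, 4) (2, 4)
print(onehot_labels)                        # one-hot rows for labels 0 and 1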
Example #2
Source File: data_helpers.py    From Hierarchical-Multi-Label-Text-Classification with Apache License 2.0
def pad_data(data, pad_seq_len):
    """
    Padding each sentence of research data according to the max sentence length.
    Return the padded data and data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of research data
    Returns:
        pad_seq: The padded data
        labels: The data labels
    """
    abstract_pad_seq = pad_sequences(data.abstract_tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels_list = data.onehot_labels
    onehot_labels_list_tuple = data.onehot_labels_tuple
    return abstract_pad_seq, onehot_labels_list, onehot_labels_list_tuple 
Example #3
Source File: data_helpers.py    From Multi-Label-Text-Classification with Apache License 2.0
def pad_data(data, pad_seq_len):
    """
    Padding each sentence of research data according to the max sentence length.
    Return the padded data and data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of research data
    Returns:
        pad_seq: The padded data
        labels: The data labels
    """
    pad_seq = pad_sequences(data.tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels = data.onehot_labels
    return pad_seq, onehot_labels 
Example #4
Source File: data_util_zhihu.py    From text_classification with MIT License
def test_pad():
    trainX = 'w18476 w4454 w1674 w6 w25 w474 w1333 w1467 w863 w6 w4430 w11 w813 w4463 w863 w6 w4430 w111'
    # Map the 'w<id>' token strings to integer ids; pad_sequences builds an
    # int array, so string tokens would fail (hypothetical mapping for this test).
    trainX = [int(w.lstrip('w')) for w in trainX.split(" ")]
    # pad_sequences expects a list of sequences, so wrap the single sequence once.
    trainX = pad_sequences([trainX], maxlen=100, value=0.)
    print("trainX:", trainX)
Example #5
Source File: data_util.py    From text_classification with MIT License
def load_data_multilabel(training_data_path, vocab_word2index, vocab_label2index, sentence_len, training_portion=0.95):
    """
    Convert the raw training file into padded word-index sequences and
    multi-hot label vectors, then split them into train and validation sets.
    :param training_data_path: path to the fastText-style training file
    :param vocab_word2index: dict mapping words to indexes
    :param vocab_label2index: dict mapping labels to indexes
    :param sentence_len: max sentence length used for padding
    :param training_portion: fraction of examples used for training
    :return: (trainX, trainY), (validX, validY)
    """
    file_object = codecs.open(training_data_path, mode='r', encoding='utf-8')
    lines = file_object.readlines()
    random.shuffle(lines)
    label_size = len(vocab_label2index)
    X = []
    Y = []
    for i, line in enumerate(lines):
        raw_list = line.strip().split("__label__")
        # Tokens come before the first __label__ marker.
        input_list = raw_list[0].strip().split(" ")
        input_list = [x.strip().replace(" ", "") for x in input_list if x != '']
        x = [vocab_word2index.get(x, UNK_ID) for x in input_list]
        # Everything after the first marker is a label.
        label_list = raw_list[1:]
        label_list = [l.strip().replace(" ", "") for l in label_list if l != '']
        label_list = [vocab_label2index[label] for label in label_list]
        y = transform_multilabel_as_multihot(label_list, label_size)
        X.append(x)
        Y.append(y)
        if i < 10: print(i, "line:", line)

    X = pad_sequences(X, maxlen=sentence_len, value=0.)  # padding to max length
    number_examples = len(lines)
    training_number = int(training_portion * number_examples)
    train = (X[0:training_number], Y[0:training_number])
    # Use up to 1000 of the remaining examples as the validation set.
    valid_number = min(1000, number_examples - training_number)
    test = (X[training_number:training_number + valid_number], Y[training_number:training_number + valid_number])
    return train, test
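For reference, the loader expects fastText-style lines in which the tokens come first, followed by one or more __label__ markers. The line below is hypothetical, but the parsing mirrors the function body:

line = "w305 w6651 w3974 __label__123 __label__456\n"
raw_list = line.strip().split("__label__")
tokens = [x for x in raw_list[0].strip().split(" ") if x != '']
labels = [l.strip() for l in raw_list[1:] if l != '']
print(tokens)  # ['w305', 'w6651', 'w3974']
print(labels)  # ['123', '456']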
Example #6
Source File: p5_fastTextB_predict_multilabel.py    From text_classification with MIT License
def main(_):
    # 1.load data with vocabulary of words and labels
    vocabulary_word2index, vocabulary_index2word = create_voabulary()
    vocab_size = len(vocabulary_word2index)
    print("vocab_size:",vocab_size)
    vocabulary_word2index_label,vocabulary_index2word_label = create_voabulary_label()
    questionid_question_lists=load_final_test_data(FLAGS.predict_source_file) #TODO
    test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists) #TODO
    testX = []
    question_id_list = []
    for question_id, question_string_list in test:  # unpack directly instead of shadowing the tuple builtin
        question_id_list.append(question_id)
        testX.append(question_string_list)

    # 2.Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
    print("end padding...")

    # 3.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        # 4.Instantiate Model
        fast_text=fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,FLAGS.num_sampled,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.is_training)
        saver=tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint.going to stop")
            return
        # 5.feed data, to get logits
        number_of_training_data = len(testX2)
        print("number_of_training_data:", number_of_training_data)
        batch_size=1
        index=0
        predict_target_file_f = codecs.open(FLAGS.predict_target_file, 'a', 'utf8')
        for start, end in zip(range(0, number_of_training_data, batch_size),range(batch_size, number_of_training_data+1, batch_size)):
            logits=sess.run(fast_text.logits,feed_dict={fast_text.sentence:testX2[start:end]}) #'shape of logits:', ( 1, 1999)
            # 6. get label using logits
            predicted_labels=get_label_using_logits(logits[0],vocabulary_index2word_label)
            # 7. write question id and labels to file system.
            write_question_id_with_labels(question_id_list[index],predicted_labels,predict_target_file_f)
            index=index+1
        predict_target_file_f.close()

# get label using logits
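get_label_using_logits() itself is not included in this excerpt. A plausible minimal version, assuming logits is a 1-D score vector and the top 5 label indices are wanted (the real project's helper may differ):

import numpy as np

def get_label_using_logits(logits, vocabulary_index2word_label, top_number=5):
    # Indices of the top-scoring classes, highest score first (hypothetical sketch).
    index_list = np.argsort(logits)[-top_number:][::-1]
    return [vocabulary_index2word_label[int(i)] for i in index_list]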