Python data_helpers.batch_iter() Examples

The following are 4 code examples of data_helpers.batch_iter(), taken from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data_helpers, or try the search function.
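The data_helpers module differs between these projects, so the exact signature can vary, but batch_iter() is usually a small NumPy-based generator along the lines of the sketch below (a minimal sketch modeled on the common text-classification helper, not the code of any specific project listed here):

import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield successive mini-batches over `data` for `num_epochs` epochs (sketch)."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Reshuffle the examples at the start of each epoch (disabled for evaluation)
        if shuffle:
            shuffled_data = data[np.random.permutation(np.arange(data_size))]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield shuffled_data[start:end]

With a helper like this, each batch is simply a slice of whatever iterable was passed in: Example #1 zips features and labels together and recovers them per batch with zip(*batch), while Examples #2-#4 pass the feature arrays directly and feed each batch as-is.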
Example #1
Source File: train.py    From question-classification-cnn-rnn-attention with Apache License 2.0
def dev_step(x_dev, y_dev):
            """
            Evaluates model on a dev set
            """
            batches = data_helpers.batch_iter(
                list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
            loss_sum = 0
            accuracy_sum = 0
            count = 0
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed_dict = {
                  rnn.input_x: x_batch,
                  rnn.input_y: y_batch,
                  rnn.dropout_keep_prob: 1.0,
                  rnn.batch_size: len(x_batch),
                  rnn.real_len: real_len(x_batch)
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, rnn.loss, rnn.accuracy],
                    feed_dict)
                loss_sum = loss_sum + loss
                accuracy_sum = accuracy_sum + accuracy
                count = count + 1
            loss = loss_sum / count
            accuracy = accuracy_sum / count
            time_str = datetime.datetime.now().isoformat()
            logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            dev_summary_writer.add_summary(summaries, step)

Example #2
Source File: getSentiment.py    From DNN-Sentiment with MIT License
def getSentimentCNN(fileToLoad, modelDir):
    checkpoint_dir = "./rnn_runs/"+modelDir+"/checkpoints/"
    batch_size = 64
    x_test, y_test, vocabulary, vocabulary_inv,trainS = data_helpers.load_data_for_books("./data/"+fileToLoad+".txt")
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))
    
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            all_scores = []
            for x_test_batch in batches:
                batch_scores = sess.run(scores, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                batch_predictions = np.argmax(batch_scores,axis=1)
                #batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])
                all_scores = np.concatenate([all_scores,batch_scores[:,1] - batch_scores[:,0]])
                
    mbs = float(len(all_predictions[all_predictions == 1]))/len(all_predictions)
    mss = np.mean(all_scores)
    print "Mean Binary Sentiment",mbs
    print "Mean Smooth Sentiment",mss
    return all_predictions,all_scores 
Example #3
Source File: getSentiment.py    From DNN-Sentiment with MIT License
def getSentimentRNN(fileToLoad,modelDir):
    checkpoint_dir = "./rnn_runs/"+modelDir+"/checkpoints/"
    batch_size = 64
    n_hidden = 256
    
    x_test, y_test, vocabulary, vocabulary_inv,trainS = data_helpers.load_data_for_books("./data/"+fileToLoad+".txt")
    y_test = np.argmax(y_test, axis=1)
    print("Vocabulary size: {:d}".format(len(vocabulary)))
    print("Test set size {:d}".format(len(y_test)))
    x_test = np.fliplr(x_test)
    
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            print("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("x_input").outputs[0]
            predictions = graph.get_operation_by_name("prediction").outputs[0]
            istate = graph.get_operation_by_name('initial_state').outputs[0]
            keep_prob = graph.get_operation_by_name('keep_prob').outputs[0]
            # Generate batches for one epoch
            batches = data_helpers.batch_iter(x_test, batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            all_scores = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions, {input_x: x_test_batch, istate: np.zeros((len(x_test_batch), 2*n_hidden)), keep_prob: 1.0})
                binaryPred = np.argmax(batch_predictions,axis=1)
                all_predictions = np.concatenate([all_predictions, binaryPred])
                all_scores = np.concatenate([all_scores, batch_predictions[:,1] - batch_predictions[:,0]])
                
        mbs = float(len(all_predictions[all_predictions == 1]))/len(all_predictions)
        mss = np.mean(all_scores)
        print "Mean Binary Sentiment",mbs
        print "Mean Smooth Sentiment",mss
        return all_predictions,all_scores 
Example #4
Source File: eval.py    From rnn-text-classification-tf with MIT License
def eval():
    with tf.device('/cpu:0'):
        x_text, y = data_helpers.load_data_and_labels(FLAGS.pos_dir, FLAGS.neg_dir)

    # Map data into vocabulary
    text_path = os.path.join(FLAGS.checkpoint_dir, "..", "text_vocab")
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(text_path)

    x_eval = np.array(list(text_vocab_processor.transform(x_text)))
    y_eval = np.argmax(y, axis=1)

    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_text = graph.get_operation_by_name("input_text").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(x_eval), FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for x_batch in batches:
                batch_predictions = sess.run(predictions, {input_text: x_batch,
                                                           dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

            correct_predictions = float(sum(all_predictions == y_eval))
            print("Total number of test examples: {}".format(len(y_eval)))
            print("Accuracy: {:g}".format(correct_predictions / float(len(y_eval))))