Python data_helpers.load_data_and_labels() Examples
The following are 3
code examples of data_helpers.load_data_and_labels().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
data_helpers
, or try the search function
.
Example #1
Source File: train.py From AI_for_Resumes with Apache License 2.0 | 5 votes |
def preprocess(): # Data Preparation # ================================================== # Load data print("Loading data...") x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file) # Build vocabulary max_document_length = max([len(x.split(" ")) for x in x_text]) vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) x = np.array(list(vocab_processor.fit_transform(x_text))) # Randomly shuffle data np.random.seed(10) shuffle_indices = np.random.permutation(np.arange(len(y))) x_shuffled = x[shuffle_indices] y_shuffled = y[shuffle_indices] # Split train/test set # TODO: This is very crude, should use cross-validation dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y))) x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] del x, y, x_shuffled, y_shuffled print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) return x_train, y_train, vocab_processor, x_dev, y_dev
Example #2
Source File: train.py From cnn-text-classification-tf with Apache License 2.0 | 5 votes |
def preprocess(): # Data Preparation # ================================================== # Load data print("Loading data...") x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file) # Build vocabulary max_document_length = max([len(x.split(" ")) for x in x_text]) vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) x = np.array(list(vocab_processor.fit_transform(x_text))) # Randomly shuffle data np.random.seed(10) shuffle_indices = np.random.permutation(np.arange(len(y))) x_shuffled = x[shuffle_indices] y_shuffled = y[shuffle_indices] # Split train/test set # TODO: This is very crude, should use cross-validation dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y))) x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] del x, y, x_shuffled, y_shuffled print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) return x_train, y_train, vocab_processor, x_dev, y_dev
Example #3
Source File: eval.py From rnn-text-classification-tf with MIT License | 4 votes |
def eval(): with tf.device('/cpu:0'): x_text, y = data_helpers.load_data_and_labels(FLAGS.pos_dir, FLAGS.neg_dir) # Map data into vocabulary text_path = os.path.join(FLAGS.checkpoint_dir, "..", "text_vocab") text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(text_path) x_eval = np.array(list(text_vocab_processor.transform(x_text))) y_eval = np.argmax(y, axis=1) checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) # Get the placeholders from the graph by name input_text = graph.get_operation_by_name("input_text").outputs[0] # input_y = graph.get_operation_by_name("input_y").outputs[0] dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] # Tensors we want to evaluate predictions = graph.get_operation_by_name("output/predictions").outputs[0] # Generate batches for one epoch batches = data_helpers.batch_iter(list(x_eval), FLAGS.batch_size, 1, shuffle=False) # Collect the predictions here all_predictions = [] for x_batch in batches: batch_predictions = sess.run(predictions, {input_text: x_batch, dropout_keep_prob: 1.0}) all_predictions = np.concatenate([all_predictions, batch_predictions]) correct_predictions = float(sum(all_predictions == y_eval)) print("Total number of test examples: {}".format(len(y_eval))) print("Accuracy: {:g}".format(correct_predictions / float(len(y_eval))))