Python utils.load_data() Examples
The following are 16 code examples of utils.load_data().
The original project and source file for each example are noted in the attribution line above its code.
You may also want to check out all available functions/classes of the utils module.
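Note that utils.load_data() is not part of the standard library or of any single package: each project below ships its own utils module, so the signature, arguments, and return values differ from example to example. For orientation only, here is a minimal, hypothetical sketch of such a helper; the file path, parameters, and return format are illustrative assumptions and are not taken from any of the projects listed here.

import numpy as np

def load_data(path="data/train.csv", is_training=True):
    # Illustrative only: read a CSV with features in all but the last column
    # and integer class labels in the last column.
    raw = np.genfromtxt(path, delimiter=",", skip_header=1)
    features, labels = raw[:, :-1], raw[:, -1].astype(int)
    if is_training:
        # Shuffle the training split with a fixed seed for reproducibility.
        order = np.random.default_rng(0).permutation(len(labels))
        features, labels = features[order], labels[order]
    return features, labels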
Example #1
Source File: blender.py From kaggle_otto with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_weights():
    # Read validation labels
    _, labels, _, _, _ = utils.load_data()
    skf = StratifiedKFold(labels, n_folds=5, random_state=23)
    test_index = None
    for _, test_idx in skf:
        test_index = np.append(test_index, test_idx) if test_index is not None else test_idx
    val_labels = labels[test_index]

    # Read predictions on validation set
    val_predictions = []
    prediction_files = utils.get_prediction_files()
    for preds_file in prediction_files:
        vp = np.genfromtxt(os.path.join(consts.BLEND_PATH, preds_file), delimiter=',')
        val_predictions.append(vp)

    # Minimize blending function
    p0 = [1.] * len(prediction_files)
    p = fmin_cobyla(error, p0, args=(val_predictions, val_labels),
                    cons=[constraint], rhoend=1e-5)
    return p
Example #2
Source File: main.py From capsule-networks with MIT License | 6 votes |
def evaluation(model, supervisor, num_label):
    teX, teY, num_te_batch = load_data(cfg.dataset, cfg.batch_size, is_training=False)
    fd_test_acc = save_to()
    with supervisor.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        supervisor.saver.restore(sess, tf.train.latest_checkpoint(cfg.logdir))
        tf.logging.info('Model restored!')

        test_acc = 0
        for i in tqdm(range(num_te_batch), total=num_te_batch, ncols=70, leave=False, unit='b'):
            start = i * cfg.batch_size
            end = start + cfg.batch_size
            acc = sess.run(model.accuracy, {model.X: teX[start:end], model.labels: teY[start:end]})
            test_acc += acc
        test_acc = test_acc / (cfg.batch_size * num_te_batch)
        fd_test_acc.write(str(test_acc))
        fd_test_acc.close()
        print('Test accuracy has been saved to ' + cfg.results + '/test_acc.csv')
Example #3
Source File: main.py From CapsNet-Tensorflow with Apache License 2.0 | 6 votes |
def evaluation(model, supervisor, num_label):
    teX, teY, num_te_batch = load_data(cfg.dataset, cfg.batch_size, is_training=False)
    fd_test_acc = save_to()
    with supervisor.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        supervisor.saver.restore(sess, tf.train.latest_checkpoint(cfg.logdir))
        tf.logging.info('Model restored!')

        test_acc = 0
        for i in tqdm(range(num_te_batch), total=num_te_batch, ncols=70, leave=False, unit='b'):
            start = i * cfg.batch_size
            end = start + cfg.batch_size
            acc = sess.run(model.accuracy, {model.X: teX[start:end], model.labels: teY[start:end]})
            test_acc += acc
        test_acc = test_acc / (cfg.batch_size * num_te_batch)
        fd_test_acc.write(str(test_acc))
        fd_test_acc.close()
        print('Test accuracy has been saved to ' + cfg.results + '/test_acc.csv')
Example #4
Source File: tagger.py From nagisa with MIT License | 5 votes |
def __init__(self, vocabs=None, params=None, hp=None, single_word_list=None):
    if vocabs is None:
        vocabs = base + '/data/nagisa_v001.dict'
    if params is None:
        params = base + '/data/nagisa_v001.model'
    if hp is None:
        hp = base + '/data/nagisa_v001.hp'

    # Load vocabulary files
    vocabs = utils.load_data(vocabs)
    self._uni2id, self._bi2id, self._word2id, self._pos2id, self._word2postags = vocabs
    self._id2pos = {v: k for k, v in self._pos2id.items()}
    self.id2pos = self._id2pos
    self.postags = [postag for postag in self._pos2id.keys()]
    # Load a hyper-parameter file
    self._hp = utils.load_data(hp)
    # Construct a word segmentation model and a POS tagging model
    self._model = model.Model(self._hp, params)

    # If a word is included in the single_word_list,
    # it is recognized as a single word forcibly.
    self.pattern = None
    if single_word_list:
        single_word_list = [utils.preprocess(w) for w in single_word_list if len(w) > 1]
        single_word_list = [w.replace('(', '\(').replace(')', '\)') for w in single_word_list]
        single_word_list = sorted(single_word_list, key=lambda x: -len(x))
        if len(single_word_list) > 0:
            self.pattern = re.compile('|'.join(single_word_list))

    # If use_noun_heuristic is True, nouns are more likely to appear.
    if u'名詞' in self._pos2id:
        self.use_noun_heuristic = True
    else:
        self.use_noun_heuristic = False
Example #5
Source File: sample.py From char-cnn-text-classification-tensorflow with Apache License 2.0 | 5 votes |
def sample(args):
    print 'Loading data'
    x, y, vocabulary, vocabulary_inv = utils.load_data()

    text = [list(args.text)]
    sentences_padded = utils.pad_sentences(text, maxlen=x.shape[1])
    raw_x, dummy_y = utils.build_input_data(sentences_padded, [0], vocabulary)

    checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)

    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            predicted_result = sess.run(predictions, {input_x: raw_x, dropout_keep_prob: 1.0})
            if (predicted_result[0] == 0):
                print args.text + ": negative"
            else:
                print args.text + ": positive"
Example #6
Source File: supervised_train.py From DGFraud with Apache License 2.0 | 5 votes |
def main(argv=None):
    print("Loading training data..")
    # file_name = 'small_sample.mat'
    file_name = FLAGS.file_name
    train_perc = FLAGS.train_perc
    relations = ['net_rur', 'net_rtr', 'net_rsr']
    train_data = load_data(FLAGS.train_prefix, file_name, relations, train_perc)
    print("Done loading training data..")
    train(train_data)
Example #7
Source File: preprocess.py From FastSpeech with MIT License | 5 votes |
def main():
    path = os.path.join("data", "LJSpeech-1.1")
    preprocess_ljspeech(path)

    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    if not os.path.exists(hp.alignment_path):
        os.mkdir(hp.alignment_path)
    else:
        return

    tacotron2 = get_Tacotron2()
    num = 0
    for ind, text in enumerate(texts[num:]):
        print(ind)
        character = text[0:len(text)-1]
        mel_gt_name = os.path.join(
            hp.mel_ground_truth, "ljspeech-mel-%05d.npy" % (ind+num+1))
        mel_gt_target = np.load(mel_gt_name)
        _, _, D = load_data(character, mel_gt_target, tacotron2)

        np.save(os.path.join(hp.alignment_path, str(
            ind+num) + ".npy"), D, allow_pickle=False)
Example #8
Source File: network.py From lgcn with GNU General Public License v3.0 | 5 votes |
def process_data(self):
    data = load_data('cora')
    adj, feas = data[:2]
    self.adj = adj.todense()
    self.normed_adj = preprocess_adj(adj)
    self.feas = preprocess_features(feas, False)
    self.y_train, self.y_val, self.y_test = data[2:5]
    self.train_mask, self.val_mask, self.test_mask = data[5:]
Example #9
Source File: draw.py From learn-to-cluster with MIT License | 5 votes |
def draw_graph(ofolder, idx2lb, g_label, idx, prob):
    fpath = os.path.join(ofolder, '{}.npz'.format(idx))
    ograph_folder = 'graph/' + ofolder.split('/')[-1]
    if not os.path.exists(ograph_folder):
        os.makedirs(ograph_folder)
    color_dict = {1: "red", 0: "lightblue"}
    vertices, raw_edges = load_data(fpath)
    vertices = list(vertices)
    lb = idx2lb[idx]
    abs2rel = {}
    for i, v in enumerate(vertices):
        abs2rel[v] = i
    edges = [(abs2rel[p1], abs2rel[p2]) for p1, p2, _ in raw_edges]
    g = Graph(vertex_attrs={"label": vertices}, edges=edges, directed=False)
    edge_weights = [1 - d for _, _, d in raw_edges]
    if len(edge_weights) > 0:
        w_mean = sum(edge_weights) / len(edge_weights)
        w_max = max(edge_weights)
        w_min = min(edge_weights)
    else:
        w_mean, w_max, w_min = 1, 1, 1
    visual_style = {}
    visual_style["vertex_color"] = [
        color_dict[lb == idx2lb[v]] for v in vertices
    ]
    visual_style['edge_width'] = [5 * w for w in edge_weights]
    plot(g,
         **visual_style,
         target="{}/{}_{}_{:.2f}_{:.2f}_{:.2f}_{:.2f}.png".format(
             ograph_folder, g_label, idx, prob, w_mean, w_min, w_max))
Example #10
Source File: cluster_det_processor.py From learn-to-cluster with MIT License | 5 votes |
def build_graph(self, fn_node, fn_edge):
    ''' build graph from graph file
        - nodes: NxD, each row represents the feature of a node
        - adj: NxN, a symmetric similarity matrix with self-connection
    '''
    node = load_data(fn_node)
    edge = load_data(fn_edge)
    assert len(node) > 1, '#node of {}: {}'.format(fn_node, len(node))
    # take majority as label of the graph
    if not self.dataset.ignore_label:
        lb2cnt = {}
        for idx in node:
            if idx not in self.dataset.idx2lb:
                continue
            lb = self.dataset.idx2lb[idx]
            if lb not in lb2cnt:
                lb2cnt[lb] = 0
            lb2cnt[lb] += 1
        gt_lb, _ = get_majority(lb2cnt)
        gt_node = self.dataset.lb2idxs[gt_lb]
        if self.dataset.det_label == 'iou':
            label = compute_iou(node, gt_node)
        elif self.dataset.det_label == 'iop':
            label = compute_iop(node, gt_node)
        else:
            raise KeyError('Unknown det_label type: {}'.format(
                self.dataset.det_label))
    else:
        label = -1.
    adj, _, _ = self.build_adj(node, edge)

    features = self.build_features(node)
    return features, adj, label
Example #11
Source File: main.py From dgl with Apache License 2.0 | 4 votes |
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
Example #12
Source File: test.py From aitom with GNU General Public License v3.0 | 4 votes |
def test(args):
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)
            saver = tf.train.Saver(tf.global_variables())
            saver.restore(sess, args.model)

            # Read the file containing the pairs used for testing
            pairs = lfw.read_pairs(os.path.expanduser(args.test_list_dir))
            # Get the paths for the corresponding images
            paths, actual_issame = lfw.get_paths(os.path.expanduser(args.test_data_dir), pairs, args.test_list_dir)

            image_size = args.image_size
            print('image size', image_size)
            images_placeholder = tf.placeholder(tf.float32, shape=(None, args.image_height, args.image_width, args.image_width), name='image')
            phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

            # network definition.
            prelogits1 = network.infer(images_placeholder, args.embedding_size)
            if args.fc_bn:
                print('do batch norm after network')
                prelogits = slim.batch_norm(prelogits1, is_training=phase_train_placeholder, epsilon=1e-5, scale=True, scope='softmax_bn')
            # embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
            embeddings = tf.identity(prelogits)
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Running forward pass on testing images')
            batch_size = args.test_batch_size
            nrof_images = len(paths)
            nrof_batches = int(math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches):
                start_index = i * batch_size
                print('handling {}/{}'.format(start_index, nrof_images))
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = utils.load_data(paths_batch, False, False, args.image_height, args.image_width, False,
                                         (args.image_height, args.image_width))
                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                feats, a = sess.run([embeddings, prelogits], feed_dict=feed_dict)
                # do not know for sure whether we should turn this on? it depends.
                feats = utils.l2_normalize(feats)
                emb_array[start_index:end_index, :] = feats

            tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(emb_array, actual_issame, 0.001, nrof_folds=args.test_nrof_folds)
            print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy)))
            print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
            auc = metrics.auc(fpr, tpr)
            print('Area Under Curve (AUC): %1.3f' % auc)
            eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)  # fill_value="extrapolate"
            print('Equal Error Rate (EER): %1.3f' % eer)

            tpr1, fpr1, accuracy1, val1, val_std1, far1 = lfw.evaluate(emb_array, actual_issame, 0.0001, nrof_folds=args.test_nrof_folds)
            print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy1), np.std(accuracy1)))
            print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val1, val_std1, far1))
            auc = metrics.auc(fpr1, tpr1)
            print('Area Under Curve (AUC): %1.3f' % auc)
            eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr1, tpr1)(x), 0., 1.)  # fill_value="extrapolate"
            print('Equal Error Rate (EER): %1.3f' % eer)
Example #13
Source File: main.py From capsule-networks with MIT License | 4 votes |
def train(model, supervisor, num_label):
    trX, trY, num_tr_batch, valX, valY, num_val_batch = load_data(cfg.dataset, cfg.batch_size, is_training=True)
    Y = valY[:num_val_batch * cfg.batch_size].reshape((-1, 1))

    fd_train_acc, fd_loss, fd_val_acc = save_to()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with supervisor.managed_session(config=config) as sess:
        print("\nNote: all of results will be saved to directory: " + cfg.results)
        for epoch in range(cfg.epoch):
            print('Training for epoch ' + str(epoch) + '/' + str(cfg.epoch) + ':')
            if supervisor.should_stop():
                print('supervisor stopped!')
                break
            for step in tqdm(range(num_tr_batch), total=num_tr_batch, ncols=70, leave=False, unit='b'):
                start = step * cfg.batch_size
                end = start + cfg.batch_size
                global_step = epoch * num_tr_batch + step

                if global_step % cfg.train_sum_freq == 0:
                    _, loss, train_acc, summary_str = sess.run([model.train_op, model.total_loss, model.accuracy, model.train_summary])
                    assert not np.isnan(loss), 'Something wrong! loss is nan...'
                    supervisor.summary_writer.add_summary(summary_str, global_step)

                    fd_loss.write(str(global_step) + ',' + str(loss) + "\n")
                    fd_loss.flush()
                    fd_train_acc.write(str(global_step) + ',' + str(train_acc / cfg.batch_size) + "\n")
                    fd_train_acc.flush()
                else:
                    sess.run(model.train_op)

                if cfg.val_sum_freq != 0 and (global_step) % cfg.val_sum_freq == 0:
                    val_acc = 0
                    for i in range(num_val_batch):
                        start = i * cfg.batch_size
                        end = start + cfg.batch_size
                        acc = sess.run(model.accuracy, {model.X: valX[start:end], model.labels: valY[start:end]})
                        val_acc += acc
                    val_acc = val_acc / (cfg.batch_size * num_val_batch)
                    fd_val_acc.write(str(global_step) + ',' + str(val_acc) + '\n')
                    fd_val_acc.flush()

            if (epoch + 1) % cfg.save_freq == 0:
                supervisor.saver.save(sess, cfg.logdir + '/model_epoch_%04d_step_%02d' % (epoch, global_step))

        fd_val_acc.close()
        fd_train_acc.close()
        fd_loss.close()
Example #14
Source File: main.py From CapsNet-Tensorflow with Apache License 2.0 | 4 votes |
def train(model, supervisor, num_label):
    trX, trY, num_tr_batch, valX, valY, num_val_batch = load_data(cfg.dataset, cfg.batch_size, is_training=True)
    Y = valY[:num_val_batch * cfg.batch_size].reshape((-1, 1))

    fd_train_acc, fd_loss, fd_val_acc = save_to()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with supervisor.managed_session(config=config) as sess:
        print("\nNote: all of results will be saved to directory: " + cfg.results)
        for epoch in range(cfg.epoch):
            print("Training for epoch %d/%d:" % (epoch, cfg.epoch))
            if supervisor.should_stop():
                print('supervisor stopped!')
                break
            for step in tqdm(range(num_tr_batch), total=num_tr_batch, ncols=70, leave=False, unit='b'):
                start = step * cfg.batch_size
                end = start + cfg.batch_size
                global_step = epoch * num_tr_batch + step

                if global_step % cfg.train_sum_freq == 0:
                    _, loss, train_acc, summary_str = sess.run([model.train_op, model.total_loss, model.accuracy, model.train_summary])
                    assert not np.isnan(loss), 'Something wrong! loss is nan...'
                    supervisor.summary_writer.add_summary(summary_str, global_step)

                    fd_loss.write(str(global_step) + ',' + str(loss) + "\n")
                    fd_loss.flush()
                    fd_train_acc.write(str(global_step) + ',' + str(train_acc / cfg.batch_size) + "\n")
                    fd_train_acc.flush()
                else:
                    sess.run(model.train_op)

                if cfg.val_sum_freq != 0 and (global_step) % cfg.val_sum_freq == 0:
                    val_acc = 0
                    for i in range(num_val_batch):
                        start = i * cfg.batch_size
                        end = start + cfg.batch_size
                        acc = sess.run(model.accuracy, {model.X: valX[start:end], model.labels: valY[start:end]})
                        val_acc += acc
                    val_acc = val_acc / (cfg.batch_size * num_val_batch)
                    fd_val_acc.write(str(global_step) + ',' + str(val_acc) + '\n')
                    fd_val_acc.flush()

            if (epoch + 1) % cfg.save_freq == 0:
                supervisor.saver.save(sess, cfg.logdir + '/model_epoch_%04d_step_%02d' % (epoch, global_step))

        fd_val_acc.close()
        fd_train_acc.close()
        fd_loss.close()
Example #15
Source File: deoverlap.py From learn-to-cluster with MIT License | 4 votes |
def deoverlap(scores,
              proposals,
              tot_inst_num,
              th_pos=-1,
              th_iou=1,
              pred_label_fn=None,
              outlier_scores=None,
              th_outlier=0.5,
              keep_outlier=False):
    print('avg_score(mean: {:.2f}, max: {:.2f}, min: {:.2f})'.format(
        scores.mean(), scores.max(), scores.min()))

    assert len(proposals) == len(scores), '{} vs {}'.format(
        len(proposals), len(scores))
    assert (outlier_scores is None) or isinstance(outlier_scores, dict)

    pos_lst = []
    for idx, prob in enumerate(scores):
        if prob < th_pos:
            continue
        pos_lst.append([idx, prob])
    pos_lst = sorted(pos_lst, key=lambda x: x[1], reverse=True)

    # get all clusters
    clusters = []
    if keep_outlier:
        o_clusters = []
    for idx, _ in tqdm(pos_lst):
        fn_node = proposals[idx]
        cluster = load_data(fn_node)
        cluster, o_cluster = filter_outlier(cluster, fn_node, outlier_scores,
                                            th_outlier)
        clusters.append(cluster)
        if keep_outlier and len(o_cluster) > 0:
            o_clusters.append(o_cluster)

    if keep_outlier:
        print('#outlier_clusters: {}'.format(len(o_clusters)))
        clusters.extend(o_clusters)

    idx2lb, idx2lbs = nms(clusters, th_iou)

    # output stats
    multi_lb_num = 0
    for _, lbs in idx2lbs.items():
        if len(lbs) > 1:
            multi_lb_num += 1
    inst_num = len(idx2lb)
    cls_num = len(set(idx2lb.values()))
    print('#inst: {}, #class: {}, #multi-label: {}'.format(
        inst_num, cls_num, multi_lb_num))
    print('#inst-coverage: {:.2f}'.format(1. * inst_num / tot_inst_num))

    # save to file
    pred_labels = write_meta(pred_label_fn, idx2lb, inst_num=tot_inst_num)
    return pred_labels
Example #16
Source File: cluster_seg_processor.py From learn-to-cluster with MIT License | 4 votes |
def build_graph(self, fn_node, fn_edge):
    ''' build graph from graph file
        - nodes: NxD, each row represents the feature of a node
        - adj: NxN, a symmetric similarity matrix with self-connection
    '''
    node = load_data(fn_node)
    edge = load_data(fn_edge)
    assert len(node) > 1, '#node of {}: {}'.format(fn_node, len(node))
    adj, abs2rel, rel2abs = self.build_adj(node, edge)

    # compute label & mask
    if self.dataset.use_random_seed:
        ''' except using node with max degree as seed,
            you can explore more creative designs.
            e.g., applying random seed for multiple times,
            and take the best results.
        '''
        if self.dataset.use_max_degree_seed:
            s = adj.sum(axis=1, keepdims=True)
            rel_center_idx = np.argmax(s)
            center_idx = rel2abs[rel_center_idx]
        else:
            center_idx = random.choice(node)
            rel_center_idx = abs2rel[center_idx]
        mask = np.zeros(len(node))
        mask[rel_center_idx] = 1
        mask = mask.reshape(-1, 1)
        if not self.dataset.ignore_label:
            lb = self.dataset.idx2lb[center_idx]
            gt_node = self.dataset.lb2idxs[lb]
    else:
        # do not use mask
        if not self.dataset.ignore_label:
            lb2cnt = {}
            for idx in node:
                if idx not in self.dataset.idx2lb:
                    continue
                lb = self.dataset.idx2lb[idx]
                if lb not in lb2cnt:
                    lb2cnt[lb] = 0
                lb2cnt[lb] += 1
            gt_lb, _ = get_majority(lb2cnt)
            gt_node = self.dataset.lb2idxs[gt_lb]

    if not self.dataset.ignore_label:
        g_label = self.get_node_lb(node, gt_node)
    else:
        g_label = np.zeros_like(node)

    features = self.build_features(node)
    if self.dataset.use_random_seed:
        features = np.concatenate((features, mask), axis=1)

    return features, adj, g_label