Python utils.load_data() Examples

The following are 16 code examples of utils.load_data(), collected from open-source projects; each example notes its source file, originating project, and license. Note that utils is a project-local module in every case, so the signature and return value of load_data() differ from project to project.
Example #1
Source File: blender.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def get_weights():
    # Read validation labels
    _, labels, _, _, _ = utils.load_data()
    # Note: pre-0.18 scikit-learn API (this StratifiedKFold is directly iterable)
    skf = StratifiedKFold(labels, n_folds=5, random_state=23)
    test_index = None
    for _, test_idx in skf:
        test_index = np.append(test_index, test_idx) if test_index is not None else test_idx
    val_labels = labels[test_index]
    # Read predictions on validation set
    val_predictions = []
    prediction_files = utils.get_prediction_files()
    for preds_file in prediction_files:
        vp = np.genfromtxt(os.path.join(consts.BLEND_PATH, preds_file), delimiter=',')
        val_predictions.append(vp)
    # Minimize blending function
    p0 = [1.] * len(prediction_files)
    p = fmin_cobyla(error, p0, args=(val_predictions, val_labels), cons=[constraint], rhoend=1e-5)

    return p 
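
The snippet relies on two helpers defined elsewhere in blender.py: error (the objective) and constraint. A minimal sketch of plausible implementations, assuming a log-loss objective over the weighted blend and a non-negativity constraint (scipy's fmin_cobyla passes args to constraint functions as well, and treats a return value >= 0 as feasible):

import numpy as np
from sklearn.metrics import log_loss

def error(p, predictions, labels):
    # Blend the per-model probability matrices with weights p
    # (np.average normalizes the weights to sum to 1).
    blended = np.average(predictions, axis=0, weights=p)
    return log_loss(labels, blended)

def constraint(p, *args):
    # Feasible when every blending weight is non-negative.
    return min(p)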
Example #2
Source File: main.py    From capsule-networks with MIT License
def evaluation(model, supervisor, num_label):
    teX, teY, num_te_batch = load_data(cfg.dataset, cfg.batch_size, is_training=False)
    fd_test_acc = save_to()
    with supervisor.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        supervisor.saver.restore(sess, tf.train.latest_checkpoint(cfg.logdir))
        tf.logging.info('Model restored!')

        test_acc = 0
        for i in tqdm(range(num_te_batch), total=num_te_batch, ncols=70, leave=False, unit='b'):
            start = i * cfg.batch_size
            end = start + cfg.batch_size
            acc = sess.run(model.accuracy, {model.X: teX[start:end], model.labels: teY[start:end]})
            test_acc += acc
        test_acc = test_acc / (cfg.batch_size * num_te_batch)
        fd_test_acc.write(str(test_acc))
        fd_test_acc.close()
        print('Test accuracy has been saved to ' + cfg.results + '/test_acc.csv') 
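
save_to() is a project helper; from the way its return value is used here, in evaluation mode it opens a single results file. A hedged sketch (the path is inferred from the print statement above, and cfg is the project's config module):

import os

def save_to():
    if not os.path.exists(cfg.results):
        os.makedirs(cfg.results)
    return open(cfg.results + '/test_acc.csv', 'w')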
Example #3
Source File: main.py    From CapsNet-Tensorflow with Apache License 2.0
def evaluation(model, supervisor, num_label):
    teX, teY, num_te_batch = load_data(cfg.dataset, cfg.batch_size, is_training=False)
    fd_test_acc = save_to()
    with supervisor.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        supervisor.saver.restore(sess, tf.train.latest_checkpoint(cfg.logdir))
        tf.logging.info('Model restored!')

        test_acc = 0
        for i in tqdm(range(num_te_batch), total=num_te_batch, ncols=70, leave=False, unit='b'):
            start = i * cfg.batch_size
            end = start + cfg.batch_size
            acc = sess.run(model.accuracy, {model.X: teX[start:end], model.labels: teY[start:end]})
            test_acc += acc
        test_acc = test_acc / (cfg.batch_size * num_te_batch)
        fd_test_acc.write(str(test_acc))
        fd_test_acc.close()
        print('Test accuracy has been saved to ' + cfg.results + '/test_acc.csv') 
Example #4
Source File: tagger.py    From nagisa with MIT License
def __init__(self, vocabs=None, params=None, hp=None, single_word_list=None):
        if vocabs is None:
            vocabs = base + '/data/nagisa_v001.dict'
        if params is None:
            params = base + '/data/nagisa_v001.model'
        if hp is None:
            hp = base + '/data/nagisa_v001.hp'

        # Load vocabulary files
        vocabs = utils.load_data(vocabs)
        self._uni2id, self._bi2id, self._word2id, self._pos2id, self._word2postags = vocabs
        self._id2pos = {v:k for k, v in self._pos2id.items()}
        self.id2pos  = self._id2pos
        self.postags = [postag for postag in self._pos2id.keys()]
        # Load a hyper-parameter file
        self._hp = utils.load_data(hp)
        # Construct a word segmentation model and a pos tagging model
        self._model = model.Model(self._hp, params)

        # If a word is included in the single_word_list,
        # it is recognized as a single word forcibly.
        self.pattern = None
        if single_word_list:
            single_word_list = [utils.preprocess(w) for w in single_word_list if len(w) > 1]
            single_word_list = [w.replace('(', r'\(').replace(')', r'\)')
                                for w in single_word_list]
            single_word_list = sorted(single_word_list, key=lambda x:-len(x))
            if len(single_word_list) > 0:
                self.pattern = re.compile('|'.join(single_word_list))

        # If use_noun_heuristic is True, nouns are more likely to appear.
        if u'名詞' in self._pos2id:
            self.use_noun_heuristic = True
        else:
            self.use_noun_heuristic = False 
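
This Tagger is usually not constructed by hand; nagisa's top-level API builds a default instance. A minimal usage example following nagisa's documented interface:

import nagisa

tokens = nagisa.tagging('Pythonで簡単に使えるツールです')
print(tokens.words)    # segmented words
print(tokens.postags)  # one POS tag per word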
Example #5
Source File: sample.py    From char-cnn-text-classification-tensorflow with Apache License 2.0
def sample(args):
    print('Loading data')
    x, y, vocabulary, vocabulary_inv = utils.load_data()

    text = [list(args.text)]
    sentences_padded = utils.pad_sentences(text, maxlen=x.shape[1])
    raw_x, dummy_y = utils.build_input_data(sentences_padded, [0], vocabulary)

    checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            predicted_result = sess.run(predictions, {input_x: raw_x, dropout_keep_prob: 1.0})
            if predicted_result[0] == 0:
                print(args.text + ": negative")
            else:
                print(args.text + ": positive")
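
utils.pad_sentences and utils.build_input_data are project-local helpers. A rough sketch of the padding step for orientation (the pad token and truncation policy are assumptions, not the project's exact code):

def pad_sentences(sentences, maxlen, pad_token='<PAD/>'):
    padded = []
    for s in sentences:
        s = list(s)[:maxlen]  # truncate overlong input
        padded.append(s + [pad_token] * (maxlen - len(s)))
    return padded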
Example #6
Source File: supervised_train.py    From DGFraud with Apache License 2.0
def main(argv=None):
    print("Loading training data..")
    # file_name = 'small_sample.mat'
    file_name = FLAGS.file_name
    train_perc = FLAGS.train_perc
    relations = ['net_rur', 'net_rtr', 'net_rsr']
    train_data = load_data(FLAGS.train_prefix, file_name, relations, train_perc)
    print("Done loading training data..")
    train(train_data) 
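
FLAGS comes from TensorFlow 1.x's flag module and is declared elsewhere in the project. A hypothetical declaration consistent with the names used above (defaults are placeholders; 'small_sample.mat' is taken from the commented-out line):

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('train_prefix', './dataset/', 'directory containing the data file')
flags.DEFINE_string('file_name', 'small_sample.mat', 'name of the .mat data file')
flags.DEFINE_float('train_perc', 0.6, 'fraction of nodes used for training')
FLAGS = flags.FLAGS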
Example #7
Source File: preprocess.py    From FastSpeech with MIT License
def main():
    path = os.path.join("data", "LJSpeech-1.1")
    preprocess_ljspeech(path)

    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    # Skip alignment extraction if it has already been done
    if not os.path.exists(hp.alignment_path):
        os.mkdir(hp.alignment_path)
    else:
        return

    tacotron2 = get_Tacotron2()

    num = 0
    for ind, text in enumerate(texts[num:]):
        print(ind)

        character = text[0:len(text)-1]
        mel_gt_name = os.path.join(
            hp.mel_ground_truth, "ljspeech-mel-%05d.npy" % (ind+num+1))
        mel_gt_target = np.load(mel_gt_name)
        _, _, D = load_data(character, mel_gt_target, tacotron2)

        np.save(os.path.join(hp.alignment_path, str(
            ind+num) + ".npy"), D, allow_pickle=False) 
Example #8
Source File: network.py    From lgcn with GNU General Public License v3.0
def process_data(self):
        data = load_data('cora')
        adj, feas = data[:2]
        self.adj = adj.todense()
        self.normed_adj = preprocess_adj(adj)
        self.feas = preprocess_features(feas, False)
        self.y_train, self.y_val, self.y_test = data[2:5]
        self.train_mask, self.val_mask, self.test_mask = data[5:] 
Example #9
Source File: draw.py    From learn-to-cluster with MIT License
def draw_graph(ofolder, idx2lb, g_label, idx, prob):
    fpath = os.path.join(ofolder, '{}.npz'.format(idx))
    ograph_folder = 'graph/' + ofolder.split('/')[-1]
    if not os.path.exists(ograph_folder):
        os.makedirs(ograph_folder)
    color_dict = {1: "red", 0: "lightblue"}
    vertices, raw_edges = load_data(fpath)
    vertices = list(vertices)
    lb = idx2lb[idx]
    abs2rel = {}
    for i, v in enumerate(vertices):
        abs2rel[v] = i
    edges = [(abs2rel[p1], abs2rel[p2]) for p1, p2, _ in raw_edges]
    g = Graph(vertex_attrs={"label": vertices}, edges=edges, directed=False)
    edge_weights = [1 - d for _, _, d in raw_edges]
    if len(edge_weights) > 0:
        w_mean = sum(edge_weights) / len(edge_weights)
        w_max = max(edge_weights)
        w_min = min(edge_weights)
    else:
        w_mean, w_max, w_min = 1, 1, 1

    visual_style = {}
    visual_style["vertex_color"] = [
        color_dict[lb == idx2lb[v]] for v in vertices
    ]
    visual_style['edge_width'] = [5 * w for w in edge_weights]

    plot(g,
         **visual_style,
         target="{}/{}_{}_{:.2f}_{:.2f}_{:.2f}_{:.2f}.png".format(
             ograph_folder, g_label, idx, prob, w_mean, w_min, w_max)) 
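
Graph and plot come from python-igraph, and load_data from the project's utils module. For reference, the imports this snippet assumes:

import os
from igraph import Graph, plot
from utils import load_data  # project-local helper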
Example #10
Source File: cluster_det_processor.py    From learn-to-cluster with MIT License
def build_graph(self, fn_node, fn_edge):
        ''' build graph from graph file
            - nodes: NxD,
                     each row represents the feature of a node
            - adj:   NxN,
                     a symmetric similarity matrix with self-connection
        '''
        node = load_data(fn_node)
        edge = load_data(fn_edge)
        assert len(node) > 1, '#node of {}: {}'.format(fn_node, len(node))
        # take majority as label of the graph
        if not self.dataset.ignore_label:
            lb2cnt = {}
            for idx in node:
                if idx not in self.dataset.idx2lb:
                    continue
                lb = self.dataset.idx2lb[idx]
                if lb not in lb2cnt:
                    lb2cnt[lb] = 0
                lb2cnt[lb] += 1
            gt_lb, _ = get_majority(lb2cnt)
            gt_node = self.dataset.lb2idxs[gt_lb]
            if self.dataset.det_label == 'iou':
                label = compute_iou(node, gt_node)
            elif self.dataset.det_label == 'iop':
                label = compute_iop(node, gt_node)
            else:
                raise KeyError('Unknown det_label type: {}'.format(
                    self.dataset.det_label))
        else:
            label = -1.

        adj, _, _ = self.build_adj(node, edge)
        features = self.build_features(node)
        return features, adj, label 
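
get_majority, compute_iou, and compute_iop are small project helpers. Minimal sketches consistent with how they are called above (illustrative re-implementations, not the project's exact code):

def get_majority(lb2cnt):
    # Return the most frequent label and its count.
    if not lb2cnt:
        return None, 0
    lb = max(lb2cnt, key=lb2cnt.get)
    return lb, lb2cnt[lb]

def compute_iou(node, gt_node):
    # Intersection-over-union between proposal and ground-truth node sets.
    node, gt_node = set(node), set(gt_node)
    return 1. * len(node & gt_node) / len(node | gt_node)

def compute_iop(node, gt_node):
    # Intersection-over-proposal: fraction of proposed nodes that are correct.
    node = set(node)
    return 1. * len(node & set(gt_node)) / len(node)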
Example #11
Source File: main.py    From dgl with Apache License 2.0
def main(args):
    # If args['hetero'] is True, g is a heterogeneous graph;
    # otherwise it is a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
            epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1)) 
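
main expects a plain dict of hyper-parameters rather than an argparse namespace. A hypothetical minimal configuration (key names are taken from the snippet; the values shown are illustrative only):

if __name__ == '__main__':
    main({
        'dataset': 'ACMRaw',
        'device': 'cuda:0',
        'hetero': True,
        'hidden_units': 8,
        'num_heads': [8],
        'dropout': 0.6,
        'lr': 0.005,
        'weight_decay': 0.001,
        'num_epochs': 200,
        'patience': 100,
    })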
Example #12
Source File: test.py    From aitom with GNU General Public License v3.0
def test(args):
    with tf.Graph().as_default():
        with tf.Session() as sess:
            #saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)
            saver = tf.train.Saver(tf.global_variables())
            saver.restore(sess, args.model)
            # Read the file containing the pairs used for testing
            pairs = lfw.read_pairs(os.path.expanduser(args.test_list_dir))
            # Get the paths for the corresponding images
            paths, actual_issame = lfw.get_paths(os.path.expanduser(args.test_data_dir), pairs, args.test_list_dir)
            image_size = args.image_size
            print('image size', image_size)
            images_placeholder = tf.placeholder(tf.float32, shape=(None, args.image_height, args.image_width, args.image_width), name='image')
            phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
            #network definition.
            prelogits1 = network.infer(images_placeholder,args.embedding_size)
            if args.fc_bn:
                print('do batch norm after network')
                prelogits = slim.batch_norm(prelogits1, is_training=phase_train_placeholder, epsilon=1e-5, scale=True, scope='softmax_bn')
            #embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
            embeddings = tf.identity(prelogits)
            embedding_size = embeddings.get_shape()[1]
            # Run forward pass to calculate embeddings
            print('Running forward pass on testing images')
            batch_size = args.test_batch_size
            nrof_images = len(paths)
            nrof_batches = int(math.ceil(1.0*nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))

            for i in range(nrof_batches):
                start_index = i*batch_size
                print('handling {}/{}'.format(start_index, nrof_images))
                end_index = min((i+1)*batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = utils.load_data(paths_batch, False, False, args.image_height, args.image_width, False,
                                         (args.image_height, args.image_width))
                feed_dict = { images_placeholder:images, phase_train_placeholder:False }
                feats,a = sess.run([embeddings,prelogits], feed_dict=feed_dict)
                # do not know for sure whether we should turn this on? it depends.
                feats = utils.l2_normalize(feats)
                emb_array[start_index:end_index,:] = feats
        
            tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(emb_array, 
                actual_issame, 0.001, nrof_folds=args.test_nrof_folds)
            print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy)))
            print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
            auc = metrics.auc(fpr, tpr)
            print('Area Under Curve (AUC): %1.3f' % auc)
            eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)#fill_value="extrapolate"
            print('Equal Error Rate (EER): %1.3f' % eer)


            tpr1, fpr1, accuracy1, val1, val_std1, far1 = lfw.evaluate(emb_array, 
                actual_issame, 0.0001, nrof_folds=args.test_nrof_folds)
            print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy1), np.std(accuracy1)))
            print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val1, val_std1, far1))
            auc = metrics.auc(fpr1, tpr1)
            print('Area Under Curve (AUC): %1.3f' % auc)
            eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr1, tpr1)(x), 0., 1.)#fill_value="extrapolate"
            print('Equal Error Rate (EER): %1.3f' % eer) 
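
The brentq line above computes the equal error rate as the root of 1 - x - tpr(x): at that point the false positive rate x equals the false negative rate 1 - tpr(x). A self-contained demo on a toy ROC curve:

import numpy as np
from scipy.interpolate import interp1d
from scipy.optimize import brentq

fpr = np.array([0.0, 0.1, 0.3, 1.0])
tpr = np.array([0.0, 0.7, 0.9, 1.0])
eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
print('EER: %.3f' % eer)  # 0.200 for this toy curve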
Example #13
Source File: main.py    From capsule-networks with MIT License
def train(model, supervisor, num_label):
    trX, trY, num_tr_batch, valX, valY, num_val_batch = load_data(cfg.dataset, cfg.batch_size, is_training=True)
    Y = valY[:num_val_batch * cfg.batch_size].reshape((-1, 1))

    fd_train_acc, fd_loss, fd_val_acc = save_to()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with supervisor.managed_session(config=config) as sess:
        print("\nNote: all of results will be saved to directory: " + cfg.results)
        for epoch in range(cfg.epoch):
            print('Training for epoch ' + str(epoch) + '/' + str(cfg.epoch) + ':')
            if supervisor.should_stop():
                print('supervisor stopped!')
                break
            for step in tqdm(range(num_tr_batch), total=num_tr_batch, ncols=70, leave=False, unit='b'):
                start = step * cfg.batch_size
                end = start + cfg.batch_size
                global_step = epoch * num_tr_batch + step

                if global_step % cfg.train_sum_freq == 0:
                    _, loss, train_acc, summary_str = sess.run([model.train_op, model.total_loss, model.accuracy, model.train_summary])
                    assert not np.isnan(loss), 'Something wrong! loss is nan...'
                    supervisor.summary_writer.add_summary(summary_str, global_step)

                    fd_loss.write(str(global_step) + ',' + str(loss) + "\n")
                    fd_loss.flush()
                    fd_train_acc.write(str(global_step) + ',' + str(train_acc / cfg.batch_size) + "\n")
                    fd_train_acc.flush()
                else:
                    sess.run(model.train_op)

                if cfg.val_sum_freq != 0 and (global_step) % cfg.val_sum_freq == 0:
                    val_acc = 0
                    for i in range(num_val_batch):
                        start = i * cfg.batch_size
                        end = start + cfg.batch_size
                        acc = sess.run(model.accuracy, {model.X: valX[start:end], model.labels: valY[start:end]})
                        val_acc += acc
                    val_acc = val_acc / (cfg.batch_size * num_val_batch)
                    fd_val_acc.write(str(global_step) + ',' + str(val_acc) + '\n')
                    fd_val_acc.flush()

            if (epoch + 1) % cfg.save_freq == 0:
                supervisor.saver.save(sess, cfg.logdir + '/model_epoch_%04d_step_%02d' % (epoch, global_step))

        fd_val_acc.close()
        fd_train_acc.close()
        fd_loss.close() 
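
In training mode save_to() has to return three open handles. A hedged sketch mirroring the evaluation-mode sketch in Example #2 (the file names are assumptions modeled on what each handle records):

import os

def save_to():
    if not os.path.exists(cfg.results):
        os.makedirs(cfg.results)
    fd_train_acc = open(cfg.results + '/train_acc.csv', 'w')
    fd_loss = open(cfg.results + '/loss.csv', 'w')
    fd_val_acc = open(cfg.results + '/val_acc.csv', 'w')
    return fd_train_acc, fd_loss, fd_val_acc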
Example #14
Source File: main.py    From CapsNet-Tensorflow with Apache License 2.0
def train(model, supervisor, num_label):
    trX, trY, num_tr_batch, valX, valY, num_val_batch = load_data(cfg.dataset, cfg.batch_size, is_training=True)
    Y = valY[:num_val_batch * cfg.batch_size].reshape((-1, 1))

    fd_train_acc, fd_loss, fd_val_acc = save_to()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with supervisor.managed_session(config=config) as sess:
        print("\nNote: all of results will be saved to directory: " + cfg.results)
        for epoch in range(cfg.epoch):
            print("Training for epoch %d/%d:" % (epoch, cfg.epoch))
            if supervisor.should_stop():
                print('supervisor stopped!')
                break
            for step in tqdm(range(num_tr_batch), total=num_tr_batch, ncols=70, leave=False, unit='b'):
                start = step * cfg.batch_size
                end = start + cfg.batch_size
                global_step = epoch * num_tr_batch + step

                if global_step % cfg.train_sum_freq == 0:
                    _, loss, train_acc, summary_str = sess.run([model.train_op, model.total_loss, model.accuracy, model.train_summary])
                    assert not np.isnan(loss), 'Something wrong! loss is nan...'
                    supervisor.summary_writer.add_summary(summary_str, global_step)

                    fd_loss.write(str(global_step) + ',' + str(loss) + "\n")
                    fd_loss.flush()
                    fd_train_acc.write(str(global_step) + ',' + str(train_acc / cfg.batch_size) + "\n")
                    fd_train_acc.flush()
                else:
                    sess.run(model.train_op)

                if cfg.val_sum_freq != 0 and (global_step) % cfg.val_sum_freq == 0:
                    val_acc = 0
                    for i in range(num_val_batch):
                        start = i * cfg.batch_size
                        end = start + cfg.batch_size
                        acc = sess.run(model.accuracy, {model.X: valX[start:end], model.labels: valY[start:end]})
                        val_acc += acc
                    val_acc = val_acc / (cfg.batch_size * num_val_batch)
                    fd_val_acc.write(str(global_step) + ',' + str(val_acc) + '\n')
                    fd_val_acc.flush()

            if (epoch + 1) % cfg.save_freq == 0:
                supervisor.saver.save(sess, cfg.logdir + '/model_epoch_%04d_step_%02d' % (epoch, global_step))

        fd_val_acc.close()
        fd_train_acc.close()
        fd_loss.close() 
Example #15
Source File: deoverlap.py    From learn-to-cluster with MIT License
def deoverlap(scores,
              proposals,
              tot_inst_num,
              th_pos=-1,
              th_iou=1,
              pred_label_fn=None,
              outlier_scores=None,
              th_outlier=0.5,
              keep_outlier=False):
    print('avg_score(mean: {:.2f}, max: {:.2f}, min: {:.2f})'.format(
        scores.mean(), scores.max(), scores.min()))

    assert len(proposals) == len(scores), '{} vs {}'.format(
        len(proposals), len(scores))
    assert (outlier_scores is None) or isinstance(outlier_scores, dict)

    pos_lst = []
    for idx, prob in enumerate(scores):
        if prob < th_pos:
            continue
        pos_lst.append([idx, prob])
    pos_lst = sorted(pos_lst, key=lambda x: x[1], reverse=True)

    # get all clusters
    clusters = []
    if keep_outlier:
        o_clusters = []
    for idx, _ in tqdm(pos_lst):
        fn_node = proposals[idx]
        cluster = load_data(fn_node)
        cluster, o_cluster = filter_outlier(cluster, fn_node, outlier_scores, th_outlier)
        clusters.append(cluster)
        if keep_outlier and len(o_cluster) > 0:
            o_clusters.append(o_cluster)

    if keep_outlier:
        print('#outlier_clusters: {}'.format(len(o_clusters)))
        clusters.extend(o_clusters)

    idx2lb, idx2lbs = nms(clusters, th_iou)

    # output stats
    multi_lb_num = 0
    for _, lbs in idx2lbs.items():
        if len(lbs) > 1:
            multi_lb_num += 1
    inst_num = len(idx2lb)
    cls_num = len(set(idx2lb.values()))

    print('#inst: {}, #class: {}, #multi-label: {}'.format(
        inst_num, cls_num, multi_lb_num))
    print('#inst-coverage: {:.2f}'.format(1. * inst_num / tot_inst_num))

    # save to file
    pred_labels = write_meta(pred_label_fn, idx2lb, inst_num=tot_inst_num)

    return pred_labels 
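
nms here is the project's cluster-level non-maximum suppression. An illustrative re-implementation consistent with how it is used above: clusters arrive sorted by score, a later cluster is dropped when too large a fraction of its nodes is already claimed, and th_iou=1 therefore suppresses nothing.

def nms(clusters, th_iou):
    idx2lb, idx2lbs = {}, {}
    lb = 0
    for cluster in clusters:
        cluster = set(cluster)
        if not cluster:
            continue
        n_assigned = sum(1 for i in cluster if i in idx2lb)
        if 1. * n_assigned / len(cluster) > th_iou:
            continue  # suppressed: mostly covered by higher-scoring clusters
        for i in cluster:
            idx2lbs.setdefault(i, set()).add(lb)
            if i not in idx2lb:
                idx2lb[i] = lb
        lb += 1
    return idx2lb, idx2lbs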
Example #16
Source File: cluster_seg_processor.py    From learn-to-cluster with MIT License
def build_graph(self, fn_node, fn_edge):
        ''' build graph from graph file
            - nodes: NxD,
                     each row represents the feature of a node
            - adj:   NxN,
                     a symmetric similarity matrix with self-connection
        '''
        node = load_data(fn_node)
        edge = load_data(fn_edge)
        assert len(node) > 1, '#node of {}: {}'.format(fn_node, len(node))

        adj, abs2rel, rel2abs = self.build_adj(node, edge)
        # compute label & mask
        if self.dataset.use_random_seed:
            ''' Besides using the node with max degree as the seed,
                you can explore more creative designs,
                e.g., applying a random seed multiple times
                and taking the best result.
            '''
            if self.dataset.use_max_degree_seed:
                s = adj.sum(axis=1, keepdims=True)
                rel_center_idx = np.argmax(s)
                center_idx = rel2abs[rel_center_idx]
            else:
                center_idx = random.choice(node)
                rel_center_idx = abs2rel[center_idx]
            mask = np.zeros(len(node))
            mask[rel_center_idx] = 1
            mask = mask.reshape(-1, 1)
            if not self.dataset.ignore_label:
                lb = self.dataset.idx2lb[center_idx]
                gt_node = self.dataset.lb2idxs[lb]
        else:
            # do not use mask
            if not self.dataset.ignore_label:
                lb2cnt = {}
                for idx in node:
                    if idx not in self.dataset.idx2lb:
                        continue
                    lb = self.dataset.idx2lb[idx]
                    if lb not in lb2cnt:
                        lb2cnt[lb] = 0
                    lb2cnt[lb] += 1
                gt_lb, _ = get_majority(lb2cnt)
                gt_node = self.dataset.lb2idxs[gt_lb]

        if not self.dataset.ignore_label:
            g_label = self.get_node_lb(node, gt_node)
        else:
            g_label = np.zeros_like(node)

        features = self.build_features(node)
        if self.dataset.use_random_seed:
            features = np.concatenate((features, mask), axis=1)
        return features, adj, g_label