Python provider.shuffle_data() Examples
The following are 9 code examples of provider.shuffle_data(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the provider module, or try the search function.
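Although the implementation of shuffle_data() itself is not reproduced on this page, the call sites below all follow the same PointNet-style provider convention: the function takes a batch of point clouds and their labels, permutes both along the first axis, and also returns the permutation it used, so any parallel per-sample arrays can be reordered consistently (as Example #1 does with semantic labels and Example #5 does with normals). A minimal sketch of such a helper, written here purely for illustration and not taken from any of the listed repositories, could look like this:

import numpy as np

def shuffle_data(data, labels):
    """Shuffle point clouds and labels along the batch axis.

    data:   (B, N, C) array of point clouds
    labels: (B, ...) array of per-cloud or per-point labels
    Returns the shuffled data, the shuffled labels, and the permutation,
    so the same ordering can be applied to other per-sample arrays.
    """
    idx = np.arange(data.shape[0])
    np.random.shuffle(idx)
    return data[idx, ...], labels[idx], idx

# Hypothetical usage: keep an auxiliary per-sample array aligned by
# indexing it with the returned permutation.
# data, labels, idx = shuffle_data(train_data, train_labels)
# train_normals = train_normals[idx, ...]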
Example #1
Source File: train.py From ASIS with MIT License | 5 votes |
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    log_string('----')
    # Shuffle point clouds and instance labels together; reuse the returned
    # permutation so the semantic labels stay aligned with the shuffled data.
    current_data, current_label, shuffled_idx = provider.shuffle_data(train_data[:, 0:NUM_POINT, :], train_group)
    current_sem = train_sem[shuffled_idx]
    file_size = current_data.shape[0]
    num_batches = file_size // BATCH_SIZE

    loss_sum = 0
    for batch_idx in range(num_batches):
        if batch_idx % 100 == 0:
            print('Current batch/total batch num: %d/%d' % (batch_idx, num_batches))
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE

        feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, :, :],
                     ops['labels_pl']: current_label[start_idx:end_idx],
                     ops['sem_labels_pl']: current_sem[start_idx:end_idx],
                     ops['is_training_pl']: is_training}
        summary, step, _, loss_val, sem_loss_val, disc_loss_val, l_var_val, l_dist_val, l_reg_val = sess.run(
            [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['sem_loss'],
             ops['disc_loss'], ops['l_var'], ops['l_dist'], ops['l_reg']],
            feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        loss_sum += loss_val

        if batch_idx % 50 == 0:
            log_string("loss: {:.2f}; sem_loss: {:.2f}; disc_loss: {:.2f}; l_var: {:.2f}; "
                       "l_dist: {:.2f}; l_reg: {:.3f}.".format(loss_val, sem_loss_val, disc_loss_val,
                                                               l_var_val, l_dist_val, l_reg_val))

    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
Example #2
Source File: train.py From PointCNN.Pytorch with MIT License | 5 votes |
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    log_string('----')
    current_data, current_label, _ = provider.shuffle_data(train_data[:, 0:NUM_POINT, :], train_label)
    file_size = current_data.shape[0]
    num_batches = file_size // BATCH_SIZE

    total_correct = 0
    total_seen = 0
    loss_sum = 0
    for batch_idx in range(num_batches):
        if batch_idx % 100 == 0:
            print('Current batch/total batch num: %d/%d' % (batch_idx, num_batches))
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE

        feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, :, :],
                     ops['labels_pl']: current_label[start_idx:end_idx],
                     ops['is_training_pl']: is_training}
        summary, step, _, loss_val, pred_val = sess.run(
            [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
            feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        pred_val = np.argmax(pred_val, 2)
        correct = np.sum(pred_val == current_label[start_idx:end_idx])
        total_correct += correct
        total_seen += (BATCH_SIZE * NUM_POINT)
        loss_sum += loss_val

    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('accuracy: %f' % (total_correct / float(total_seen)))
Example #3
Source File: train.py From ldgcnn with MIT License | 5 votes |
def train_classifier_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    for fn in range(len(TRAIN_FILES_CLS)):
        # Shuffle train files
        current_data, current_label = provider.loadDataFile(TRAIN_FILES_CLS[fn])
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)

        # I find that we can increase the accuracy by about 0.2% after
        # padding zero vectors, but I do not know the reason.
        current_data = np.concatenate([current_data, np.zeros((
            current_data.shape[0], NUM_FEATURE_CLS - current_data.shape[1]))], axis=-1)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        total_correct = 0
        total_seen = 0
        loss_sum = 0
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            # Input the features and labels to the graph.
            feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, ...],
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training}
            # Calculate the loss and classification scores.
            summary, step, _, loss_val, pred_val = sess.run(
                [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val
Example #4
Source File: train_xyz.py From SpiderCNN with MIT License | 4 votes |
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)

    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label, _ = provider.loadDataFile_with_normal(TRAIN_FILES[train_file_idxs[fn]])
        current_data = current_data[:, 0:NUM_POINT, :]
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        total_correct = 0
        total_seen = 0
        loss_sum = 0
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)

            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training}
            summary, step, _, loss_val, pred_val = sess.run(
                [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val

        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen)))
Example #5
Source File: train.py From SpiderCNN with MIT License | 4 votes |
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)

    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label, normal_data = provider.loadDataFile_with_normal(TRAIN_FILES[train_file_idxs[fn]])
        normal_data = normal_data[:, 0:NUM_POINT, :]
        current_data = current_data[:, 0:NUM_POINT, :]
        current_data, current_label, shuffle_idx = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)
        normal_data = normal_data[shuffle_idx, ...]

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        total_correct = 0
        total_seen = 0
        loss_sum = 0
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)
            input_data = np.concatenate((jittered_data, normal_data[start_idx:end_idx, :, :]), 2)
            # random point dropout
            input_data = provider.random_point_dropout(input_data)

            feed_dict = {ops['pointclouds_pl']: input_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training}
            summary, step, _, loss_val, pred_val = sess.run(
                [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val

        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen)))
Example #6
Source File: train.py From PointCNN.Pytorch with MIT License | 4 votes |
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)

    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label = provider.loadDataFile(TRAIN_FILES[train_file_idxs[fn]])
        current_data = current_data[:, 0:NUM_POINT, :]
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        total_correct = 0
        total_seen = 0
        loss_sum = 0
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)

            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training}
            summary, step, _, loss_val, pred_val = sess.run(
                [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val

        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen)))
Example #7
Source File: train.py From deep_gcns with MIT License | 4 votes |
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    sem_seg_util.log_string(LOG_FOUT, '----')
    current_data, current_label, _ = provider.shuffle_data(train_data[:, 0:NUM_POINTS, :], train_label)
    file_size = current_data.shape[0]
    num_batches = file_size // (NUM_GPU * BATCH_SIZE)

    total_correct = 0
    total_seen = 0
    loss_sum = 0
    for batch_idx in range(num_batches):
        if batch_idx % 100 == 0:
            print('Current batch/total batch num: %d/%d' % (batch_idx, num_batches))

        # Compute one slice of the shuffled data per GPU.
        start_idx = []
        end_idx = []
        for gpu_idx in range(NUM_GPU):
            start_idx.append((batch_idx + gpu_idx) * BATCH_SIZE)
            end_idx.append((batch_idx + gpu_idx + 1) * BATCH_SIZE)

        # Build a feed dict with one batch per GPU tower.
        feed_dict = dict()
        for gpu_idx in range(NUM_GPU):
            feed_dict[ops['inputs_phs'][gpu_idx]] = current_data[start_idx[gpu_idx]:end_idx[gpu_idx], :, :]
            feed_dict[ops['labels_phs'][gpu_idx]] = current_label[start_idx[gpu_idx]:end_idx[gpu_idx]]
            feed_dict[ops['is_training_phs'][gpu_idx]] = is_training

        summary, step, _, loss_val, pred_val = sess.run(
            [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
            feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        # Accuracy is measured against the labels of the last GPU's slice.
        pred_val = np.argmax(pred_val, 2)
        correct = np.sum(pred_val == current_label[start_idx[-1]:end_idx[-1]])
        total_correct += correct
        total_seen += (BATCH_SIZE * NUM_POINTS)
        loss_sum += loss_val

    sem_seg_util.log_string(LOG_FOUT, 'mean loss: %f' % (loss_sum / float(num_batches)))
    sem_seg_util.log_string(LOG_FOUT, 'accuracy: %f' % (total_correct / float(total_seen)))
Example #8
Source File: train.py From ldgcnn with MIT License | 4 votes |
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)

    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        # Load data and labels from the files.
        current_data, current_label = provider.loadDataFile(TRAIN_FILES[train_file_idxs[fn]])
        current_data = current_data[:, 0:NUM_POINT, :]
        # Shuffle the data in the training set.
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        total_correct = 0
        total_seen = 0
        loss_sum = 0
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            # Augment batched point clouds by rotating, jittering, shifting,
            # and scaling.
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)
            jittered_data = provider.random_scale_point_cloud(jittered_data)
            jittered_data = provider.rotate_perturbation_point_cloud(jittered_data)
            jittered_data = provider.shift_point_cloud(jittered_data)

            # Input the augmented point cloud and labels to the graph.
            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training}
            # Calculate the loss and accuracy of the input batch data.
            summary, step, _, loss_val, pred_val = sess.run(
                [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val

        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen)))
Example #9
Source File: dataset_s3dis.py From JSNet with MIT License | 4 votes |
def data_sample(data_sample_queue, input_list, split, epoch, num_works,
                block_points=4096, block_size=1.0, stride=0.5,
                random_sample=False, sample_num=None, sample_aug=1):
    assert (input_list[0].endswith('npy') or input_list[0].endswith('h5')), \
        "data format must be .npy or .h5"

    input_list_length = len(input_list)
    num_work = min(min(num_works, multiprocessing.cpu_count()), input_list_length // 4)
    if input_list_length > 4:
        num_work = max(num_work, 4)
    chunksize = input_list_length // num_work
    print("num input_list: {}, num works: {}, chunksize: {}".format(
        input_list_length, num_work, chunksize))

    if input_list[0].endswith('npy'):
        data_sample_func = functools_partial(
            indoor3d_util.room2blocks_wrapper_normalized, num_point=block_points,
            block_size=block_size, stride=stride, random_sample=random_sample,
            sample_num=sample_num, sample_aug=sample_aug)
    elif input_list[0].endswith('h5'):
        def load_data_file(input_file):
            cur_data, cur_group, _, cur_sem = provider.loadDataFile_with_groupseglabel_stanfordindoor(input_file)
            return cur_data, cur_sem, cur_group
        data_sample_func = load_data_file

    def data_sample_single(input_file):
        datalabel = data_sample_func(input_file)
        if split == 'train':
            datalabel = provider.shuffle_data(*datalabel)
        return datalabel

    for _ in range(epoch):
        np.random.shuffle(input_list)
        for idx in range(chunksize + 1):
            start_idx = min(idx * num_work, input_list_length)
            end_idx = min((idx + 1) * num_work, input_list_length)
            if start_idx >= input_list_length or end_idx > input_list_length:
                continue
            with futures.ThreadPoolExecutor(num_work) as pool:
                data_sem_ins = list(pool.map(data_sample_single, input_list[start_idx:end_idx], chunksize=1))
                for dsi in data_sem_ins:
                    shuffle_dsi = provider.shuffle_data(*dsi)
                    data_sample_queue.put(shuffle_dsi)
                    del dsi
                gc.collect()
                pool.shutdown()
            gc.collect()