Python tensorpack.utils.gpu.get_nr_gpu() Examples
The following are 6 code examples of tensorpack.utils.gpu.get_nr_gpu(), drawn from open-source projects. You can go to the original project or source file by following the links above each example, or check out all other available functions and classes of the tensorpack.utils.gpu module.
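get_nr_gpu() returns the number of NVIDIA GPUs visible to the process (it honors CUDA_VISIBLE_DEVICES when set); in more recent tensorpack releases the function was renamed get_num_gpu, with get_nr_gpu kept as a deprecated alias. The recurring pattern in the examples below is dividing a global batch size by the GPU count. A minimal sketch, with TOTAL_BATCH_SIZE as a hypothetical value chosen for illustration:

from tensorpack.utils.gpu import get_nr_gpu

# Hypothetical global batch size; each GPU ("tower") gets an equal share.
TOTAL_BATCH_SIZE = 256
nr_tower = max(get_nr_gpu(), 1)   # treat a CPU-only machine as one tower
batch_per_tower = TOTAL_BATCH_SIZE // nr_tower
print("{} tower(s), {} samples per tower".format(nr_tower, batch_per_tower))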
Example #1
Source File: ssh.py From tensorflow-recipes with Apache License 2.0
def get_config():
    global BATCH
    # Split the global batch size evenly across the available GPUs (towers).
    nr_tower = max(get_nr_gpu(), 1)
    BATCH = TOTAL_BATCH_SIZE // nr_tower
    logger.set_logger_dir()  # note: set_logger_dir normally takes the log directory as an argument

    ds_train = get_data('train')
    ds_test = get_data('test')

    return TrainConfig(
        model=Model(),
        data=QueueInput(ds_train),
        callbacks=[
            ModelSaver(),
            InferenceRunner(ds_test, [ScalarStats('total_costs')]),
        ],
        extra_callbacks=[
            MovingAverageSummary(),
            ProgressBar(['']),
            MergeAllSummaries(),
            RunUpdateOps()
        ],
        steps_per_epoch=ds_train.size(),
        max_epoch=100,
    )
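A TrainConfig like this is then handed to a trainer. A hedged sketch of the launch step, mirroring the pattern used in Example #2 below:

from tensorpack.train import SyncMultiGPUTrainerParameterServer, launch_train_with_config
from tensorpack.utils.gpu import get_nr_gpu

config = get_config()
# Run synchronous data-parallel training on all detected GPUs.
launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(max(get_nr_gpu(), 1)))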
Example #2
Source File: train.py From ADL with MIT License
def main():
    args = get_args()

    # args.batch_size is the total batch; convert it to a per-GPU size.
    nr_gpu = get_nr_gpu()
    args.batch_size = args.batch_size // nr_gpu

    model = Model(args)

    if args.evaluate:
        evaluate_wsol(args, model, interval=False)
        sys.exit()

    logger.set_logger_dir(ospj('train_log', args.log_dir))

    config = get_config(model, args)
    if args.use_pretrained_model:
        config.session_init = get_model_loader(_CKPT_NAMES[args.arch_name])

    launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(nr_gpu))
    evaluate_wsol(args, model, interval=True)
Example #3
Source File: imagenet.py From LQ-Nets with MIT License
def get_config(model, fake=False, data_aug=True):
    nr_tower = max(get_nr_gpu(), 1)
    batch = TOTAL_BATCH_SIZE // nr_tower

    if fake:
        logger.info("For benchmark, batch size is fixed to 64 per tower.")
        dataset_train = FakeData(
            [[64, 224, 224, 3], [64]], 1000, random=False, dtype='uint8')
        callbacks = []
    else:
        logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
        dataset_train = get_data('train', batch, data_aug)
        dataset_val = get_data('val', batch, data_aug)
        callbacks = [
            ModelSaver(),
        ]
        if data_aug:
            callbacks.append(ScheduledHyperParamSetter(
                'learning_rate',
                [(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5), (105, 1e-6)]))
        callbacks.append(HumanHyperParamSetter('learning_rate'))
        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))

    return AutoResumeTrainConfig(
        model=model,
        dataflow=dataset_train,
        callbacks=callbacks,
        steps_per_epoch=5000 if TOTAL_BATCH_SIZE == 256 else 10000,
        max_epoch=110 if data_aug else 64,
        nr_tower=nr_tower
    )
Example #4
Source File: train.py From ADL with MIT License
def get_steps_per_epoch(option):
    nr_gpu = get_nr_gpu()
    total_batch = option.batch_size * nr_gpu
    # Scale the step count inversely with the total batch size so that one
    # epoch always covers the same number of images (a total batch of 256
    # is the reference point).
    if option.dataset_name == 'CUB':
        steps_per_epoch = 25 * (256 / total_batch) * option.stepscale
    elif option.dataset_name == 'ILSVRC':
        steps_per_epoch = 5000 * (256 / total_batch) * option.stepscale
    else:
        raise KeyError("Unavailable dataset: {}".format(option.dataset_name))
    return int(steps_per_epoch)
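To make the arithmetic concrete, a worked example with assumed values (4 GPUs and 64 images per GPU on ILSVRC with stepscale 1.0; these numbers are illustrative, not taken from the ADL repository):

nr_gpu, batch_size, stepscale = 4, 64, 1.0
total_batch = batch_size * nr_gpu                        # 256
steps_per_epoch = int(5000 * (256 / total_batch) * stepscale)
assert steps_per_epoch == 5000   # halving the GPUs would double the steps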
Example #5
Source File: train.py From PReMVOS with MIT License
def get_batch_factor():
    nr_gpu = get_nr_gpu()
    assert nr_gpu in [1, 2, 4, 8], nr_gpu
    return 8 // nr_gpu
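One plausible reading (an interpretation, not stated in the PReMVOS source): with one image per GPU, fewer GPUs mean a smaller effective batch, so schedules tuned for an 8-GPU run are stretched by this factor. The resulting values:

for nr_gpu in (1, 2, 4, 8):
    # 1 GPU -> 8, 2 -> 4, 4 -> 2, 8 -> 1
    print(nr_gpu, "GPU(s) -> batch factor", 8 // nr_gpu)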
Example #6
Source File: imagenet-resnet.py From webvision-2.0-benchmarks with Apache License 2.0
def get_config(model, fake=False):
    nr_tower = max(get_nr_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower

    if fake:
        logger.info("For benchmark, batch size is fixed to 64 per tower.")
        dataset_train = FakeData(
            [[64, 224, 224, 3], [64]], 1000, random=False, dtype='uint8')
        callbacks = []
    else:
        logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
        dataset_train = get_data('train', batch)
        dataset_val = get_data('val', batch)

        # Linear-scaling rule: scale the base learning rate with the total
        # batch size relative to the 256-image reference.
        BASE_LR = 0.1 * (args.batch / 256.0)
        callbacks = [
            ModelSaver(),
            ScheduledHyperParamSetter(
                'learning_rate',
                [(0, BASE_LR), (30, BASE_LR * 1e-1), (60, BASE_LR * 1e-2), (90, BASE_LR * 1e-3)]),
        ]
        if BASE_LR > 0.1:
            # Gradual warmup: ramp linearly from 0.1 to BASE_LR over the first 3 epochs.
            callbacks.append(
                ScheduledHyperParamSetter(
                    'learning_rate', [(0, 0.1), (3, BASE_LR)], interp='linear'))

        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))

    return TrainConfig(
        model=model,
        dataflow=dataset_train,
        callbacks=callbacks,
        steps_per_epoch=100 if args.fake else 1280000 // args.batch,
        max_epoch=110,
    )
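To illustrate the warmup branch, a worked example with an assumed total batch of 512 (a value chosen for illustration, not from the repository): BASE_LR becomes 0.1 * (512 / 256.0) = 0.2, which exceeds 0.1, so the second schedule ramps the learning rate linearly from 0.1 to 0.2 over the first 3 epochs before the step decays at epochs 30, 60, and 90 take over.

total_batch = 512                        # assumed command-line value
BASE_LR = 0.1 * (total_batch / 256.0)    # linear-scaling rule
assert BASE_LR == 0.2 and BASE_LR > 0.1  # so the warmup schedule is appended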