Python caffe.NCCL Examples

The following are 15 code examples of caffe.NCCL(), drawn from open-source projects. You can go to the original project or source file by following the link above each example, or browse the other available functions and classes of the caffe module.
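All of the examples below follow the same basic recipe: the parent process creates an NCCL uid and forks one worker per GPU; each worker binds an SGD solver to its device, joins the NCCL session, broadcasts the initial weights from rank 0, and registers the NCCL object as a solver callback so gradients are all-reduced across devices on every iteration. The following is a condensed sketch of that shared pattern, not code from any single project; 'solver.prototxt' and the GPU list are placeholders.

from multiprocessing import Process
import caffe

def worker(solver_proto, gpus, uid, rank):
    # Each process drives one GPU and one solver replica.
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(solver_proto)
    nccl = caffe.NCCL(solver, uid)       # join the NCCL session identified by uid
    nccl.bcast()                         # broadcast the initial weights from rank 0
    solver.add_callback(nccl)            # all-reduce gradients each iteration
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)  # overlap reduction with backprop
    solver.step(solver.param.max_iter)

if __name__ == '__main__':
    gpu_ids = [0, 1]                     # placeholder device list
    uid = caffe.NCCL.new_uid()           # one uid shared by all ranks
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpu_ids))
    procs = []
    for rank in range(len(gpu_ids)):
        p = Process(target=worker, args=('solver.prototxt', gpu_ids, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join()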
Example #1
Source File: multigpu.py    From DTPP with BSD 2-Clause "Simplified" License
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    # NCCL uses a uid to identify a session
    uid = caffe.NCCL.new_uid()

    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    procs = []
    for rank in range(len(gpus)):
        p = Process(target=solve_step,
                    args=(solver, snapshot, gpus, timing, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join() 
Example #2
Source File: multigpu.py    From DTPP with BSD 2-Clause "Simplified" License
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #3
Source File: train_large_file.py    From uai-sdk with Apache License 2.0
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #4
Source File: train.py    From uai-sdk with Apache License 2.0
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #5
Source File: train.py    From uai-sdk with Apache License 2.0
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #6
Source File: train_multi_gpu.py    From face-magnet with Apache License 2.0
def train_net_multi_gpu(solver_prototxt, roidb, output_dir, pretrained_model,
                        max_iter, gpus, reload):
    """Train a Fast R-CNN network."""
    roidb = filter_roidb(roidb)
    uid = caffe.NCCL.new_uid()
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))
    procs = []

    for rank in range(len(gpus)):
        p = Process(target=solve,
                    args=(
                        solver_prototxt, roidb, pretrained_model, gpus, uid,
                        rank,
                        output_dir, max_iter, reload))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join() 
Example #7
Source File: train.py    From Deep-Learning-Based-Structural-Damage-Detection with MIT License
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    # NCCL uses a uid to identify a session
    uid = caffe.NCCL.new_uid()

    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    procs = []
    for rank in range(len(gpus)):
        p = Process(target=solve,
                    args=(solver, snapshot, gpus, timing, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join() 
Example #8
Source File: train.py    From Deep-Learning-Based-Structural-Damage-Detection with MIT License
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #9
Source File: train_net_multi.py    From caffe-model with MIT License
def solve(proto, gpus, uid, rank, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if rank == 0:
        # solver.restore(_snapshot)
        solver.net.copy_from(_weights)
    
    solver.net.layers[0].get_gpu_id(gpus[rank])

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    for _ in range(max_iter):
        solver.step(1) 
Example #10
Source File: train_large_file.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #11
Source File: train.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #12
Source File: train.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #13
Source File: train.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #14
Source File: train_multi_gpu.py    From face-magnet with Apache License 2.0
def solve(proto, roidb, pretrained_model, gpus, uid, rank, output_dir, max_iter,
          reload):
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))

    caffe.set_solver_rank(rank)

    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    solverW = SolverWrapper(solver_prototxt=proto, roidb=roidb,
                            output_dir=output_dir, gpu_id=rank,
                            pretrained_model=pretrained_model, reload=reload)
    solver = solverW.get_solver()
    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    while solver.iter < max_iter:
        solver.step(1)

        if solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0 and rank == 0:
            solverW.snapshot() 
Example #15
Source File: multigpu.py    From DTPP with BSD 2-Clause "Simplified" License
def solve_step(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    niter = solver.param.max_iter
    display = solver.param.display
    test_iter = 950
    test_interval = 200
    # Initialize loss/accuracy tracking arrays
    train_loss = zeros(int(ceil(niter // display)))
    test_loss = zeros(int(ceil(niter // test_interval)))
    test_acc = zeros(int(ceil(niter // test_interval)))
    # Auxiliary variables
    _train_loss = 0
    _test_loss = 0
    _accuracy = 0
    _max_accuracy = 0
    _max_accuracy_iter = 0
    # Run the solver
    for it in range(niter):
        solver.step(1)


#def train_this(
#        solver,  # solver proto definition
#        snapshot,  # solver snapshot to restore
#        gpus,  # list of device ids
#        timing=False,  # show timing info for compute and communications
#):
#    train(solver, snapshot, gpus, timing)

#solver = caffe.SGDSolver('/home/zhujiagang/temporal-segment-networks/models/ucf101/gating_three_solver.prototxt')
#solver.restore('/home/zhujiagang/temporal-segment-networks/models/ucf101_split_1_gating_three_iter_200.solverstate')