Python caffe.NCCL Examples

The following are 15 code examples of caffe.NCCL(), drawn from open-source projects. You can go to the original project or source file by following the link above each example, or browse the other available functions and classes of the caffe module.
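All of the examples below follow the same basic recipe: the parent process creates an NCCL uid and forks one worker per GPU; each worker binds an SGD solver to its device, joins the NCCL session, broadcasts the initial weights from rank 0, and registers the NCCL object as a solver callback so gradients are all-reduced across devices on every iteration. The following is a condensed sketch of that shared pattern, not code from any single project; 'solver.prototxt' and the GPU list are placeholders.

from multiprocessing import Process
import caffe

def worker(solver_proto, gpus, uid, rank):
    # Each process drives one GPU and one solver replica.
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(solver_proto)
    nccl = caffe.NCCL(solver, uid)       # join the NCCL session identified by uid
    nccl.bcast()                         # broadcast the initial weights from rank 0
    solver.add_callback(nccl)            # all-reduce gradients each iteration
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)  # overlap reduction with backprop
    solver.step(solver.param.max_iter)

if __name__ == '__main__':
    gpu_ids = [0, 1]                     # placeholder device list
    uid = caffe.NCCL.new_uid()           # one uid shared by all ranks
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpu_ids))
    procs = []
    for rank in range(len(gpu_ids)):
        p = Process(target=worker, args=('solver.prototxt', gpu_ids, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join()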
Example #1
Source File: multigpu.py    From DTPP with BSD 2-Clause "Simplified" License
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    # NCCL uses a uid to identify a session
    uid = caffe.NCCL.new_uid()

    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    procs = []
    for rank in range(len(gpus)):
        p = Process(target=solve_step,
                    args=(solver, snapshot, gpus, timing, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join() 
Example #2
Source File: multigpu.py    From DTPP with BSD 2-Clause "Simplified" License
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #3
Source File: train_large_file.py    From uai-sdk with Apache License 2.0
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #4
Source File: train.py    From uai-sdk with Apache License 2.0
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #5
Source File: train.py    From uai-sdk with Apache License 2.0
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #6
Source File: train_multi_gpu.py    From face-magnet with Apache License 2.0
def train_net_multi_gpu(solver_prototxt, roidb, output_dir, pretrained_model,
                        max_iter, gpus, reload):
    """Train a Fast R-CNN network."""
    roidb = filter_roidb(roidb)
    uid = caffe.NCCL.new_uid()
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))
    procs = []

    for rank in range(len(gpus)):
        p = Process(target=solve,
                    args=(
                        solver_prototxt, roidb, pretrained_model, gpus, uid,
                        rank,
                        output_dir, max_iter, reload))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join() 
Example #7
Source File: train.py    From Deep-Learning-Based-Structural-Damage-Detection with MIT License
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    # NCCL uses a uid to identify a session
    uid = caffe.NCCL.new_uid()

    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    procs = []
    for rank in range(len(gpus)):
        p = Process(target=solve,
                    args=(solver, snapshot, gpus, timing, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join() 
Example #8
Source File: train.py    From Deep-Learning-Based-Structural-Damage-Detection with MIT License
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter) 
Example #9
Source File: train_net_multi.py    From caffe-model with MIT License
def solve(proto, gpus, uid, rank, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if rank == 0:
        # solver.restore(_snapshot)
        solver.net.copy_from(_weights)
    
    solver.net.layers[0].get_gpu_id(gpus[rank])

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    for _ in range(max_iter):
        solver.step(1) 
Example #10
Source File: train_large_file.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #11
Source File: train.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #12
Source File: train.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #13
Source File: train.py    From uai-sdk with Apache License 2.0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to run on the CPU instead of GPUs
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    caffe.init_log(0,True)
    caffe.log('Using devices %s' % str(gpus))

    if use_cpu == True:
        p = Process(target=cpu_solve,
                    args=(solver, snapshot, timing))

        p.daemon = True
        p.start()
        p.join()
    else:
        # NCCL uses a uid to identify a session
        uid = caffe.NCCL.new_uid()

        procs = []
        for rank in range(len(gpus)):
            p = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join() 
Example #14
Source File: train_multi_gpu.py    From face-magnet with Apache License 2.0
def solve(proto, roidb, pretrained_model, gpus, uid, rank, output_dir, max_iter,
          reload):
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))

    caffe.set_solver_rank(rank)

    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    solverW = SolverWrapper(solver_prototxt=proto, roidb=roidb,
                            output_dir=output_dir, gpu_id=rank,
                            pretrained_model=pretrained_model, reload=reload)
    solver = solverW.get_solver()
    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    while solver.iter < max_iter:
        solver.step(1)

        if solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0 and rank == 0:
            solverW.snapshot() 
Example #15
Source File: multigpu.py    From DTPP with BSD 2-Clause "Simplified" License
def solve_step(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    niter = solver.param.max_iter
    display = solver.param.display
    test_iter = 950
    test_interval = 200
    # Initialize loss/accuracy tracking arrays
    train_loss = zeros(int(ceil(niter // display)))
    test_loss = zeros(int(ceil(niter // test_interval)))
    test_acc = zeros(int(ceil(niter // test_interval)))
    # Auxiliary variables
    _train_loss = 0
    _test_loss = 0
    _accuracy = 0
    _max_accuracy = 0
    _max_accuracy_iter = 0
    # Run the solver
    for it in range(niter):
        solver.step(1)


#def train_this(
#        solver,  # solver proto definition
#        snapshot,  # solver snapshot to restore
#        gpus,  # list of device ids
#        timing=False,  # show timing info for compute and communications
#):
#    train(solver, snapshot, gpus, timing)

#solver = caffe.SGDSolver('/home/zhujiagang/temporal-segment-networks/models/ucf101/gating_three_solver.prototxt')
#solver.restore('/home/zhujiagang/temporal-segment-networks/models/ucf101_split_1_gating_three_iter_200.solverstate')