Python mpi4py.MPI.IN_PLACE Examples
The following are 22
code examples of mpi4py.MPI.IN_PLACE().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
mpi4py.MPI
, or try the search function
.
Example #1
Source File: backend.py From sigpy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def reduce(self, input, root=0): """Reduce operation in-place. Sums input across all nodes in root node. Args: input (array): input array. root (int): root node rank. """ if self.size > 1: cpu_input = to_device(input, cpu_device) if self.rank == root: self.mpi_comm.Reduce(MPI.IN_PLACE, cpu_input, root=root) copyto(input, cpu_input) else: self.mpi_comm.Reduce(cpu_input, None, root=root)
Example #2
Source File: _mesh_advector.py From UWGeodynamics with GNU General Public License v3.0 | 6 votes |
def _get_minmax_coordinates_mesh(self, axis=0): """ Return the minimum and maximum coordinates along axis parameter: ---------- axis: axis returns: ------- tuple: minV, maxV """ maxVal = np.zeros((1)) minVal = np.zeros((1)) maxVal[0] = self.Model.mesh.data[:, axis].max() minVal[0] = self.Model.mesh.data[:, axis].min() comm.Barrier() comm.Allreduce(_MPI.IN_PLACE, maxVal, op=_MPI.MAX) comm.Allreduce(_MPI.IN_PLACE, minVal, op=_MPI.MIN) comm.Barrier() return minVal, maxVal
Example #3
Source File: backend.py From sigpy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def allreduce(self, input): """All reduce operation in-place. Sums input across all nodes and broadcast back to each node. Args: input (array): input array. """ if self.size > 1: if config.nccl_enabled: device = get_device(input) devices = self.mpi_comm.allgather(device.id) if all([d >= 0 for d in devices]): nccl_comm = self._get_nccl_comm(device, devices) nccl_dtype, nccl_size = self._get_nccl_dtype_size(input) with device: nccl_comm.allReduce(input.data.ptr, input.data.ptr, nccl_size, nccl_dtype, nccl.NCCL_SUM, cp.cuda.Stream.null.ptr) return cpu_input = to_device(input, cpu_device) self.mpi_comm.Allreduce(MPI.IN_PLACE, cpu_input) copyto(input, cpu_input)
Example #4
Source File: communication.py From heat with MIT License | 5 votes |
def __reduce_like(self, func, sendbuf, recvbuf, *args, **kwargs): sbuf = None rbuf = None buf = None # unpack the send buffer if it is a HeAT tensor if isinstance(sendbuf, dndarray.DNDarray): sendbuf = sendbuf._DNDarray__array # unpack the receive buffer if it is a HeAT tensor if isinstance(recvbuf, dndarray.DNDarray): recvbuf = recvbuf._DNDarray__array # harmonize the input and output buffers # MPI requires send and receive buffers to be of same type and length. If the torch tensors are either not both # contiguous or differently strided, they have to be made matching (if possible) first. if isinstance(sendbuf, torch.Tensor): # convert the send buffer to a pointer, number of elements and type are identical to the receive buffer dummy = ( sendbuf.contiguous() ) # make a contiguous copy and reassign the storage, old will be collected sendbuf.set_( dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride() ) sbuf = sendbuf if CUDA_AWARE_MPI else sendbuf.cpu() sendbuf = self.as_buffer(sbuf) if isinstance(recvbuf, torch.Tensor): buf = recvbuf # nothing matches, the buffers have to be made contiguous dummy = recvbuf.contiguous() recvbuf.set_( dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride() ) rbuf = recvbuf if CUDA_AWARE_MPI else recvbuf.cpu() if sendbuf is MPI.IN_PLACE: recvbuf = self.as_buffer(rbuf) else: recvbuf = (self.as_mpi_memory(rbuf), sendbuf[1], sendbuf[2]) # perform the actual reduction operation return func(sendbuf, recvbuf, *args, **kwargs), sbuf, rbuf, buf
Example #5
Source File: fof.py From nbodykit with GNU General Public License v3.0 | 5 votes |
def count(label, comm=MPI.COMM_WORLD): """ Count the number of particles of the same label. This is a collective operation, and after the call, all ranks will have the particle count. Parameters ---------- label : array_like (integers) Halo label of particles, >=0 comm : :py:class:`MPI.Comm` communicator for the collective operation. Returns ------- count : array_like the count of number of particles in each halo """ Nhalo0 = max(comm.allgather(label.max())) + 1 N = numpy.bincount(label, minlength=Nhalo0) comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM) return N
Example #6
Source File: fof.py From nbodykit with GNU General Public License v3.0 | 5 votes |
def fof_find_peaks(source, label, comm, position='Position', column='Density'): """ Find position of the peak (maximum) from a given column for a fof result. """ Nhalo0 = max(comm.allgather(label.max())) + 1 N = numpy.bincount(label, minlength=Nhalo0) comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM) return hpos
Example #7
Source File: _mesh_advector.py From UWGeodynamics with GNU General Public License v3.0 | 5 votes |
def _get_minmax_velocity_wall(self, wall, axis=0): """ Return the minimum and maximum velocity component on the wall parameters: ----------- wall: (indexSet) The wall. axis: axis (velocity component). """ # Initialise value to max and min sys values maxV = np.ones((1)) * sys.float_info.min minV = np.ones((1)) * sys.float_info.max # if local domain has wall, get velocities if wall.data.size > 0: velocities = self.Model.velocityField.data[wall.data, axis] # get local min and max maxV[0] = velocities.max() minV[0] = velocities.min() # reduce operation comm.Barrier() comm.Allreduce(_MPI.IN_PLACE, maxV, op=_MPI.MAX) comm.Allreduce(_MPI.IN_PLACE, minV, op=_MPI.MIN) comm.Barrier() return minV, maxV
Example #8
Source File: mpi_helper.py From pyscf with Apache License 2.0 | 5 votes |
def safeAllreduceInPlace(comm, in_array): shape = in_array.shape length = len(shape) # just use 16 for blocksize, size of complex(double) chunk_size = get_max_blocksize_from_mem(list(shape),16.,MEM_SIZE,priority_list=numpy.arange(length)[::-1]) task_list = generate_task_list(chunk_size,shape) for block in task_list: which_slice = [slice(*x) for x in block] tmp = in_array[tuple(which_slice)].copy() comm.Allreduce(MPI.IN_PLACE, tmp, op=MPI.SUM) in_array[tuple(which_slice)] = tmp
Example #9
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 5 votes |
def Wooov(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wklid = np.zeros((nkpts,nkpts,nkpts,nocc,nocc,nocc,nvir),dtype=t2.dtype) else: Wklid = fint['Wooov'] # TODO can do much better than this... call recursive function # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here ooov_tmp_size = BLKSIZE + (nocc,nocc,nocc,nvir) ooov_tmp = np.empty(ooov_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] eris_ooov_kli = _cp(eris.ooov[s0,s1,s2]) eris_oovv_kli = _cp(eris.oovv[s0,s1,s2]) for iterkk,kk in enumerate(ranges0): for iterkl,kl in enumerate(ranges1): for iterki,ki in enumerate(ranges2): kd = kconserv[kk,ki,kl] ooov_tmp[iterkk,iterkl,iterki] = eris_ooov_kli[iterkk,iterkl,iterki].copy() ooov_tmp[iterkk,iterkl,iterki] += einsum('ic,klcd->klid',t1[ki],eris_oovv_kli[iterkk,iterkl,iterki]) Wklid[s0,s1,s2] = ooov_tmp[:len(ranges0),:len(ranges1),:len(ranges2)] loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wklid, op=MPI.SUM) return Wklid
Example #10
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def Wvoov(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkaci = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nocc,nvir),dtype=t1.dtype) W1kaci = W1voov(cc,t1,t2,eris,fint) W2kaci = W2voov(cc,t1,t2,eris,fint) else: Wkaci = fint['Wvoov'] W1kaci = fint['W1voov'] W2kaci = fint['W2voov'] # TODO can do much better than this... call recursive function # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] Wkaci[s0,s1,s2] = _cp(W1kaci[s0,s1,s2]) + _cp(W2kaci[s0,s1,s2]) loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM) return Wkaci
Example #11
Source File: fof.py From nbodykit with GNU General Public License v3.0 | 4 votes |
def centerofmass(label, pos, boxsize, comm=MPI.COMM_WORLD): """ Calulate the center of mass of particles of the same label. The center of mass is defined as the mean of positions of particles, but care has to be taken regarding to the periodic boundary. This is a collective operation, and after the call, all ranks will have the position of halos. Parameters ---------- label : array_like (integers) Halo label of particles, >=0 pos : array_like (float, 3) position of particles. boxsize : float or None size of the periodic box, or None if no periodic boundary is assumed. comm : :py:class:`MPI.Comm` communicator for the collective operation. Returns ------- hpos : array_like (float, 3) the center of mass position of the halos. """ Nhalo0 = max(comm.allgather(label.max())) + 1 N = numpy.bincount(label, minlength=Nhalo0) comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM) if boxsize is not None: posmin = equiv_class(label, pos, op=numpy.fmin, dense_labels=True, identity=numpy.inf, minlength=len(N)) comm.Allreduce(MPI.IN_PLACE, posmin, op=MPI.MIN) dpos = pos - posmin[label] for i in range(dpos.shape[-1]): bhalf = boxsize[i] * 0.5 dpos[..., i][dpos[..., i] < -bhalf] += boxsize[i] dpos[..., i][dpos[..., i] >= bhalf] -= boxsize[i] else: dpos = pos dpos = equiv_class(label, dpos, op=numpy.add, dense_labels=True, minlength=len(N)) comm.Allreduce(MPI.IN_PLACE, dpos, op=MPI.SUM) dpos /= N[:, None] if boxsize is not None: hpos = posmin + dpos hpos %= boxsize else: hpos = dpos return hpos
Example #12
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def WvoovR1(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkaci = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nocc,nvir),dtype=t1.dtype) W1kaci = W1voov(cc,t1,t2,eris,fint) W2kaci = W2voov(cc,t1,t2,eris,fint) else: Wkaci = fint['WvoovR1'] W1kaci = fint['W1voov'] W2kaci = fint['W2voov'] # TODO can do much better than this... call recursive function # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] Wkaci[s2,s0,s1] = (_cp(W1kaci[s0,s1,s2]) + _cp(W2kaci[s0,s1,s2])).transpose(2,0,1,3,4,5,6) loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM) return Wkaci
Example #13
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def W2voov(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkaci = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nocc,nvir),dtype=t1.dtype) WWooov = Wooov(cc,t1,t2,eris) else: Wkaci = fint['W2voov'] WWooov = fint['Wooov'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nvir*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc) ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] Wooov_akX = _cp(WWooov[s1,s0]) eris_ovvv_kac = _cp(eris.ovvv[s0,s1,s2]) for iterkk,kk in enumerate(ranges0): for iterka,ka in enumerate(ranges1): for iterkc,kc in enumerate(ranges2): ki = kconserv[kk,kc,ka] ovvo_tmp[iterkk,iterka,iterkc] = einsum('la,lkic->kaci',-t1[ka],Wooov_akX[iterka,iterkk,ki]) ovvo_tmp[iterkk,iterka,iterkc] += einsum('akdc,id->kaci',eris_ovvv_kac[iterkk,iterka,iterkc].transpose(1,0,3,2),t1[ki]) Wkaci[ka,kk,ki] = ovvo_tmp[iterkk,iterka,iterkc].transpose(1,0,3,2) loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM) return Wkaci
Example #14
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def WooooS(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wklij = np.zeros((nkpts,nkpts,nkpts,nocc,nocc,nocc,nocc),dtype=t2.dtype) else: Wklij = fint['WooooS'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) nkpts_blksize2 = 1 BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here oooo_tmp_size = BLKSIZE + (nocc,nocc,nocc,nocc) oooo_tmp = np.empty(oooo_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] eris_oovv_klX = _cp(eris.oovv[s0,s1,s2]) eris_oooo_kli = _cp(eris.oooo[s0,s1,s2]) eris_ooov_klX = _cp(eris.ooov[s0,s1,s2]) eris_ooov_lkX = _cp(eris.ooov[s1,s0,s2]) for iterkk,kk in enumerate(ranges0): for iterkl,kl in enumerate(ranges1): for iterki,ki in enumerate(ranges2): kj = kconserv[kk,ki,kl] #tau1 = t2[ki,kj,:].copy() tau1 = unpack_tril(t2,nkpts,ki,kj,range(nkpts),kconserv[ki,range(nkpts),kj]).copy() tau1[ki] += einsum('ic,jd->ijcd',t1[ki],t1[kj]) oooo_tmp[iterkk,iterkl,iterki] = eris_oooo_kli[iterkk,iterkl,iterki].copy() oooo_tmp[iterkk,iterkl,iterki] += einsum('kld,ijd->klij',eris_oovv_klX[iterkk,iterkl,:].transpose(1,2,0,3,4).reshape(nocc,nocc,-1), tau1.transpose(1,2,0,3,4).reshape(nocc,nocc,-1)) oooo_tmp[iterkk,iterkl,iterki] += einsum('klid,jd->klij',eris_ooov_klX[iterkk,iterkl,ki],t1[kj]) oooo_tmp[iterkk,iterkl,iterki] += einsum('lkjc,ic->klij',eris_ooov_lkX[iterkl,iterkk,kj],t1[ki]) Wklij[kl,ki,kj] = oooo_tmp[iterkk,iterkl,iterki] loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wklij, op=MPI.SUM) return Wklij
Example #15
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def Woooo(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wklij = np.zeros((nkpts,nkpts,nkpts,nocc,nocc,nocc,nocc),dtype=t2.dtype) else: Wklij = fint['Woooo'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) nkpts_blksize2 = 1 BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here oooo_tmp_size = BLKSIZE + (nocc,nocc,nocc,nocc) oooo_tmp = np.empty(oooo_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] eris_oovv_klX = _cp(eris.oovv[s0,s1,s2]) eris_oooo_kli = _cp(eris.oooo[s0,s1,s2]) eris_ooov_klX = _cp(eris.ooov[s0,s1,s2]) eris_ooov_lkX = _cp(eris.ooov[s1,s0,s2]) for iterkk,kk in enumerate(ranges0): for iterkl,kl in enumerate(ranges1): for iterki,ki in enumerate(ranges2): kj = kconserv[kk,ki,kl] #tau1 = t2[ki,kj,:].copy() tau1 = unpack_tril(t2,nkpts,ki,kj,range(nkpts),kconserv[ki,range(nkpts),kj]).copy() tau1[ki] += einsum('ic,jd->ijcd',t1[ki],t1[kj]) oooo_tmp[iterkk,iterkl,iterki] = eris_oooo_kli[iterkk,iterkl,iterki].copy() oooo_tmp[iterkk,iterkl,iterki] += einsum('kld,ijd->klij',eris_oovv_klX[iterkk,iterkl,:].transpose(1,2,0,3,4).reshape(nocc,nocc,-1), tau1.transpose(1,2,0,3,4).reshape(nocc,nocc,-1)) oooo_tmp[iterkk,iterkl,iterki] += einsum('klid,jd->klij',eris_ooov_klX[iterkk,iterkl,ki],t1[kj]) oooo_tmp[iterkk,iterkl,iterki] += einsum('lkjc,ic->klij',eris_ooov_lkX[iterkl,iterkk,kj],t1[ki]) Wklij[s0,s1,s2] = oooo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)] loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wklij, op=MPI.SUM) return Wklij # This has different storage compared to Woooo, more amenable to I/O # Instead of calling Woooo[kk,kl,ki] to get Woooo[kk,kl,ki,kj] you call # WooooS[kl,ki,kj]
Example #16
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def WovovRev(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype) WW1ovov = W1ovov(cc,t1,t2,eris) WW2ovov = W2ovov(cc,t1,t2,eris) else: Wkbid = fint['WovovRev'] WW1ovov = fint['W1ovov'] WW2ovov = fint['W2ovov'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir) ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] Wkbid[s2,s1,s0] = (_cp(WW1ovov[s0,s1,s2]) + _cp(WW2ovov[s0,s1,s2])).transpose(2,1,0,3,4,5,6) loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM) return Wkbid # This is the same Woooo intermediate used in cc
Example #17
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def Wovov(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype) WW1ovov = W1ovov(cc,t1,t2,eris) WW2ovov = W2ovov(cc,t1,t2,eris) else: Wkbid = fint['Wovov'] WW1ovov = fint['W1ovov'] WW2ovov = fint['W2ovov'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir) ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] Wkbid[s0,s1,s2] = _cp(WW1ovov[s0,s1,s2]) + _cp(WW2ovov[s0,s1,s2]) loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM) return Wkbid
Example #18
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def W2ovov(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype) WWooov = Wooov(cc,t1,t2,eris) else: Wkbid = fint['W2ovov'] WWooov = fint['Wooov'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nvir*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir) ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] eris_ovvv = _cp(eris.ovvv[s0,s1,s2]) WWooov_kbi = _cp(WWooov[s0,s1,s2]) for iterkk,kk in enumerate(ranges0): for iterkb,kb in enumerate(ranges1): for iterki,ki in enumerate(ranges2): kd = kconserv[kk,ki,kb] ovov_tmp[iterkk,iterkb,iterki] = einsum('klid,lb->kbid',WWooov_kbi[iterkk,iterkb,iterki],-t1[kb]) ovov_tmp[iterkk,iterkb,iterki] += einsum('kbcd,ic->kbid',eris_ovvv[iterkk,iterkb,iterki],t1[ki]) Wkbid[s0,s1,s2] = ovov_tmp[:len(ranges0),:len(ranges1),:len(ranges2)] loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM) return Wkbid
Example #19
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def Wovvo(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkaci = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t2.dtype) W1kaci = W1ovvo(cc,t1,t2,eris,fint) W2kaci = W2ovvo(cc,t1,t2,eris,fint) else: Wkaci = fint['Wovvo'] W1kaci = fint['W1ovvo'] W2kaci = fint['W2ovvo'] # TODO can do much better than this... call recursive function # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] Wkaci[s0,s1,s2] = _cp(W1kaci[s0,s1,s2]) + _cp(W2kaci[s0,s1,s2]) loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM) return Wkaci
Example #20
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def W2ovvo(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkaci = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t1.dtype) WWooov = Wooov(cc,t1,t2,eris) else: Wkaci = fint['W2ovvo'] WWooov = fint['Wooov'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nvir*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc) ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] Wooov_akX = _cp(WWooov[s1,s0]) eris_ovvv_kac = _cp(eris.ovvv[s0,s1,s2]) for iterkk,kk in enumerate(ranges0): for iterka,ka in enumerate(ranges1): for iterkc,kc in enumerate(ranges2): ki = kconserv[kk,kc,ka] ovvo_tmp[iterkk,iterka,iterkc] = einsum('la,lkic->kaci',-t1[ka],Wooov_akX[iterka,iterkk,ki]) ovvo_tmp[iterkk,iterka,iterkc] += einsum('akdc,id->kaci',eris_ovvv_kac[iterkk,iterka,iterkc].transpose(1,0,3,2),t1[ki]) Wkaci[s0,s1,s2] = ovvo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)] loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM) return Wkaci
Example #21
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def W1ovvo(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Wkaci = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t1.dtype) else: Wkaci = fint['W1ovvo'] # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nocc*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc) ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] eris_ovvo_kac = _cp(eris.ovvo[s0,s1,s2]) eris_oovv_kXc = _cp(eris.oovv[s0,:,s2]) eris_oovv_Xkc = _cp(eris.oovv[:,s0,s2]) for iterkk,kk in enumerate(ranges0): for iterka,ka in enumerate(ranges1): for iterkc,kc in enumerate(ranges2): ki = kconserv[kk,kc,ka] ovvo_tmp[iterkk,iterka,iterkc] = _cp(eris_ovvo_kac[iterkk,iterka,iterkc]) #St2 = 2.*t2[ki,:,ka] St2 = 2.*unpack_tril(t2,nkpts,ki,range(nkpts),ka,kconserv[ki,ka,range(nkpts)]) #St2 -= t2[:,ki,ka].transpose(0,2,1,3,4) St2 -= unpack_tril(t2,nkpts,range(nkpts),ki,ka,kconserv[range(nkpts),ka,ki]).transpose(0,2,1,3,4) ovvo_tmp[iterkk,iterka,iterkc] += einsum('klcd,ilad->kaci',eris_oovv_kXc[iterkk,:,iterkc].transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir), St2.transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir)) ovvo_tmp[iterkk,iterka,iterkc] += -einsum('lkcd,ilad->kaci',eris_oovv_Xkc[:,iterkk,iterkc].reshape(nocc*nkpts,nocc,nvir,nvir), unpack_tril(t2,nkpts,ki,range(nkpts),ka,kconserv[ki,ka,range(nkpts)]).transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir)) # t2[ki,:,ka].transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir)) Wkaci[s0,s1,s2] = ovvo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)] loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM) return Wkaci
Example #22
Source File: kintermediates_rhf.py From pyscf with Apache License 2.0 | 4 votes |
def Wvovv(cc,t1,t2,eris,fint=None): nkpts, nocc, nvir = t1.shape kconserv = cc.kconserv if fint is None: Walcd = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nvir,nvir),dtype=t2.dtype) else: Walcd = fint['Wvovv'] # TODO can do much better than this... call recursive function # Adaptive blocking begins here mem = 0.5e9 pre = 1.*nvir*nocc*nvir*nvir*nkpts*16 nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts) nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts) BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,) loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE) loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),)) # Adaptive blocking ends here vovv_tmp_size = BLKSIZE + (nvir,nocc,nvir,nvir) vovv_tmp = np.empty(vovv_tmp_size,dtype=t2.dtype) good2go = True while(good2go): good2go, data = loader.slave_set() if good2go is False: break ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data) s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)] eris_vovv_alc = _cp(eris.vovv[s0,s1,s2]) eris_oovv_alc = _cp(eris.oovv[s0,s1,s2]) for iterka,ka in enumerate(ranges0): for iterkl,kl in enumerate(ranges1): for iterkc,kc in enumerate(ranges2): kd = kconserv[ka,kc,kl] # vovv[ka,kl,kc,kd] <= ovvv[kl,ka,kd,kc].transpose(1,0,3,2) vovv_tmp[iterka,iterkl,iterkc] = eris_vovv_alc[iterka,iterkl,iterkc] #np.array(eris.ovvv[kl,ka,kd]).transpose(1,0,3,2) vovv_tmp[iterka,iterkl,iterkc] += -einsum('ka,klcd->alcd',t1[ka],eris_oovv_alc[iterka,iterkl,iterkc]) Walcd[s0,s1,s2] = vovv_tmp[:len(ranges0),:len(ranges1),:len(ranges2)] loader.slave_finished() comm.Barrier() if fint is None: comm.Allreduce(MPI.IN_PLACE, Walcd, op=MPI.SUM) return Walcd