Python mpi4py.MPI.IN_PLACE Examples

The following are 22 code examples of mpi4py.MPI.IN_PLACE(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module mpi4py.MPI , or try the search function .
Example #1
Source File: backend.py    From sigpy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def reduce(self, input, root=0):
        """Reduce operation in-place.

        Sums input across all nodes in root node.

        Args:
            input (array): input array.
            root (int): root node rank.

        """
        if self.size > 1:
            cpu_input = to_device(input, cpu_device)
            if self.rank == root:
                self.mpi_comm.Reduce(MPI.IN_PLACE, cpu_input, root=root)
                copyto(input, cpu_input)
            else:
                self.mpi_comm.Reduce(cpu_input, None, root=root) 
Example #2
Source File: _mesh_advector.py    From UWGeodynamics with GNU General Public License v3.0 6 votes vote down vote up
def _get_minmax_coordinates_mesh(self, axis=0):
        """ Return the minimum and maximum coordinates along axis

        parameter:
        ----------
            axis:
                axis

        returns:
        -------
            tuple: minV, maxV

        """
        maxVal = np.zeros((1))
        minVal = np.zeros((1))
        maxVal[0] = self.Model.mesh.data[:, axis].max()
        minVal[0] = self.Model.mesh.data[:, axis].min()

        comm.Barrier()
        comm.Allreduce(_MPI.IN_PLACE, maxVal, op=_MPI.MAX)
        comm.Allreduce(_MPI.IN_PLACE, minVal, op=_MPI.MIN)
        comm.Barrier()

        return minVal, maxVal 
Example #3
Source File: backend.py    From sigpy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def allreduce(self, input):
        """All reduce operation in-place.

        Sums input across all nodes and broadcast back to each node.

        Args:
            input (array): input array.

        """
        if self.size > 1:
            if config.nccl_enabled:
                device = get_device(input)
                devices = self.mpi_comm.allgather(device.id)
                if all([d >= 0 for d in devices]):
                    nccl_comm = self._get_nccl_comm(device, devices)
                    nccl_dtype, nccl_size = self._get_nccl_dtype_size(input)
                    with device:
                        nccl_comm.allReduce(input.data.ptr,
                                            input.data.ptr,
                                            nccl_size, nccl_dtype,
                                            nccl.NCCL_SUM,
                                            cp.cuda.Stream.null.ptr)
                        return

            cpu_input = to_device(input, cpu_device)
            self.mpi_comm.Allreduce(MPI.IN_PLACE, cpu_input)
            copyto(input, cpu_input) 
Example #4
Source File: communication.py    From heat with MIT License 5 votes vote down vote up
def __reduce_like(self, func, sendbuf, recvbuf, *args, **kwargs):
        sbuf = None
        rbuf = None
        buf = None
        # unpack the send buffer if it is a HeAT tensor
        if isinstance(sendbuf, dndarray.DNDarray):
            sendbuf = sendbuf._DNDarray__array
        # unpack the receive buffer if it is a HeAT tensor
        if isinstance(recvbuf, dndarray.DNDarray):
            recvbuf = recvbuf._DNDarray__array

        # harmonize the input and output buffers
        # MPI requires send and receive buffers to be of same type and length. If the torch tensors are either not both
        # contiguous or differently strided, they have to be made matching (if possible) first.
        if isinstance(sendbuf, torch.Tensor):
            # convert the send buffer to a pointer, number of elements and type are identical to the receive buffer
            dummy = (
                sendbuf.contiguous()
            )  # make a contiguous copy and reassign the storage, old will be collected
            sendbuf.set_(
                dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride()
            )
            sbuf = sendbuf if CUDA_AWARE_MPI else sendbuf.cpu()
            sendbuf = self.as_buffer(sbuf)
        if isinstance(recvbuf, torch.Tensor):
            buf = recvbuf
            # nothing matches, the buffers have to be made contiguous
            dummy = recvbuf.contiguous()
            recvbuf.set_(
                dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride()
            )
            rbuf = recvbuf if CUDA_AWARE_MPI else recvbuf.cpu()
            if sendbuf is MPI.IN_PLACE:
                recvbuf = self.as_buffer(rbuf)
            else:
                recvbuf = (self.as_mpi_memory(rbuf), sendbuf[1], sendbuf[2])

        # perform the actual reduction operation
        return func(sendbuf, recvbuf, *args, **kwargs), sbuf, rbuf, buf 
Example #5
Source File: fof.py    From nbodykit with GNU General Public License v3.0 5 votes vote down vote up
def count(label, comm=MPI.COMM_WORLD):
    """
    Count the number of particles of the same label.

    This is a collective operation, and after the call, all ranks
    will have the particle count.

    Parameters
    ----------
    label : array_like (integers)
        Halo label of particles, >=0
    comm : :py:class:`MPI.Comm`
        communicator for the collective operation.

    Returns
    -------
    count : array_like
        the count of number of particles in each halo

    """
    Nhalo0 = max(comm.allgather(label.max())) + 1

    N = numpy.bincount(label, minlength=Nhalo0)
    comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM)

    return N 
Example #6
Source File: fof.py    From nbodykit with GNU General Public License v3.0 5 votes vote down vote up
def fof_find_peaks(source, label, comm,
                position='Position', column='Density'):
    """
    Find position of the peak (maximum) from a given column for a fof result.
    """
    Nhalo0 = max(comm.allgather(label.max())) + 1

    N = numpy.bincount(label, minlength=Nhalo0)
    comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM)

    return hpos 
Example #7
Source File: _mesh_advector.py    From UWGeodynamics with GNU General Public License v3.0 5 votes vote down vote up
def _get_minmax_velocity_wall(self, wall, axis=0):
        """ Return the minimum and maximum velocity component on the wall

        parameters:
        -----------
            wall: (indexSet)
                The wall.
            axis:
                axis (velocity component).
        """

        # Initialise value to max and min sys values
        maxV = np.ones((1)) * sys.float_info.min
        minV = np.ones((1)) * sys.float_info.max

        # if local domain has wall, get velocities
        if wall.data.size > 0:
            velocities  = self.Model.velocityField.data[wall.data, axis]
            # get local min and max
            maxV[0] = velocities.max()
            minV[0] = velocities.min()

        # reduce operation
        comm.Barrier()
        comm.Allreduce(_MPI.IN_PLACE, maxV, op=_MPI.MAX)
        comm.Allreduce(_MPI.IN_PLACE, minV, op=_MPI.MIN)
        comm.Barrier()

        return minV, maxV 
Example #8
Source File: mpi_helper.py    From pyscf with Apache License 2.0 5 votes vote down vote up
def safeAllreduceInPlace(comm, in_array):
    shape = in_array.shape
    length = len(shape)
    # just use 16 for blocksize, size of complex(double)
    chunk_size = get_max_blocksize_from_mem(list(shape),16.,MEM_SIZE,priority_list=numpy.arange(length)[::-1])
    task_list = generate_task_list(chunk_size,shape)
    for block in task_list:
        which_slice = [slice(*x) for x in block]
        tmp = in_array[tuple(which_slice)].copy()
        comm.Allreduce(MPI.IN_PLACE, tmp, op=MPI.SUM)
        in_array[tuple(which_slice)] = tmp 
Example #9
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 5 votes vote down vote up
def Wooov(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wklid = np.zeros((nkpts,nkpts,nkpts,nocc,nocc,nocc,nvir),dtype=t2.dtype)
    else:
        Wklid = fint['Wooov']

    # TODO can do much better than this... call recursive function
    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    ooov_tmp_size = BLKSIZE + (nocc,nocc,nocc,nvir)
    ooov_tmp = np.empty(ooov_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        eris_ooov_kli = _cp(eris.ooov[s0,s1,s2])
        eris_oovv_kli = _cp(eris.oovv[s0,s1,s2])

        for iterkk,kk in enumerate(ranges0):
            for iterkl,kl in enumerate(ranges1):
                for iterki,ki in enumerate(ranges2):
                    kd = kconserv[kk,ki,kl]
                    ooov_tmp[iterkk,iterkl,iterki] = eris_ooov_kli[iterkk,iterkl,iterki].copy()
                    ooov_tmp[iterkk,iterkl,iterki] += einsum('ic,klcd->klid',t1[ki],eris_oovv_kli[iterkk,iterkl,iterki])
        Wklid[s0,s1,s2] = ooov_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
        loader.slave_finished()

    comm.Barrier()
    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wklid, op=MPI.SUM)

    return Wklid 
Example #10
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def Wvoov(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkaci  = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nocc,nvir),dtype=t1.dtype)
        W1kaci = W1voov(cc,t1,t2,eris,fint)
        W2kaci = W2voov(cc,t1,t2,eris,fint)
    else:
        Wkaci = fint['Wvoov']
        W1kaci = fint['W1voov']
        W2kaci = fint['W2voov']

    # TODO can do much better than this... call recursive function
    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        Wkaci[s0,s1,s2] = _cp(W1kaci[s0,s1,s2]) + _cp(W2kaci[s0,s1,s2])

        loader.slave_finished()

    comm.Barrier()

    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)

    return Wkaci 
Example #11
Source File: fof.py    From nbodykit with GNU General Public License v3.0 4 votes vote down vote up
def centerofmass(label, pos, boxsize, comm=MPI.COMM_WORLD):
    """
    Calulate the center of mass of particles of the same label.

    The center of mass is defined as the mean of positions of particles,
    but care has to be taken regarding to the periodic boundary.

    This is a collective operation, and after the call, all ranks
    will have the position of halos.

    Parameters
    ----------
    label : array_like (integers)
        Halo label of particles, >=0
    pos   : array_like (float, 3)
        position of particles.
    boxsize : float or None
        size of the periodic box, or None if no periodic boundary is assumed.
    comm : :py:class:`MPI.Comm`
        communicator for the collective operation.

    Returns
    -------
    hpos : array_like (float, 3)
        the center of mass position of the halos.

    """
    Nhalo0 = max(comm.allgather(label.max())) + 1

    N = numpy.bincount(label, minlength=Nhalo0)
    comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM)

    if boxsize is not None:
        posmin = equiv_class(label, pos, op=numpy.fmin, dense_labels=True, identity=numpy.inf,
                        minlength=len(N))
        comm.Allreduce(MPI.IN_PLACE, posmin, op=MPI.MIN)
        dpos = pos - posmin[label]
        for i in range(dpos.shape[-1]):
            bhalf = boxsize[i] * 0.5
            dpos[..., i][dpos[..., i] < -bhalf] += boxsize[i]
            dpos[..., i][dpos[..., i] >= bhalf] -= boxsize[i]
    else:
        dpos = pos
    dpos = equiv_class(label, dpos, op=numpy.add, dense_labels=True, minlength=len(N))

    comm.Allreduce(MPI.IN_PLACE, dpos, op=MPI.SUM)
    dpos /= N[:, None]

    if boxsize is not None:
        hpos = posmin + dpos
        hpos %= boxsize
    else:
        hpos = dpos
    return hpos 
Example #12
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def WvoovR1(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkaci  = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nocc,nvir),dtype=t1.dtype)
        W1kaci = W1voov(cc,t1,t2,eris,fint)
        W2kaci = W2voov(cc,t1,t2,eris,fint)
    else:
        Wkaci = fint['WvoovR1']
        W1kaci = fint['W1voov']
        W2kaci = fint['W2voov']

    # TODO can do much better than this... call recursive function
    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        Wkaci[s2,s0,s1] = (_cp(W1kaci[s0,s1,s2]) + _cp(W2kaci[s0,s1,s2])).transpose(2,0,1,3,4,5,6)

        loader.slave_finished()

    comm.Barrier()

    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)

    return Wkaci 
Example #13
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def W2voov(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkaci  = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nocc,nvir),dtype=t1.dtype)
        WWooov = Wooov(cc,t1,t2,eris)
    else:
        Wkaci  = fint['W2voov']
        WWooov = fint['Wooov']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nvir*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc)
    ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]

        Wooov_akX     = _cp(WWooov[s1,s0])
        eris_ovvv_kac = _cp(eris.ovvv[s0,s1,s2])

        for iterkk,kk in enumerate(ranges0):
            for iterka,ka in enumerate(ranges1):
                for iterkc,kc in enumerate(ranges2):
                    ki = kconserv[kk,kc,ka]
                    ovvo_tmp[iterkk,iterka,iterkc] = einsum('la,lkic->kaci',-t1[ka],Wooov_akX[iterka,iterkk,ki])
                    ovvo_tmp[iterkk,iterka,iterkc] += einsum('akdc,id->kaci',eris_ovvv_kac[iterkk,iterka,iterkc].transpose(1,0,3,2),t1[ki])

                    Wkaci[ka,kk,ki] = ovvo_tmp[iterkk,iterka,iterkc].transpose(1,0,3,2)

        loader.slave_finished()

    comm.Barrier()

    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)

    return Wkaci 
Example #14
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def WooooS(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wklij = np.zeros((nkpts,nkpts,nkpts,nocc,nocc,nocc,nocc),dtype=t2.dtype)
    else:
        Wklij = fint['WooooS']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    nkpts_blksize2 = 1
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    oooo_tmp_size = BLKSIZE + (nocc,nocc,nocc,nocc)
    oooo_tmp = np.empty(oooo_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        eris_oovv_klX = _cp(eris.oovv[s0,s1,s2])
        eris_oooo_kli = _cp(eris.oooo[s0,s1,s2])
        eris_ooov_klX = _cp(eris.ooov[s0,s1,s2])
        eris_ooov_lkX = _cp(eris.ooov[s1,s0,s2])

        for iterkk,kk in enumerate(ranges0):
            for iterkl,kl in enumerate(ranges1):
                for iterki,ki in enumerate(ranges2):
                    kj = kconserv[kk,ki,kl]
                    #tau1 = t2[ki,kj,:].copy()
                    tau1 = unpack_tril(t2,nkpts,ki,kj,range(nkpts),kconserv[ki,range(nkpts),kj]).copy()
                    tau1[ki] += einsum('ic,jd->ijcd',t1[ki],t1[kj])
                    oooo_tmp[iterkk,iterkl,iterki] = eris_oooo_kli[iterkk,iterkl,iterki].copy()
                    oooo_tmp[iterkk,iterkl,iterki] += einsum('kld,ijd->klij',eris_oovv_klX[iterkk,iterkl,:].transpose(1,2,0,3,4).reshape(nocc,nocc,-1),
                                                                 tau1.transpose(1,2,0,3,4).reshape(nocc,nocc,-1))
                    oooo_tmp[iterkk,iterkl,iterki] += einsum('klid,jd->klij',eris_ooov_klX[iterkk,iterkl,ki],t1[kj])
                    oooo_tmp[iterkk,iterkl,iterki] += einsum('lkjc,ic->klij',eris_ooov_lkX[iterkl,iterkk,kj],t1[ki])
                    Wklij[kl,ki,kj] = oooo_tmp[iterkk,iterkl,iterki]
        loader.slave_finished()

    comm.Barrier()
    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wklij, op=MPI.SUM)

    return Wklij 
Example #15
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def Woooo(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wklij = np.zeros((nkpts,nkpts,nkpts,nocc,nocc,nocc,nocc),dtype=t2.dtype)
    else:
        Wklij = fint['Woooo']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    nkpts_blksize2 = 1
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    oooo_tmp_size = BLKSIZE + (nocc,nocc,nocc,nocc)
    oooo_tmp = np.empty(oooo_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        eris_oovv_klX = _cp(eris.oovv[s0,s1,s2])
        eris_oooo_kli = _cp(eris.oooo[s0,s1,s2])
        eris_ooov_klX = _cp(eris.ooov[s0,s1,s2])
        eris_ooov_lkX = _cp(eris.ooov[s1,s0,s2])

        for iterkk,kk in enumerate(ranges0):
            for iterkl,kl in enumerate(ranges1):
                for iterki,ki in enumerate(ranges2):
                    kj = kconserv[kk,ki,kl]
                    #tau1 = t2[ki,kj,:].copy()
                    tau1 = unpack_tril(t2,nkpts,ki,kj,range(nkpts),kconserv[ki,range(nkpts),kj]).copy()
                    tau1[ki] += einsum('ic,jd->ijcd',t1[ki],t1[kj])
                    oooo_tmp[iterkk,iterkl,iterki] = eris_oooo_kli[iterkk,iterkl,iterki].copy()
                    oooo_tmp[iterkk,iterkl,iterki] += einsum('kld,ijd->klij',eris_oovv_klX[iterkk,iterkl,:].transpose(1,2,0,3,4).reshape(nocc,nocc,-1),
                                                                 tau1.transpose(1,2,0,3,4).reshape(nocc,nocc,-1))
                    oooo_tmp[iterkk,iterkl,iterki] += einsum('klid,jd->klij',eris_ooov_klX[iterkk,iterkl,ki],t1[kj])
                    oooo_tmp[iterkk,iterkl,iterki] += einsum('lkjc,ic->klij',eris_ooov_lkX[iterkl,iterkk,kj],t1[ki])
        Wklij[s0,s1,s2] = oooo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
        loader.slave_finished()

    comm.Barrier()
    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wklij, op=MPI.SUM)

    return Wklij

# This has different storage compared to Woooo, more amenable to I/O
# Instead of calling Woooo[kk,kl,ki] to get Woooo[kk,kl,ki,kj] you call
# WooooS[kl,ki,kj] 
Example #16
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def WovovRev(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype)
        WW1ovov = W1ovov(cc,t1,t2,eris)
        WW2ovov = W2ovov(cc,t1,t2,eris)
    else:
        Wkbid = fint['WovovRev']
        WW1ovov = fint['W1ovov']
        WW2ovov = fint['W2ovov']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir)
    ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]

        Wkbid[s2,s1,s0] = (_cp(WW1ovov[s0,s1,s2]) + _cp(WW2ovov[s0,s1,s2])).transpose(2,1,0,3,4,5,6)

        loader.slave_finished()

    comm.Barrier()
    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM)

    return Wkbid



# This is the same Woooo intermediate used in cc 
Example #17
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def Wovov(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype)
        WW1ovov = W1ovov(cc,t1,t2,eris)
        WW2ovov = W2ovov(cc,t1,t2,eris)
    else:
        Wkbid = fint['Wovov']
        WW1ovov = fint['W1ovov']
        WW2ovov = fint['W2ovov']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir)
    ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]

        Wkbid[s0,s1,s2] = _cp(WW1ovov[s0,s1,s2]) + _cp(WW2ovov[s0,s1,s2])

        loader.slave_finished()

    comm.Barrier()
    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM)

    return Wkbid 
Example #18
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def W2ovov(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype)
        WWooov = Wooov(cc,t1,t2,eris)
    else:
        Wkbid = fint['W2ovov']
        WWooov = fint['Wooov']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nvir*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir)
    ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        eris_ovvv  = _cp(eris.ovvv[s0,s1,s2])
        WWooov_kbi = _cp(WWooov[s0,s1,s2])

        for iterkk,kk in enumerate(ranges0):
            for iterkb,kb in enumerate(ranges1):
                for iterki,ki in enumerate(ranges2):
                    kd = kconserv[kk,ki,kb]
                    ovov_tmp[iterkk,iterkb,iterki] = einsum('klid,lb->kbid',WWooov_kbi[iterkk,iterkb,iterki],-t1[kb])
                    ovov_tmp[iterkk,iterkb,iterki] += einsum('kbcd,ic->kbid',eris_ovvv[iterkk,iterkb,iterki],t1[ki])
        Wkbid[s0,s1,s2] = ovov_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
        loader.slave_finished()

    comm.Barrier()
    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM)

    return Wkbid 
Example #19
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def Wovvo(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkaci = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t2.dtype)
        W1kaci = W1ovvo(cc,t1,t2,eris,fint)
        W2kaci = W2ovvo(cc,t1,t2,eris,fint)
    else:
        Wkaci = fint['Wovvo']
        W1kaci = fint['W1ovvo']
        W2kaci = fint['W2ovvo']

    # TODO can do much better than this... call recursive function
    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        Wkaci[s0,s1,s2] = _cp(W1kaci[s0,s1,s2]) + _cp(W2kaci[s0,s1,s2])

        loader.slave_finished()

    comm.Barrier()

    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)

    return Wkaci 
Example #20
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def W2ovvo(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkaci  = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t1.dtype)
        WWooov = Wooov(cc,t1,t2,eris)
    else:
        Wkaci  = fint['W2ovvo']
        WWooov = fint['Wooov']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nvir*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc)
    ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]

        Wooov_akX     = _cp(WWooov[s1,s0])
        eris_ovvv_kac = _cp(eris.ovvv[s0,s1,s2])

        for iterkk,kk in enumerate(ranges0):
            for iterka,ka in enumerate(ranges1):
                for iterkc,kc in enumerate(ranges2):
                    ki = kconserv[kk,kc,ka]
                    ovvo_tmp[iterkk,iterka,iterkc] = einsum('la,lkic->kaci',-t1[ka],Wooov_akX[iterka,iterkk,ki])
                    ovvo_tmp[iterkk,iterka,iterkc] += einsum('akdc,id->kaci',eris_ovvv_kac[iterkk,iterka,iterkc].transpose(1,0,3,2),t1[ki])

        Wkaci[s0,s1,s2] = ovvo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]

        loader.slave_finished()

    comm.Barrier()

    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)

    return Wkaci 
Example #21
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def W1ovvo(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Wkaci  = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t1.dtype)
    else:
        Wkaci  = fint['W1ovvo']

    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc)
    ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]

        eris_ovvo_kac = _cp(eris.ovvo[s0,s1,s2])
        eris_oovv_kXc = _cp(eris.oovv[s0,:,s2])
        eris_oovv_Xkc = _cp(eris.oovv[:,s0,s2])

        for iterkk,kk in enumerate(ranges0):
            for iterka,ka in enumerate(ranges1):
                for iterkc,kc in enumerate(ranges2):
                    ki = kconserv[kk,kc,ka]
                    ovvo_tmp[iterkk,iterka,iterkc] = _cp(eris_ovvo_kac[iterkk,iterka,iterkc])
                    #St2 = 2.*t2[ki,:,ka]
                    St2 = 2.*unpack_tril(t2,nkpts,ki,range(nkpts),ka,kconserv[ki,ka,range(nkpts)])
                    #St2 -= t2[:,ki,ka].transpose(0,2,1,3,4)
                    St2 -= unpack_tril(t2,nkpts,range(nkpts),ki,ka,kconserv[range(nkpts),ka,ki]).transpose(0,2,1,3,4)
                    ovvo_tmp[iterkk,iterka,iterkc] += einsum('klcd,ilad->kaci',eris_oovv_kXc[iterkk,:,iterkc].transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir),
                                                                St2.transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir))
                    ovvo_tmp[iterkk,iterka,iterkc] += -einsum('lkcd,ilad->kaci',eris_oovv_Xkc[:,iterkk,iterkc].reshape(nocc*nkpts,nocc,nvir,nvir),
                                               unpack_tril(t2,nkpts,ki,range(nkpts),ka,kconserv[ki,ka,range(nkpts)]).transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir))
#                                                                t2[ki,:,ka].transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir))
        Wkaci[s0,s1,s2] = ovvo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]

        loader.slave_finished()

    comm.Barrier()

    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)

    return Wkaci 
Example #22
Source File: kintermediates_rhf.py    From pyscf with Apache License 2.0 4 votes vote down vote up
def Wvovv(cc,t1,t2,eris,fint=None):
    nkpts, nocc, nvir = t1.shape
    kconserv = cc.kconserv

    if fint is None:
        Walcd = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nvir,nvir),dtype=t2.dtype)
    else:
        Walcd = fint['Wvovv']

    # TODO can do much better than this... call recursive function
    # Adaptive blocking begins here
    mem = 0.5e9
    pre = 1.*nvir*nocc*nvir*nvir*nkpts*16
    nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
    nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
    BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
    loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
    loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
    # Adaptive blocking ends here
    vovv_tmp_size = BLKSIZE + (nvir,nocc,nvir,nvir)
    vovv_tmp = np.empty(vovv_tmp_size,dtype=t2.dtype)

    good2go = True
    while(good2go):
        good2go, data = loader.slave_set()
        if good2go is False:
            break
        ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)

        s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
        eris_vovv_alc = _cp(eris.vovv[s0,s1,s2])
        eris_oovv_alc = _cp(eris.oovv[s0,s1,s2])

        for iterka,ka in enumerate(ranges0):
            for iterkl,kl in enumerate(ranges1):
                for iterkc,kc in enumerate(ranges2):
                    kd = kconserv[ka,kc,kl]
                    # vovv[ka,kl,kc,kd] <= ovvv[kl,ka,kd,kc].transpose(1,0,3,2)
                    vovv_tmp[iterka,iterkl,iterkc] = eris_vovv_alc[iterka,iterkl,iterkc] #np.array(eris.ovvv[kl,ka,kd]).transpose(1,0,3,2)
                    vovv_tmp[iterka,iterkl,iterkc] += -einsum('ka,klcd->alcd',t1[ka],eris_oovv_alc[iterka,iterkl,iterkc])
        Walcd[s0,s1,s2] = vovv_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
        loader.slave_finished()

    comm.Barrier()
    if fint is None:
        comm.Allreduce(MPI.IN_PLACE, Walcd, op=MPI.SUM)

    return Walcd