Java Code Examples for org.nd4j.linalg.factory.Nd4j#sizeOfDataType()
The following examples show how to use org.nd4j.linalg.factory.Nd4j#sizeOfDataType(). Each example is taken from an open-source project; the source file, project, and license are noted above it.
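Before the full examples, a minimal sketch of the method itself: sizeOfDataType() reports the width in bytes of one element of a given data type (or of the current default type, for the no-argument overload), and the examples below multiply that width by an element count to compute buffer sizes. The class name here is made up for illustration; the Nd4j calls are the same two overloads the examples use.

    import org.nd4j.linalg.api.buffer.DataType;
    import org.nd4j.linalg.factory.Nd4j;

    public class SizeOfDataTypeSketch {
        public static void main(String[] args) {
            // width of a single element, in bytes: 4 for FLOAT, 8 for DOUBLE
            int floatWidth = Nd4j.sizeOfDataType(DataType.FLOAT);
            int doubleWidth = Nd4j.sizeOfDataType(DataType.DOUBLE);

            // the no-argument overload uses the current default data type
            int defaultWidth = Nd4j.sizeOfDataType();

            // the typical pattern in the examples below: bytes needed for n elements
            long n = 5;
            long requiredBytes = n * floatWidth; // 20 bytes for 5 floats

            System.out.println(floatWidth + " / " + doubleWidth + " / " + defaultWidth + " / " + requiredBytes);
        }
    }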
Example 1
Source File: CpuThreshold.java From deeplearning4j with Apache License 2.0

    @Override
    public DataBuffer compress(DataBuffer buffer) {
        INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer,
                Nd4j.getShapeInfoProvider().createShapeInformation(new long[]{1, buffer.length()}, buffer.dataType()).getFirst());
        MatchCondition condition = new MatchCondition(temp, Conditions.absGreaterThanOrEqual(threshold));
        int cntAbs = Nd4j.getExecutioner().exec(condition).getInt(0);

        //log.info("density ratio: {}", String.format("%.2f", cntAbs * 100.0f / buffer.length()));

        if (cntAbs < 2)
            return null;

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 4;
        // first 3 elements contain header
        IntPointer pointer = new IntPointer(compressedLength);
        pointer.put(0, cntAbs);
        pointer.put(1, (int) buffer.length());
        pointer.put(2, Float.floatToIntBits(threshold));
        pointer.put(3, 0);

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm(getDescriptor());
        descriptor.setCompressionType(getCompressionType());

        CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataTypeEx.THRESHOLD, pointer, buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return cbuff;
    }
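As a side note on the layout above: compress() writes a four-int header (kept-element count, original length, threshold bits, and a format tag) ahead of the encoded data; the element encoding itself happens natively inside convertDataEx. A hedged sketch of reading that header back from the same IntPointer, assuming nothing has overwritten it:

    // header layout as written by the pointer.put(...) calls above
    int keptElements    = pointer.get(0);                       // cntAbs
    int originalElems   = pointer.get(1);                       // element count of the source buffer
    float usedThreshold = Float.intBitsToFloat(pointer.get(2)); // threshold, bit-cast back to float
    int formatTag       = pointer.get(3);                       // 0 in this codec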
Example 2
Source File: WorkspaceProviderTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testNestedWorkspacesOverlap1() {
        Nd4j.setDefaultDataTypes(DataType.FLOAT, DataType.FLOAT);
        Nd4j.getWorkspaceManager().setDefaultWorkspaceConfiguration(basicConfiguration);
        try (Nd4jWorkspace ws1 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                .notifyScopeEntered()) {
            INDArray array = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
            long reqMem = 5 * Nd4j.sizeOfDataType();
            assertEquals(reqMem + reqMem % 8, ws1.getPrimaryOffset());

            try (Nd4jWorkspace ws2 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS2")
                    .notifyScopeEntered()) {
                INDArray array2 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
                reqMem = 5 * Nd4j.sizeOfDataType();
                assertEquals(reqMem + reqMem % 8, ws1.getPrimaryOffset());
                assertEquals(reqMem + reqMem % 8, ws2.getPrimaryOffset());

                try (Nd4jWorkspace ws3 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                        .notifyScopeBorrowed()) {
                    assertTrue(ws1 == ws3);

                    INDArray array3 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});

                    assertEquals(reqMem + reqMem % 8, ws2.getPrimaryOffset());
                    assertEquals((reqMem + reqMem % 8) * 2, ws1.getPrimaryOffset());
                }
            }
        }

        assertNull(Nd4j.getMemoryManager().getCurrentWorkspace());
    }
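The reqMem + reqMem % 8 expression asserted above is the workspace's 8-byte alignment of each allocation. Worked through for this test (five floats at 4 bytes each), as a standalone sketch of the arithmetic only:

    long reqMem = 5 * 4;               // 5 floats -> 20 bytes requested
    long offset = reqMem + reqMem % 8; // 20 + 4 = 24, an 8-byte boundary
    // this shorthand matches the true round-up here because 20 % 8 == 4;
    // in general the padding to the next boundary is (8 - reqMem % 8) % 8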
Example 3
Source File: ProtectedCudaConstantHandler.java From nd4j with Apache License 2.0

    /**
     * This method returns a DataBuffer with content equal to the input array.
     *
     * PLEASE NOTE: This method assumes that you'll never ever change values within the result DataBuffer
     *
     * @param array
     * @return
     */
    @Override
    public DataBuffer getConstantBuffer(float[] array) {
        // logger.info("getConstantBuffer(float[]) called");
        ArrayDescriptor descriptor = new ArrayDescriptor(array);
        Integer deviceId = AtomicAllocator.getInstance().getDeviceId();

        ensureMaps(deviceId);

        if (!buffersCache.get(deviceId).containsKey(descriptor)) {
            // we create new databuffer
            //logger.info("Creating new constant buffer...");
            DataBuffer buffer = Nd4j.createBufferDetached(array);

            if (constantOffsets.get(deviceId).get() + (array.length * Nd4j.sizeOfDataType()) < MAX_CONSTANT_LENGTH) {
                buffer.setConstant(true);
                // now we move data to constant memory, and keep happy
                moveToConstantSpace(buffer);

                buffersCache.get(deviceId).put(descriptor, buffer);

                bytes.addAndGet(array.length * Nd4j.sizeOfDataType());
            }
            return buffer;
        }
        // else logger.info("Reusing constant buffer...");

        return buffersCache.get(deviceId).get(descriptor);
    }
Example 4
Source File: ProtectedCudaConstantHandler.java From nd4j with Apache License 2.0

    /**
     * This method returns a DataBuffer with content equal to the input array.
     *
     * PLEASE NOTE: This method assumes that you'll never ever change values within the result DataBuffer
     *
     * @param array
     * @return
     */
    @Override
    public DataBuffer getConstantBuffer(double[] array) {
        //logger.info("getConstantBuffer(double[]) called: {}", Arrays.toString(array));
        ArrayDescriptor descriptor = new ArrayDescriptor(array);
        Integer deviceId = AtomicAllocator.getInstance().getDeviceId();

        ensureMaps(deviceId);

        if (!buffersCache.get(deviceId).containsKey(descriptor)) {
            // we create new databuffer
            //logger.info("Creating new constant buffer...");
            DataBuffer buffer = Nd4j.createBufferDetached(array);

            if (constantOffsets.get(deviceId).get() + (array.length * Nd4j.sizeOfDataType()) < MAX_CONSTANT_LENGTH) {
                buffer.setConstant(true);
                // now we move data to constant memory, and keep happy
                moveToConstantSpace(buffer);

                buffersCache.get(deviceId).put(descriptor, buffer);

                bytes.addAndGet(array.length * Nd4j.sizeOfDataType());
            }
            return buffer;
        }
        //else logger.info("Reusing constant buffer...");

        return buffersCache.get(deviceId).get(descriptor);
    }
Example 5
Source File: WorkspaceProviderTests.java From nd4j with Apache License 2.0

    @Test
    public void testNestedWorkspacesOverlap1() throws Exception {
        Nd4j.getWorkspaceManager().setDefaultWorkspaceConfiguration(basicConfiguration);
        try (Nd4jWorkspace ws1 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                .notifyScopeEntered()) {
            INDArray array = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
            long reqMem = 5 * Nd4j.sizeOfDataType();
            assertEquals(reqMem + reqMem % 8, ws1.getHostOffset());

            try (Nd4jWorkspace ws2 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS2")
                    .notifyScopeEntered()) {
                INDArray array2 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
                reqMem = 5 * Nd4j.sizeOfDataType();
                assertEquals(reqMem + reqMem % 8, ws1.getHostOffset());
                assertEquals(reqMem + reqMem % 8, ws2.getHostOffset());

                try (Nd4jWorkspace ws3 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                        .notifyScopeBorrowed()) {
                    assertTrue(ws1 == ws3);

                    INDArray array3 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});

                    assertEquals(reqMem + reqMem % 8, ws2.getHostOffset());
                    assertEquals((reqMem + reqMem % 8) * 2, ws1.getHostOffset());
                }
            }
        }

        assertNull(Nd4j.getMemoryManager().getCurrentWorkspace());
    }
Example 6
Source File: CpuThreshold.java From nd4j with Apache License 2.0

    @Override
    public DataBuffer compress(DataBuffer buffer) {
        INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer,
                Nd4j.getShapeInfoProvider().createShapeInformation(new int[]{1, (int) buffer.length()}).getFirst());
        MatchCondition condition = new MatchCondition(temp, Conditions.absGreaterThanOrEqual(threshold));
        int cntAbs = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);

        //log.info("density ratio: {}", String.format("%.2f", cntAbs * 100.0f / buffer.length()));

        if (cntAbs < 2)
            return null;

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 4;
        // first 3 elements contain header
        IntPointer pointer = new IntPointer(compressedLength);
        pointer.put(0, cntAbs);
        pointer.put(1, (int) buffer.length());
        pointer.put(2, Float.floatToIntBits(threshold));
        pointer.put(3, 0);

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm(getDescriptor());
        descriptor.setCompressionType(getCompressionType());

        CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.THRESHOLD, pointer, buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return cbuff;
    }
Example 7
Source File: CudaFlexibleThreshold.java From nd4j with Apache License 2.0

    @Override
    public DataBuffer compress(DataBuffer buffer) {
        INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer,
                Nd4j.getShapeInfoProvider().createShapeInformation(new int[]{1, (int) buffer.length()}));
        double max = temp.amaxNumber().doubleValue();

        int cntAbs = temp.scan(Conditions.absGreaterThanOrEqual(max - (max * threshold))).intValue();

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 3;
        // first 3 elements contain header
        IntPointer pointer = new IntPointer(compressedLength);
        pointer.put(0, cntAbs);
        pointer.put(1, (int) buffer.length());
        pointer.put(2, Float.floatToIntBits(threshold)); // please note, this value will be overwritten anyway

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm(getDescriptor());
        descriptor.setCompressionType(getCompressionType());

        CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.FTHRESHOLD, pointer, buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return cbuff;
    }
Example 8
Source File: BasicWorkspaceTests.java From nd4j with Apache License 2.0

    @Test
    public void testAllocation2() throws Exception {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getAndActivateWorkspace(basicConfig, "testAllocation2");

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getHostOffset());

        INDArray array = Nd4j.create(5);

        // checking if allocation actually happened
        long reqMem = 5 * Nd4j.sizeOfDataType();
        assertEquals(reqMem + reqMem % 8, workspace.getHostOffset());

        array.assign(1.0f);

        assertEquals(5, array.sumNumber().doubleValue(), 0.01);

        workspace.close();
    }
Example 9
Source File: JcublasLapack.java From deeplearning4j with Apache License 2.0

    @Override
    public void sgetrf(int M, int N, INDArray A, INDArray IPIV, INDArray INFO) {
        INDArray a = A;

        if (Nd4j.dataType() != DataType.FLOAT)
            log.warn("FLOAT getrf called in DOUBLE environment");

        if (A.ordering() == 'c')
            a = A.dup('f');

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        val ctx = allocator.getDeviceContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getCublasStream()));
            if (result != 0)
                throw new BlasException("solverSetStream failed");

            // transfer the INDArray into GPU memory
            CublasPointer xAPointer = new CublasPointer(a, ctx);

            // this output - indicates how much memory we'll need for the real operation
            val worksizeBuffer = (BaseCudaDataBuffer) Nd4j.getDataBufferFactory().createInt(1);
            worksizeBuffer.lazyAllocateHostPointer();

            int stat = cusolverDnSgetrf_bufferSize(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    (IntPointer) worksizeBuffer.addressPointer() // we intentionally use host pointer here
            );

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf_bufferSize failed", stat);
            }

            int worksize = worksizeBuffer.getInt(0);
            // Now allocate memory for the workspace, the permutation matrix and a return code
            Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

            // Do the actual LU decomp
            stat = cusolverDnSgetrf(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    new CudaPointer(workspace).asFloatPointer(),
                    new CudaPointer(allocator.getPointer(IPIV, ctx)).asIntPointer(),
                    new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

            // we do sync to make sure getrf is finished
            //ctx.syncOldStream();

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf failed", stat);
            }
        }
        allocator.registerAction(ctx, a);
        allocator.registerAction(ctx, INFO);
        allocator.registerAction(ctx, IPIV);

        if (a != A)
            A.assign(a);
    }
Example 10
Source File: BaseCudaDataBuffer.java From deeplearning4j with Apache License 2.0

    public void read(DataInputStream s) {
        try {
            val savedMode = AllocationMode.valueOf(s.readUTF());
            allocationMode = AllocationMode.MIXED_DATA_TYPES;

            long locLength = 0;

            if (savedMode.ordinal() < 3)
                locLength = s.readInt();
            else
                locLength = s.readLong();

            boolean reallocate = locLength != length || indexer == null;
            length = locLength;

            val t = DataType.valueOf(s.readUTF());
            // log.info("Restoring buffer ["+t+"] of length ["+ length+"]");

            if (globalType == null && Nd4j.dataType() != null) {
                globalType = Nd4j.dataType();
            }

            if (t == DataType.COMPRESSED) {
                type = t;
                return;
            }

            this.elementSize = (byte) Nd4j.sizeOfDataType(t);
            this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, t), false);
            this.type = t;

            Nd4j.getDeallocatorService().pickObject(this);

            switch (type) {
                case DOUBLE: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asDoublePointer();
                        indexer = DoubleIndexer.create((DoublePointer) pointer);
                    }
                    break;
                case FLOAT: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asFloatPointer();
                        indexer = FloatIndexer.create((FloatPointer) pointer);
                    }
                    break;
                case HALF: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asShortPointer();
                        indexer = HalfIndexer.create((ShortPointer) pointer);
                    }
                    break;
                case LONG: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asLongPointer();
                        indexer = LongIndexer.create((LongPointer) pointer);
                    }
                    break;
                case INT: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asIntPointer();
                        indexer = IntIndexer.create((IntPointer) pointer);
                    }
                    break;
                case SHORT: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asShortPointer();
                        indexer = ShortIndexer.create((ShortPointer) pointer);
                    }
                    break;
                case UBYTE: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asBytePointer();
                        indexer = UByteIndexer.create((BytePointer) pointer);
                    }
                    break;
                case BYTE: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asBytePointer();
                        indexer = ByteIndexer.create((BytePointer) pointer);
                    }
                    break;
                case BOOL: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asBooleanPointer();
                        indexer = BooleanIndexer.create((BooleanPointer) pointer);
                    }
                    break;
                default:
                    throw new UnsupportedOperationException("Unsupported data type: " + type);
            }

            readContent(s, t, t);
            allocationPoint.tickHostWrite();

        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        // we call sync to copyback data to host
        AtomicAllocator.getInstance().getFlowController().synchronizeToDevice(allocationPoint);
        //allocator.synchronizeHostData(this);
    }
Example 11
Source File: SpecialWorkspaceTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testVariableTimeSeries2() {
        WorkspaceConfiguration configuration = WorkspaceConfiguration.builder().initialSize(0).overallocationLimit(3.0)
                .policyAllocation(AllocationPolicy.OVERALLOCATE).policySpill(SpillPolicy.REALLOCATE)
                .policyLearning(LearningPolicy.FIRST_LOOP).policyReset(ResetPolicy.ENDOFBUFFER_REACHED).build();

        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(configuration, "WS1");
        // workspace.enableDebug(true);

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(configuration, "WS1")) {
            Nd4j.create(500);
            Nd4j.create(500);
        }

        assertEquals(0, workspace.getStepNumber());

        long requiredMemory = 1000 * Nd4j.sizeOfDataType();
        long shiftedSize = ((long) (requiredMemory * 1.3)) + (8 - (((long) (requiredMemory * 1.3)) % 8));
        assertEquals(requiredMemory, workspace.getSpilledSize());
        assertEquals(shiftedSize, workspace.getInitialBlockSize());
        assertEquals(workspace.getInitialBlockSize() * 4, workspace.getCurrentSize());

        for (int i = 0; i < 100; i++) {
            try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(configuration, "WS1")) {
                Nd4j.create(500);
                Nd4j.create(500);
                Nd4j.create(500);
            }
        }

        assertEquals(workspace.getInitialBlockSize() * 4, workspace.getCurrentSize());

        assertEquals(0, workspace.getNumberOfPinnedAllocations());
        assertEquals(0, workspace.getNumberOfExternalAllocations());

        assertEquals(0, workspace.getSpilledSize());
        assertEquals(0, workspace.getPinnedSize());
    }
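The shiftedSize formula above grows the required memory by a 1.3 factor and then rounds up past the next 8-byte boundary (note it adds a full 8 bytes when the grown size is already aligned). Worked numbers for this test, as a sketch assuming the default FLOAT type at 4 bytes per element:

    long requiredMemory = 1000 * 4;               // two 500-element float arrays -> 4000 bytes
    long grown = (long) (requiredMemory * 1.3);   // 5200
    long shiftedSize = grown + (8 - (grown % 8)); // 5200 is already 8-aligned, so 5200 + 8 = 5208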
Example 12
Source File: BaseCudaDataBuffer.java From deeplearning4j with Apache License 2.0

    public BaseCudaDataBuffer(float[] data) {
        //super(data);
        this(data.length, Nd4j.sizeOfDataType(DataType.FLOAT), false);
        set(data, data.length, 0, 0);
    }
Example 13
Source File: WorkspaceProviderTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testVariableInput1() {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager()
                .getWorkspaceForCurrentThread(adsiConfiguration, "ADSI");

        INDArray array1 = null;
        INDArray array2 = null;

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // we allocate the first element smaller than subsequent ones
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
        }

        long requiredMemory = 8 * 128 * 100 * Nd4j.sizeOfDataType(DataType.DOUBLE);
        long shiftedSize = ((long) (requiredMemory * 1.3)) + (8 - (((long) (requiredMemory * 1.3)) % 8));
        assertEquals(shiftedSize, workspace.getInitialBlockSize());
        assertEquals(shiftedSize * 4, workspace.getCurrentSize());
        assertEquals(0, workspace.getPrimaryOffset());
        assertEquals(0, workspace.getDeviceOffset());

        assertEquals(1, workspace.getCyclesCount());
        assertEquals(0, workspace.getStepNumber());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating same shape
            array1 = Nd4j.create(8, 128, 100);
        }

        assertEquals(workspace.getInitialBlockSize(), workspace.getPrimaryOffset());
        assertEquals(workspace.getInitialBlockSize(), workspace.getDeviceOffset());

        assertEquals(2, workspace.getCyclesCount());
        assertEquals(0, workspace.getStepNumber());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating bigger shape
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 200);
        }

        // offsets should be intact, allocation happened as pinned
        assertEquals(workspace.getInitialBlockSize(), workspace.getPrimaryOffset());
        assertEquals(workspace.getInitialBlockSize(), workspace.getDeviceOffset());
        assertEquals(1, workspace.getNumberOfPinnedAllocations());

        assertEquals(3, workspace.getCyclesCount());
        assertEquals(0, workspace.getStepNumber());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating same shape
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
        }

        assertEquals(2, workspace.getNumberOfPinnedAllocations());
        assertEquals(0, workspace.getStepNumber());
        assertEquals(4, workspace.getCyclesCount());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating same shape
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
        }

        assertEquals(3, workspace.getNumberOfPinnedAllocations());
        assertEquals(1, workspace.getStepNumber());
        assertEquals(5, workspace.getCyclesCount());

        for (int i = 0; i < 12; i++) {
            try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
                // allocating same shape
                array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
            }
        }

        // Now we know that workspace was reallocated and offset was shifted to the end of workspace
        assertEquals(4, workspace.getStepNumber());

        requiredMemory = 8 * 128 * 200 * Nd4j.sizeOfDataType(DataType.DOUBLE);
        shiftedSize = ((long) (requiredMemory * 1.3)) + (8 - (((long) (requiredMemory * 1.3)) % 8));

        //assertEquals(shiftedSize * 4, workspace.getCurrentSize());
        assertEquals(workspace.getCurrentSize(), workspace.getPrimaryOffset());
        assertEquals(workspace.getCurrentSize(), workspace.getDeviceOffset());
    }
Example 14
Source File: NativeOpExecutioner.java From nd4j with Apache License 2.0

    @Override
    public INDArray thresholdEncode(INDArray input, double threshold, Integer boundary) {
        MatchCondition condition = new MatchCondition(input, Conditions.absGreaterThanOrEqual(threshold));
        int cntAbs = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);

        if (cntAbs < 2)
            return null;

        if (boundary != null)
            cntAbs = Math.min(cntAbs, boundary);

        DataBuffer buffer = input.data();

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 4;
        // first 3 elements contain header

        DataBuffer encodedBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null
                ? Nd4j.getDataBufferFactory().createInt(4 + cntAbs, false)
                : Nd4j.getDataBufferFactory().createInt(4 + cntAbs, false, Nd4j.getMemoryManager().getCurrentWorkspace());

        encodedBuffer.put(0, cntAbs);
        encodedBuffer.put(1, (int) buffer.length());
        encodedBuffer.put(2, Float.floatToIntBits((float) threshold));

        // format id
        encodedBuffer.put(3, ThresholdCompression.FLEXIBLE_ENCODING);

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm("THRESHOLD");
        descriptor.setCompressionType(CompressionType.LOSSLESS);

        //CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(AbstractCompressor.getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.THRESHOLD, encodedBuffer.addressPointer(), buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return Nd4j.createArrayFromShapeBuffer(encodedBuffer, input.shapeInfoDataBuffer());
    }
Example 15
Source File: JcublasLapack.java From nd4j with Apache License 2.0

    public int dsyev(char _jobz, char _uplo, int N, INDArray A, INDArray R) {

        int status = -1;

        int jobz = _jobz == 'V' ? CUSOLVER_EIG_MODE_VECTOR : CUSOLVER_EIG_MODE_NOVECTOR;
        int uplo = _uplo == 'L' ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER;

        if (Nd4j.dataType() != DataBuffer.Type.DOUBLE)
            log.warn("DOUBLE dsyev called in FLOAT environment");

        INDArray a = A;

        if (A.ordering() == 'c')
            a = A.dup('f');

        // FIXME: int cast
        int M = (int) A.rows();

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            status = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
            if (status == 0) {
                // transfer the INDArray into GPU memory
                CublasPointer xAPointer = new CublasPointer(a, ctx);
                CublasPointer xRPointer = new CublasPointer(R, ctx);

                // this output - indicates how much memory we'll need for the real operation
                DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
                status = cusolverDnDsyevd_bufferSize(solverDn, jobz, uplo, M,
                        (DoublePointer) xAPointer.getDevicePointer(), M,
                        (DoublePointer) xRPointer.getDevicePointer(),
                        (IntPointer) worksizeBuffer.addressPointer());

                if (status == CUSOLVER_STATUS_SUCCESS) {
                    int worksize = worksizeBuffer.getInt(0);

                    // allocate memory for the workspace, the non-converging row buffer and a return code
                    Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

                    INDArray INFO = Nd4j.createArrayFromShapeBuffer(Nd4j.getDataBufferFactory().createInt(1),
                            Nd4j.getShapeInfoProvider().createShapeInformation(new int[] {1, 1}));

                    // Do the actual decomp
                    status = cusolverDnDsyevd(solverDn, jobz, uplo, M,
                            (DoublePointer) xAPointer.getDevicePointer(), M,
                            (DoublePointer) xRPointer.getDevicePointer(),
                            new CudaPointer(workspace).asDoublePointer(), worksize,
                            new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

                    allocator.registerAction(ctx, INFO);
                    if (status == 0)
                        status = INFO.getInt(0);
                }
            }
        }
        if (status == 0) {
            allocator.registerAction(ctx, R);
            allocator.registerAction(ctx, a);

            if (a != A)
                A.assign(a);
        }
        return status;
    }
Example 16
Source File: JcublasLapack.java From nd4j with Apache License 2.0

    public int ssyev(char _jobz, char _uplo, int N, INDArray A, INDArray R) {

        int status = -1;

        int jobz = _jobz == 'V' ? CUSOLVER_EIG_MODE_VECTOR : CUSOLVER_EIG_MODE_NOVECTOR;
        int uplo = _uplo == 'L' ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER;

        if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
            log.warn("FLOAT ssyev called in DOUBLE environment");

        INDArray a = A;

        if (A.ordering() == 'c')
            a = A.dup('f');

        // FIXME: int cast
        int M = (int) A.rows();

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            status = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
            if (status == 0) {
                // transfer the INDArray into GPU memory
                CublasPointer xAPointer = new CublasPointer(a, ctx);
                CublasPointer xRPointer = new CublasPointer(R, ctx);

                // this output - indicates how much memory we'll need for the real operation
                DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
                status = cusolverDnSsyevd_bufferSize(solverDn, jobz, uplo, M,
                        (FloatPointer) xAPointer.getDevicePointer(), M,
                        (FloatPointer) xRPointer.getDevicePointer(),
                        (IntPointer) worksizeBuffer.addressPointer());

                if (status == CUSOLVER_STATUS_SUCCESS) {
                    int worksize = worksizeBuffer.getInt(0);

                    // allocate memory for the workspace, the non-converging row buffer and a return code
                    Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

                    INDArray INFO = Nd4j.createArrayFromShapeBuffer(Nd4j.getDataBufferFactory().createInt(1),
                            Nd4j.getShapeInfoProvider().createShapeInformation(new int[] {1, 1}));

                    // Do the actual decomp
                    status = cusolverDnSsyevd(solverDn, jobz, uplo, M,
                            (FloatPointer) xAPointer.getDevicePointer(), M,
                            (FloatPointer) xRPointer.getDevicePointer(),
                            new CudaPointer(workspace).asFloatPointer(), worksize,
                            new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

                    allocator.registerAction(ctx, INFO);
                    if (status == 0)
                        status = INFO.getInt(0);
                }
            }
        }
        if (status == 0) {
            allocator.registerAction(ctx, R);
            allocator.registerAction(ctx, a);

            if (a != A)
                A.assign(a);
        }
        return status;
    }
Example 17
Source File: BasicWorkspaceTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testAllocation5() {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getAndActivateWorkspace(basicConfig, "testAllocation5");

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getPrimaryOffset());

        INDArray array = Nd4j.create(DOUBLE, new long[] {1, 5}, 'c');

        // checking if allocation actually happened
        long reqMemory = 5 * Nd4j.sizeOfDataType(DOUBLE);
        assertEquals(reqMemory + reqMemory % 8, workspace.getPrimaryOffset());

        array.assign(1.0f);

        INDArray dup = array.dup();

        assertEquals((reqMemory + reqMemory % 8) * 2, workspace.getPrimaryOffset());

        assertEquals(5, dup.sumNumber().doubleValue(), 0.01);

        workspace.close();
    }
Example 18
Source File: JcublasLapack.java From nd4j with Apache License 2.0

    @Override
    public void sgetrf(int M, int N, INDArray A, INDArray IPIV, INDArray INFO) {
        INDArray a = A;

        if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
            log.warn("FLOAT getrf called in DOUBLE environment");

        if (A.ordering() == 'c')
            a = A.dup('f');

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
            if (result != 0)
                throw new BlasException("solverSetStream failed");

            // transfer the INDArray into GPU memory
            CublasPointer xAPointer = new CublasPointer(a, ctx);

            // this output - indicates how much memory we'll need for the real operation
            DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);

            int stat = cusolverDnSgetrf_bufferSize(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    (IntPointer) worksizeBuffer.addressPointer() // we intentionally use host pointer here
            );

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf_bufferSize failed", stat);
            }

            int worksize = worksizeBuffer.getInt(0);
            // Now allocate memory for the workspace, the permutation matrix and a return code
            Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

            // Do the actual LU decomp
            stat = cusolverDnSgetrf(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    new CudaPointer(workspace).asFloatPointer(),
                    new CudaPointer(allocator.getPointer(IPIV, ctx)).asIntPointer(),
                    new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

            // we do sync to make sure getrf is finished
            //ctx.syncOldStream();

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf failed", stat);
            }
        }
        allocator.registerAction(ctx, a);
        allocator.registerAction(ctx, INFO);
        allocator.registerAction(ctx, IPIV);

        if (a != A)
            A.assign(a);
    }
Example 19
Source File: BasicWorkspaceTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testLoops1() {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().createNewWorkspace(loopOverTimeConfig);

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getPrimaryOffset());

        workspace.notifyScopeEntered();

        INDArray arrayCold = Nd4j.create(DOUBLE, 10);

        assertEquals(0, workspace.getPrimaryOffset());
        assertEquals(0, workspace.getCurrentSize());

        arrayCold.assign(1.0f);

        assertEquals(10f, arrayCold.sumNumber().floatValue(), 0.01f);

        workspace.notifyScopeLeft();

        workspace.initializeWorkspace();
        long reqMemory = 11 * Nd4j.sizeOfDataType(arrayCold.dataType());
        assertEquals(reqMemory + reqMemory % 8, workspace.getCurrentSize());

        log.info("-----------------------");

        for (int x = 0; x < 10; x++) {
            assertEquals(0, workspace.getPrimaryOffset());

            workspace.notifyScopeEntered();

            INDArray array = Nd4j.create(DOUBLE, 10);

            long reqMem = 10 * Nd4j.sizeOfDataType(array.dataType());

            assertEquals(reqMem + reqMem % 8, workspace.getPrimaryOffset());

            array.addi(1.0);

            assertEquals(reqMem + reqMem % 8, workspace.getPrimaryOffset());

            assertEquals("Failed on iteration " + x, 10, array.sumNumber().doubleValue(), 0.01);

            workspace.notifyScopeLeft();

            assertEquals(0, workspace.getPrimaryOffset());
        }
    }
Example 20
Source File: BasicWorkspaceTests.java From nd4j with Apache License 2.0

    @Test
    public void testLoops1() throws Exception {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().createNewWorkspace(loopOverTimeConfig);

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getHostOffset());

        workspace.notifyScopeEntered();

        INDArray arrayCold = Nd4j.create(10);

        assertEquals(0, workspace.getHostOffset());
        assertEquals(0, workspace.getCurrentSize());

        arrayCold.assign(1.0f);

        assertEquals(10f, arrayCold.sumNumber().floatValue(), 0.01f);

        workspace.notifyScopeLeft();

        workspace.initializeWorkspace();
        long reqMemory = 11 * Nd4j.sizeOfDataType();
        assertEquals(reqMemory + reqMemory % 8, workspace.getCurrentSize());

        log.info("-----------------------");

        for (int x = 0; x < 10; x++) {
            assertEquals(0, workspace.getHostOffset());

            workspace.notifyScopeEntered();

            INDArray array = Nd4j.create(10);

            long reqMem = 10 * Nd4j.sizeOfDataType();

            assertEquals(reqMem + reqMem % 8, workspace.getHostOffset());

            array.addi(1.0f);

            assertEquals(reqMem + reqMem % 8, workspace.getHostOffset());

            assertEquals("Failed on iteration " + x, 10, array.sumNumber().doubleValue(), 0.01);

            workspace.notifyScopeLeft();

            assertEquals(0, workspace.getHostOffset());
        }
    }