org.nd4j.jita.conf.CudaEnvironment Java Examples

The following examples show how to use org.nd4j.jita.conf.CudaEnvironment. All of them are taken from open source projects; the source file and project are noted above each example.
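Before the individual examples, here is a minimal usage sketch: obtain the singleton, fetch its Configuration, and chain setters before running any workload. This assumes the nd4j CUDA backend is on the classpath; the setter names are taken from the examples below, and the available options and their defaults vary across nd4j versions.

import org.nd4j.jita.conf.CudaEnvironment;

public class CudaEnvironmentSetup {
    public static void main(String[] args) {
        // Configure the CUDA backend up front, before any INDArray work happens.
        CudaEnvironment.getInstance().getConfiguration()
                .allowMultiGPU(true)                             // spread work across all visible devices
                .allowCrossDeviceAccess(true)                    // allow P2P access between devices
                .setMaximumDeviceCache(2L * 1024 * 1024 * 1024L) // cap the device-side cache at 2 GB
                .setMaximumHostCache(8L * 1024 * 1024 * 1024L);  // cap the host-side cache at 8 GB

        // ... create INDArrays / run training as usual ...
    }
}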
Example #1
Source File: CudaAffinityManager.java    From deeplearning4j with Apache License 2.0
/**
 * Returns the id of the next available device, using simple round-robin balancing across the available devices.
 *
 * @param threadId id of the thread being mapped; used for logging only
 * @return id of the device the thread should be mapped to
 */
protected Integer getNextDevice(long threadId) {
    Integer device = null;
    if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) {
        // simple round-robin here
        synchronized (this) {
            device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement());

            // We check only for number of entries here, not their actual values
            if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size())
                devPtr.set(0);

            val t = Thread.currentThread();
            val n = t.getId() == threadId ? t.getName() : "N/A";

            logger.debug("Mapping thread [{} - {}] to device [{}], out of [{}] devices...", threadId, n, device, CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size());
        }
    } else {
        device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0);
        logger.debug("Single device is forced, mapping to device [{}]", device);
    }

    return device;
}
 
Example #2
Source File: AsynchronousFlowControllerTest.java    From nd4j with Apache License 2.0
@Before
public void setUp() throws Exception {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.ASYNCHRONOUS)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMaximumSingleDeviceAllocation(1024 * 1024 * 1024L)
            .setMaximumBlockSize(128)
            .allowPreallocation(true)
            .setPreallocationCalls(20)
            .setMaximumGridSize(256)
            .enableDebug(false)
            .setVerbose(false);

    if (allocator == null)
        allocator = AtomicAllocator.getInstance();

    if (controller == null)
        controller = (AsynchronousFlowController) allocator.getFlowController();
}
 
Example #3
Source File: LimitedContextPool.java    From nd4j with Apache License 2.0
public LimitedContextPool() {
    int perDevicePool = CudaEnvironment.getInstance().getConfiguration().getPoolSize();

    // one resource garbage-collector thread per reference queue
    for (int i = 0; i < 4; i++) {
        ReferenceQueue<Thread> queue = new ReferenceQueue<>();
        ResourceGarbageCollectorThread collector = new ResourceGarbageCollectorThread(i, queue);
        collector.start();

        collectors.put(i, collector);
        queueMap.put(i, queue);
    }

    fillPoolWithResources(perDevicePool, false);
    currentPoolSize.set(perDevicePool);
}
 
Example #4
Source File: AtomicAllocator.java    From deeplearning4j with Apache License 2.0
public void applyConfiguration() {
    CudaEnvironment.getInstance().notifyConfigurationApplied();

    // push the Java-side configuration down to the native ops layer
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug());
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose());
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize());
}
 
Example #5
Source File: AtomicAllocator.java    From nd4j with Apache License 2.0
public void applyConfiguration() {
    CudaEnvironment.getInstance().notifyConfigurationApplied();

    // push the Java-side configuration down to the native ops layer
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug());
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose());
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize());
}
 
Example #6
Source File: PolicyNetService.java    From FancyBing with GNU General Public License v3.0
public static void main(String[] args) {
	Nd4j.getMemoryManager().setAutoGcWindow(2000);
	CudaEnvironment.getInstance().getConfiguration()
		.setMaximumDeviceCacheableLength(1024 * 1024 * 1024L)
		.setMaximumDeviceCache(2L * 1024 * 1024 * 1024L)
		.setMaximumHostCacheableLength(1024 * 1024 * 1024L)
		.setMaximumHostCache(8L * 1024 * 1024 * 1024L);
	
	// Register services; bind them on multiple ports for better performance
	Registry registry = null;
	for (int i = 0; i < Global.NETWORK_THREADS_NUM; i++) {
		try {
			registry = LocateRegistry.createRegistry(Global.POLICYNET_RMI_PORT + i);
			PolicyNetService policyNet = new PolicyNetService();
			registry.rebind(Global.NAME + "Policy", policyNet);
			
			System.out.println("Bind FancyBingPolicy server on " + (Global.POLICYNET_RMI_PORT + i));
			System.out.println("FancyBingPolicy server started.");
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
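The cache limits here are plain byte counts (1024 * 1024 * 1024L is 1 GB): this configuration caps the device-side cache at 2 GB and the host-side cache at 8 GB, while the two "cacheable length" settings bound how large an individual chunk may be and still be eligible for caching (compare Example #25 below, which checks getMaximumHostCacheableLength() before consulting the cache).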
 
Example #7
Source File: CudaAffinityManager.java    From nd4j with Apache License 2.0
/**
 * Returns the id of the next available device, using simple round-robin balancing across the available devices.
 *
 * @param threadId id of the thread being mapped; used for logging only
 * @return id of the device the thread should be mapped to
 */
protected Integer getNextDevice(long threadId) {
    Integer device = null;
    if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) {
        // simple round-robin here
        synchronized (this) {
            device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement());

            // We check only for number of entries here, not their actual values
            if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size())
                devPtr.set(0);

            logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device,
                    CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size());
        }
    } else {
        device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0);
        logger.debug("Single device is forced, mapping to device [{}]", device);
    }

    return device;
}
 
Example #8
Source File: CudaTransformsTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.ASYNCHRONOUS)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMaximumSingleDeviceAllocation(1024 * 1024 * 1024L)
            .setMaximumBlockSize(128)
            .setMaximumGridSize(256)
            .enableDebug(false)
            .setVerbose(false);

    System.out.println("Init called");
}
 
Example #9
Source File: CudnnDropoutHelper.java    From deeplearning4j with Apache License 2.0
@Override
public void backprop(INDArray gradAtOutput, INDArray gradAtInput) {
    int[] gradAtOutShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.shape()));
    int[] gradAtOutStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dyTensorDesc, dataType, gradAtOutShape.length, gradAtOutShape, gradAtOutStride));

    int[] gradAtInShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.shape()));
    int[] gradAtInStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dxTensorDesc, dataType, gradAtInShape.length, gradAtInShape, gradAtInStride));

    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(gradAtOutput, gradAtInput);
    Pointer dyPtr = allocator.getPointer(gradAtOutput, context);
    Pointer dxPtr = allocator.getPointer(gradAtInput, context);

    checkCudnn(cudnnDropoutBackward(cudnnContext, cudnnContext.dropoutDesc, cudnnContext.dyTensorDesc, dyPtr,
            cudnnContext.dxTensorDesc, dxPtr, mask, mask.capacity()));

    allocator.registerAction(context, gradAtOutput, gradAtInput);
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        context.syncOldStream();
}
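Note the debug hook at the end: when isDebug() is set in the configuration, the helper synchronizes the CUDA stream right after the cuDNN call, so errors surface at the offending operation rather than at some later, unrelated synchronization point, at the cost of throughput.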
 
Example #10
Source File: WeirdSparkTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .enableDebug(false)
            .setVerbose(false)
            .allowPreallocation(false)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMemoryModel(Configuration.MemoryModel.IMMEDIATE);
}
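This setup keeps allocation behavior as direct as possible for the test: preallocation is off, and the memory model is IMMEDIATE, which, as the name suggests, makes device allocation happen eagerly rather than being deferred the way the DELAYED model in Example #21 defers it.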
 
Example #11
Source File: CudaReduce3Tests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setAllocationModel(Configuration.AllocationModel.DIRECT)
            .setMaximumBlockSize(32)
            .enableDebug(true)
            .setVerbose(true);

    System.out.println("Init called");
}
 
Example #12
Source File: DevicesTests.java    From nd4j with Apache License 2.0
@Test
public void testOtherDevice1() {
    CudaEnvironment.getInstance().getConfiguration().useDevices(1, 2);

    INDArray array = Nd4j.create(1000000);
    for (int i = 0; i < 10000; i++) {
        array.addi(10f);
    }

    assertEquals(1, AtomicAllocator.getInstance().getAllocationPoint(array).getDeviceId());
}
 
Example #13
Source File: DevicesTests.java    From nd4j with Apache License 2.0
@Test
public void testOtherDevice2() {
    CudaEnvironment.getInstance().getConfiguration().useDevices(0);

    INDArray array = Nd4j.create(1000000);
    for (int i = 0; i < 10000; i++) {
        array.addi(10f);
    }

    assertEquals(0, AtomicAllocator.getInstance().getAllocationPoint(array).getDeviceId());
}
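Both tests work because useDevices(...) narrows the list returned by getAvailableDevices(), which is exactly the pool that the affinity manager's round-robin mapping (Examples #1 and #7) draws from, so new arrays can only land on the whitelisted devices.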
 
Example #14
Source File: AveragingTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.FLOAT);
    CudaEnvironment.getInstance().getConfiguration()
            .allowMultiGPU(true)
            .allowCrossDeviceAccess(true)
            .enableDebug(true)
            .setMaximumGridSize(512)
            .setMaximumBlockSize(256)
            .setVerbose(true);
}
 
Example #15
Source File: CudaBroadcastTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(64)
            .setMaximumGridSize(128)
            .enableDebug(true);

    System.out.println("Init called");
}
 
Example #16
Source File: SporadicTests.java    From nd4j with Apache License 2.0
@Test
public void testReduceX() throws Exception {
    CudaEnvironment.getInstance().getConfiguration().setMaximumGridSize(11);
    INDArray x = Nd4j.create(500, 500);
    INDArray exp_0 = Nd4j.linspace(1, 500, 500);
    INDArray exp_1 = Nd4j.create(500).assign(250.5);

    x.addiRowVector(Nd4j.linspace(1, 500, 500));

    assertEquals(exp_0, x.mean(0));
    assertEquals(exp_1, x.mean(1));

    assertEquals(250.5, x.meanNumber().doubleValue(), 1e-5);
}
 
Example #17
Source File: SporadicTests.java    From nd4j with Apache License 2.0
@Test
public void testIndexReduceX() throws Exception {
    CudaEnvironment.getInstance().getConfiguration().setMaximumGridSize(11);
    INDArray x = Nd4j.create(500, 500);
    INDArray exp_0 = Nd4j.create(500).assign(0);
    INDArray exp_1 = Nd4j.create(500).assign(499);

    x.addiRowVector(Nd4j.linspace(1, 500, 500));

    assertEquals(exp_0, Nd4j.argMax(x, 0));
    assertEquals(exp_1, Nd4j.argMax(x, 1));
}
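Both of the tests above shrink the maximum grid size to just 11 blocks, presumably to force the reduction and index-reduction kernels over a 500x500 matrix through the code path where a single kernel launch cannot cover the whole array, while still asserting exact results.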
 
Example #18
Source File: ElementWiseStrideTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMaximumBlockSize(128)
            .enableDebug(true)
            .setVerbose(true);

    System.out.println("Init called");
}
 
Example #19
Source File: EndlessTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .enableDebug(false)
            .setVerbose(false);

    System.out.println("Init called");
}
 
Example #20
Source File: CudaPairwiseTrainformsTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(256)
            .setMaximumGridSize(64)
            .enableDebug(true)
            .setVerbose(true);

    System.out.println("Init called");
}
 
Example #21
Source File: DelayedMemoryTest.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMemoryModel(Configuration.MemoryModel.DELAYED)
            .allowMultiGPU(true)
            .enableDebug(true);
}
 
Example #22
Source File: CudaIndexReduceTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(64)
            .setMaximumGridSize(64)
            .enableDebug(true);

    System.out.println("Init called");
}
 
Example #23
Source File: DoublesTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() throws Exception {
    System.out.println("----------------------");
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);

    CudaEnvironment.getInstance().getConfiguration().enableDebug(true).setVerbose(true).allowMultiGPU(false);
}
 
Example #24
Source File: CudaAffinityManager.java    From nd4j with Apache License 2.0
/**
 * Pairs the specified thread with the specified device.
 *
 * @param threadId id of the thread to attach
 * @param deviceId id of the device the thread should be attached to
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    List<Integer> devices = new ArrayList<>(CudaEnvironment.getInstance().getConfiguration().getAvailableDevices());
    logger.debug("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId,
                    devices.size());
    affinityMap.put(threadId, deviceId);
}
 
Example #25
Source File: CudaCachingZeroProvider.java    From nd4j with Apache License 2.0
/**
 * Provides a PointersPair for the memory chunk described by the given AllocationShape.
 *
 * PLEASE NOTE: this method may skip the actual malloc and instead hand out a previously cached free memory chunk of equal shape.
 *
 * @param shape shape of the desired memory chunk
 * @param point target AllocationPoint structure
 * @param location either HOST or DEVICE
 * @return pointers to the allocated (or cached) memory chunk
 */
@Override
public PointersPair malloc(AllocationShape shape, AllocationPoint point, AllocationStatus location) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);

    if (location == AllocationStatus.HOST && reqMemory < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCacheableLength()) {

        CacheHolder cache = zeroCache.get(shape);
        if (cache != null) {
            Pointer pointer = cache.poll();
            if (pointer != null) {
                cacheZeroHit.incrementAndGet();

                // since this memory chunk is going to be used now, subtract its size from the cached total
                zeroCachedAmount.addAndGet(-1 * reqMemory);

                PointersPair pair = new PointersPair();
                pair.setDevicePointer(new CudaPointer(pointer.address()));
                pair.setHostPointer(new CudaPointer(pointer.address()));

                point.setAllocationStatus(AllocationStatus.HOST);
                return pair;
            }
        }
        cacheZeroMiss.incrementAndGet();

        if (CudaEnvironment.getInstance().getConfiguration().isUsePreallocation() && zeroCachedAmount.get() < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache() / 10
                        && reqMemory < 16 * 1024 * 1024L) {
            CachePreallocator preallocator = new CachePreallocator(shape, location, CudaEnvironment.getInstance().getConfiguration().getPreallocationCalls());
            preallocator.start();
        }

        return super.malloc(shape, point, location);
    }

    return super.malloc(shape, point, location);
}
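In short: small host-side requests are served from a per-shape cache when a free chunk is available (a cache hit simply reuses the cached pointer for both the host and device sides of the pair); on a miss the request falls through to a real allocation, and if preallocation is enabled and the cache is underfilled, a background CachePreallocator is started to warm the cache for that shape.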
 
Example #26
Source File: CudaFullCachingProvider.java    From nd4j with Apache License 2.0
/**
 * Provides a PointersPair for the memory chunk described by the given AllocationShape.
 *
 * PLEASE NOTE: this method may skip the actual malloc and instead hand out a previously cached free memory chunk of equal shape.
 *
 * @param shape shape of the desired memory chunk
 * @param point target AllocationPoint structure
 * @param location either HOST or DEVICE
 * @return pointers to the allocated (or cached) memory chunk
 */
@Override
public PointersPair malloc(AllocationShape shape, AllocationPoint point, AllocationStatus location) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);
    if (location == AllocationStatus.DEVICE && reqMemory < CudaEnvironment.getInstance().getConfiguration().getMaximumDeviceAllocation()) {
        int deviceId = AtomicAllocator.getInstance().getDeviceId();
        ensureDeviceCacheHolder(deviceId, shape);
        ensureDeviceCacheHolder(deviceId, shape);

        CacheHolder cache = deviceCache.get(deviceId).get(shape);
        if (cache != null) {
            Pointer pointer = cache.poll();
            if (pointer != null) {
                cacheDeviceHit.incrementAndGet();

                deviceCachedAmount.get(deviceId).addAndGet(-1 * reqMemory);

                PointersPair pair = new PointersPair();
                pair.setDevicePointer(pointer);

                point.setAllocationStatus(AllocationStatus.DEVICE);
                point.setDeviceId(deviceId);
                return pair;
            }
        }
        cacheDeviceMiss.incrementAndGet();
        return super.malloc(shape, point, location);
    }
    return super.malloc(shape, point, location);
}
 
Example #27
Source File: CudaAccumTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.ASYNCHRONOUS)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(128)
            .setMaximumGridSize(256)
            .enableDebug(false)
            .setVerbose(false);

    System.out.println("Init called");
}
 
Example #28
Source File: CudaScalarsTests.java    From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(64)
            .setMaximumGridSize(256)
            .enableDebug(true);

    System.out.println("Init called");
}
 
Example #29
Source File: LimitedContextPool.java    From nd4j with Apache License 2.0
protected synchronized void fillPoolWithResources(int numResources, boolean restoreDevice) {
    List<Integer> devices = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices();

    int cDevice = 0;
    if (restoreDevice) {
        cDevice = AtomicAllocator.getInstance().getDeviceId();
    }

    NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps();

    for (Integer device : devices) {
        nativeOps.setDevice(new CudaPointer(device));
        pool.put(device, new LinkedBlockingQueue<CudaContext>());

        cublasHandle_t handle = createNewCublasHandle();
        cusolverDnHandle_t solverHandle = createNewSolverHandle();
        for (int cnt = 0; cnt < numResources; cnt++) {
            CudaContext context = createNewStream(device);
            context.initOldStream();
            getDeviceBuffers(context, device);
            context.setHandle(handle);
            context.setSolverHandle(solverHandle);

            context.syncOldStream();

            pool.get(device).add(context);
        }
    }

    if (restoreDevice) {
        nativeOps.setDevice(new CudaPointer(cDevice));
    }
}
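Note the sharing granularity here: each device gets one cuBLAS handle and one cuSolver handle shared by all of its pooled contexts, while every context gets its own stream and device buffers; the active device is switched while each pool is filled and, when restoreDevice is set, restored afterwards.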
 
Example #30
Source File: ProtectedCudaShapeInfoProvider.java    From nd4j with Apache License 2.0
@Override
public Pair<DataBuffer, long[]> createShapeInformation(long[] shape, long[] stride, long offset, long elementWiseStride, char order) {
    // We force offset to 0 in the shape buffer: it helps cache efficiency, and the offset value is not actually used on the native side
    offset = 0;

    Integer deviceId = AtomicAllocator.getInstance().getDeviceId();

    LongShapeDescriptor descriptor = new LongShapeDescriptor(shape, stride, offset, elementWiseStride, order);

    if (!protector.containsDataBuffer(deviceId, descriptor)) {
        Pair<DataBuffer, long[]> buffer = null;
        synchronized (this) {
            if (!protector.containsDataBuffer(deviceId, descriptor)) {
                buffer = super.createShapeInformation(shape, stride, offset, elementWiseStride, order);
                buffer.getFirst().setConstant(true);

                if (CudaEnvironment.getInstance().getConfiguration().getMemoryModel() == Configuration.MemoryModel.IMMEDIATE) {
                    Nd4j.getConstantHandler().moveToConstantSpace(buffer.getFirst());
                }

                protector.persistDataBuffer(deviceId, descriptor, buffer);

                bytes.addAndGet(buffer.getFirst().length() * 4 * 2);

                cacheMiss.incrementAndGet();
            } else {
                buffer = protector.getDataBuffer(deviceId, descriptor);
            }
        }
        return buffer;
    } else {
        cacheHit.incrementAndGet();
    }

    return protector.getDataBuffer(deviceId, descriptor);
}