org.nd4j.jita.conf.CudaEnvironment Java Examples
The following examples show how to use
org.nd4j.jita.conf.CudaEnvironment.
Each example notes the project, source file, and license it was taken from.
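Before the individual examples, here is a minimal sketch of the typical usage pattern: CudaEnvironment is a singleton, and its Configuration is normally adjusted once, before any INDArray is created, so the CUDA backend picks the settings up at initialization. The setter names below all appear in the examples that follow; the specific values and the class name CudaEnvironmentUsageSketch are illustrative assumptions, not recommended settings.

import org.nd4j.jita.conf.CudaEnvironment;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CudaEnvironmentUsageSketch {
    public static void main(String[] args) {
        // Adjust the CUDA backend configuration before any ND4J work is done.
        // Values here are illustrative only.
        CudaEnvironment.getInstance().getConfiguration()
                .allowMultiGPU(true)                                  // use all visible devices
                .setMaximumDeviceCache(2L * 1024 * 1024 * 1024L)      // cap device-side cache at 2 GB
                .setMaximumHostCache(8L * 1024 * 1024 * 1024L)        // cap host-side cache at 8 GB
                .enableDebug(false)
                .setVerbose(false);

        // Any subsequent ND4J operation runs against the configuration above.
        INDArray array = Nd4j.create(1000, 1000);
        array.addi(1.0f);
        System.out.println("Mean: " + array.meanNumber());
    }
}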
Example #1
Source File: CudaAffinityManager.java From deeplearning4j with Apache License 2.0
/**
 * This method returns device id available. Round-robin balancing used here.
 *
 * @param threadId this parameter can be anything, it's used for logging only.
 * @return
 */
protected Integer getNextDevice(long threadId) {
    Integer device = null;
    if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) {
        // simple round-robin here
        synchronized (this) {
            device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement());

            // We check only for number of entries here, not their actual values
            if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size())
                devPtr.set(0);

            val t = Thread.currentThread();
            val n = t.getId() == threadId ? t.getName() : "N/A";

            logger.debug("Mapping thread [{} - {}] to device [{}], out of [{}] devices...", threadId, n, device,
                    CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size());
        }
    } else {
        device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0);
        logger.debug("Single device is forced, mapping to device [{}]", device);
    }

    return device;
}
Example #2
Source File: AsynchronousFlowControllerTest.java From nd4j with Apache License 2.0
@Before
public void setUp() throws Exception {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.ASYNCHRONOUS)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMaximumSingleDeviceAllocation(1024 * 1024 * 1024L)
            .setMaximumBlockSize(128)
            .allowPreallocation(true)
            .setPreallocationCalls(20)
            .setMaximumGridSize(256)
            .enableDebug(false)
            .setVerbose(false);

    if (allocator == null)
        allocator = AtomicAllocator.getInstance();

    if (controller == null)
        controller = (AsynchronousFlowController) allocator.getFlowController();
}
Example #3
Source File: LimitedContextPool.java From nd4j with Apache License 2.0
public LimitedContextPool() {
    int perDevicePool = CudaEnvironment.getInstance().getConfiguration().getPoolSize();

    for (int i = 0; i < 4; i++) {
        ReferenceQueue<Thread> queue = new ReferenceQueue<>();
        ResourceGarbageCollectorThread collector = new ResourceGarbageCollectorThread(i, queue);
        collector.start();
        collectors.put(i, collector);
        queueMap.put(i, queue);
    }

    fillPoolWithResources(perDevicePool, false);
    currentPoolSize.set(perDevicePool);
}
Example #4
Source File: AtomicAllocator.java From deeplearning4j with Apache License 2.0
public void applyConfiguration() {
    //log.info("Applying CUDA configuration...");
    CudaEnvironment.getInstance().notifyConfigurationApplied();

    NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug());
    //configuration.enableDebug(configuration.isDebug());

    NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose());
    //configuration.setVerbose(configuration.isVerbose());

    NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed());
    //configuration.allowCrossDeviceAccess(configuration.isCrossDeviceAccessAllowed());

    NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize());
    //configuration.setMaximumGridSize(configuration.getMaximumGridSize());

    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize());
    // configuration.setMaximumBlockSize(configuration.getMaximumBlockSize());

    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize());
    // configuration.setMinimumBlockSize(configuration.getMinimumBlockSize());
}
Example #5
Source File: AtomicAllocator.java From nd4j with Apache License 2.0
public void applyConfiguration() {
    //log.info("Applying CUDA configuration...");
    CudaEnvironment.getInstance().notifyConfigurationApplied();

    NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug());
    //configuration.enableDebug(configuration.isDebug());

    NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose());
    //configuration.setVerbose(configuration.isVerbose());

    NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed());
    //configuration.allowCrossDeviceAccess(configuration.isCrossDeviceAccessAllowed());

    NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize());
    //configuration.setMaximumGridSize(configuration.getMaximumGridSize());

    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize());
    // configuration.setMaximumBlockSize(configuration.getMaximumBlockSize());

    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize());
    // configuration.setMinimumBlockSize(configuration.getMinimumBlockSize());
}
Example #6
Source File: PolicyNetService.java From FancyBing with GNU General Public License v3.0
public static void main(String[] args) {
    Nd4j.getMemoryManager().setAutoGcWindow(2000);
    CudaEnvironment.getInstance().getConfiguration()
            .setMaximumDeviceCacheableLength(1024 * 1024 * 1024L)
            .setMaximumDeviceCache(2L * 1024 * 1024 * 1024L)
            .setMaximumHostCacheableLength(1024 * 1024 * 1024L)
            .setMaximumHostCache(8L * 1024 * 1024 * 1024L);

    // Register services, bind services in multi ports for better performance
    Registry registry = null;
    for (int i = 0; i < Global.NETWORK_THREADS_NUM; i++) {
        try {
            registry = LocateRegistry.createRegistry(Global.POLICYNET_RMI_PORT + i);
            PolicyNetService policyNet = new PolicyNetService();
            registry.rebind(Global.NAME + "Policy", policyNet);
            System.out.println("Bind FancyBingPolicy server on " + (Global.POLICYNET_RMI_PORT + i));
            System.out.println("FancyBingPolicy server started.");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Example #7
Source File: CudaAffinityManager.java From nd4j with Apache License 2.0
/**
 * This method returns device id available. Round-robin balancing used here.
 *
 * @param threadId this parameter can be anything, it's used for logging only.
 * @return
 */
protected Integer getNextDevice(long threadId) {
    Integer device = null;
    if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) {
        // simple round-robin here
        synchronized (this) {
            device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement());

            // We check only for number of entries here, not their actual values
            if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size())
                devPtr.set(0);

            logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device,
                    CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size());
        }
    } else {
        device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0);
        logger.debug("Single device is forced, mapping to device [{}]", device);
    }

    return device;
}
Example #8
Source File: CudaTransformsTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.ASYNCHRONOUS)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMaximumSingleDeviceAllocation(1024 * 1024 * 1024L)
            .setMaximumBlockSize(128)
            .setMaximumGridSize(256)
            .enableDebug(false)
            .setVerbose(false);

    System.out.println("Init called");
}
Example #9
Source File: CudnnDropoutHelper.java From deeplearning4j with Apache License 2.0
@Override
public void backprop(INDArray gradAtOutput, INDArray gradAtInput) {
    int[] gradAtOutShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.shape()));
    int[] gradAtOutStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dyTensorDesc, dataType, gradAtOutShape.length, gradAtOutShape, gradAtOutStride));

    int[] gradAtInShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.shape()));
    int[] gradAtInStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dxTensorDesc, dataType, gradAtInShape.length, gradAtInShape, gradAtInStride));

    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(gradAtOutput, gradAtInput);
    Pointer dyPtr = allocator.getPointer(gradAtOutput, context);
    Pointer dxPtr = allocator.getPointer(gradAtInput, context);

    checkCudnn(cudnnDropoutBackward(cudnnContext, cudnnContext.dropoutDesc, cudnnContext.dyTensorDesc, dyPtr,
            cudnnContext.dxTensorDesc, dxPtr, mask, mask.capacity()));

    allocator.registerAction(context, gradAtOutput, gradAtInput);
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        context.syncOldStream();
}
Example #10
Source File: WeirdSparkTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .enableDebug(false)
            .setVerbose(false)
            .allowPreallocation(false)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMemoryModel(Configuration.MemoryModel.IMMEDIATE);
}
Example #11
Source File: CudaReduce3Tests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setAllocationModel(Configuration.AllocationModel.DIRECT)
            .setMaximumBlockSize(32)
            .enableDebug(true)
            .setVerbose(true);

    System.out.println("Init called");
}
Example #12
Source File: DevicesTests.java From nd4j with Apache License 2.0
@Test
public void testOtherDevice1() {
    CudaEnvironment.getInstance().getConfiguration().useDevices(1, 2);

    INDArray array = Nd4j.create(1000000);

    for (int i = 0; i < 10000; i++) {
        array.addi(10f);
    }

    assertEquals(1, AtomicAllocator.getInstance().getAllocationPoint(array).getDeviceId());
}
Example #13
Source File: DevicesTests.java From nd4j with Apache License 2.0
@Test
public void testOtherDevice2() {
    CudaEnvironment.getInstance().getConfiguration().useDevices(0);

    INDArray array = Nd4j.create(1000000);

    for (int i = 0; i < 10000; i++) {
        array.addi(10f);
    }

    assertEquals(0, AtomicAllocator.getInstance().getAllocationPoint(array).getDeviceId());
}
Example #14
Source File: AveragingTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.FLOAT);
    CudaEnvironment.getInstance().getConfiguration()
            .allowMultiGPU(true)
            .allowCrossDeviceAccess(true)
            .enableDebug(true)
            .setMaximumGridSize(512)
            .setMaximumBlockSize(256)
            .setVerbose(true);
}
Example #15
Source File: CudaBroadcastTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(64)
            .setMaximumGridSize(128)
            .enableDebug(true);

    System.out.println("Init called");
}
Example #16
Source File: SporadicTests.java From nd4j with Apache License 2.0
@Test
public void testReduceX() throws Exception {
    CudaEnvironment.getInstance().getConfiguration().setMaximumGridSize(11);

    INDArray x = Nd4j.create(500, 500);
    INDArray exp_0 = Nd4j.linspace(1, 500, 500);
    INDArray exp_1 = Nd4j.create(500).assign(250.5);

    x.addiRowVector(Nd4j.linspace(1, 500, 500));

    assertEquals(exp_0, x.mean(0));
    assertEquals(exp_1, x.mean(1));
    assertEquals(250.5, x.meanNumber().doubleValue(), 1e-5);
}
Example #17
Source File: SporadicTests.java From nd4j with Apache License 2.0
@Test
public void testIndexReduceX() throws Exception {
    CudaEnvironment.getInstance().getConfiguration().setMaximumGridSize(11);

    INDArray x = Nd4j.create(500, 500);
    INDArray exp_0 = Nd4j.create(500).assign(0);
    INDArray exp_1 = Nd4j.create(500).assign(499);

    x.addiRowVector(Nd4j.linspace(1, 500, 500));

    assertEquals(exp_0, Nd4j.argMax(x, 0));
    assertEquals(exp_1, Nd4j.argMax(x, 1));
}
Example #18
Source File: ElementWiseStrideTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .setMaximumBlockSize(128)
            .enableDebug(true)
            .setVerbose(true);

    System.out.println("Init called");
}
Example #19
Source File: EndlessTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setAllocationModel(Configuration.AllocationModel.CACHE_ALL)
            .enableDebug(false)
            .setVerbose(false);

    System.out.println("Init called");
}
Example #20
Source File: CudaPairwiseTrainformsTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(256)
            .setMaximumGridSize(64)
            .enableDebug(true)
            .setVerbose(true);

    System.out.println("Init called");
}
Example #21
Source File: DelayedMemoryTest.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMemoryModel(Configuration.MemoryModel.DELAYED)
            .allowMultiGPU(true)
            .enableDebug(true);
}
Example #22
Source File: CudaIndexReduceTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(64)
            .setMaximumGridSize(64)
            .enableDebug(true);

    System.out.println("Init called");
}
Example #23
Source File: DoublesTests.java From nd4j with Apache License 2.0
@Before
public void setUp() throws Exception {
    System.out.println("----------------------");
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);
    CudaEnvironment.getInstance().getConfiguration().enableDebug(true).setVerbose(true).allowMultiGPU(false);
}
Example #24
Source File: CudaAffinityManager.java From nd4j with Apache License 2.0
/**
 * This method pairs specified thread & device
 *
 * @param threadId
 * @param deviceId
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    List<Integer> devices = new ArrayList<>(CudaEnvironment.getInstance().getConfiguration().getAvailableDevices());

    logger.debug("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId, devices.size());
    affinityMap.put(threadId, deviceId);
}
Example #25
Source File: CudaCachingZeroProvider.java From nd4j with Apache License 2.0
/**
 * This method provides PointersPair to memory chunk specified by AllocationShape
 *
 * PLEASE NOTE: This method can actually ignore malloc request, and give out previously cached free memory chunk with equal shape.
 *
 * @param shape shape of desired memory chunk
 * @param point target AllocationPoint structure
 * @param location either HOST or DEVICE
 * @return
 */
@Override
public PointersPair malloc(AllocationShape shape, AllocationPoint point, AllocationStatus location) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);
    if (location == AllocationStatus.HOST
            && reqMemory < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCacheableLength()) {

        CacheHolder cache = zeroCache.get(shape);
        if (cache != null) {
            Pointer pointer = cache.poll();
            if (pointer != null) {
                cacheZeroHit.incrementAndGet();

                // since this memory chunk is going to be used now, remove it's amount from
                zeroCachedAmount.addAndGet(-1 * reqMemory);

                PointersPair pair = new PointersPair();
                pair.setDevicePointer(new CudaPointer(pointer.address()));
                pair.setHostPointer(new CudaPointer(pointer.address()));

                point.setAllocationStatus(AllocationStatus.HOST);
                return pair;
            }
        }
        cacheZeroMiss.incrementAndGet();

        if (CudaEnvironment.getInstance().getConfiguration().isUsePreallocation()
                && zeroCachedAmount.get() < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache() / 10
                && reqMemory < 16 * 1024 * 1024L) {
            CachePreallocator preallocator = new CachePreallocator(shape, location,
                    CudaEnvironment.getInstance().getConfiguration().getPreallocationCalls());
            preallocator.start();
        }

        cacheZeroMiss.incrementAndGet();
        return super.malloc(shape, point, location);
    }

    return super.malloc(shape, point, location);
}
Example #26
Source File: CudaFullCachingProvider.java From nd4j with Apache License 2.0
/**
 * This method provides PointersPair to memory chunk specified by AllocationShape
 *
 * PLEASE NOTE: This method can actually ignore malloc request, and give out previously cached free memory chunk with equal shape.
 *
 * @param shape shape of desired memory chunk
 * @param point target AllocationPoint structure
 * @param location either HOST or DEVICE
 * @return
 */
@Override
public PointersPair malloc(AllocationShape shape, AllocationPoint point, AllocationStatus location) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);
    if (location == AllocationStatus.DEVICE
            && reqMemory < CudaEnvironment.getInstance().getConfiguration().getMaximumDeviceAllocation()) {

        int deviceId = AtomicAllocator.getInstance().getDeviceId();
        ensureDeviceCacheHolder(deviceId, shape);

        CacheHolder cache = deviceCache.get(deviceId).get(shape);
        if (cache != null) {
            Pointer pointer = cache.poll();
            if (pointer != null) {
                cacheDeviceHit.incrementAndGet();
                deviceCachedAmount.get(deviceId).addAndGet(-1 * reqMemory);

                PointersPair pair = new PointersPair();
                pair.setDevicePointer(pointer);

                point.setAllocationStatus(AllocationStatus.DEVICE);
                point.setDeviceId(deviceId);
                return pair;
            }
        }
        cacheDeviceMiss.incrementAndGet();
        return super.malloc(shape, point, location);
    }

    return super.malloc(shape, point, location);
}
Example #27
Source File: CudaAccumTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.ASYNCHRONOUS)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(128)
            .setMaximumGridSize(256)
            .enableDebug(false)
            .setVerbose(false);

    System.out.println("Init called");
}
Example #28
Source File: CudaScalarsTests.java From nd4j with Apache License 2.0
@Before
public void setUp() {
    CudaEnvironment.getInstance().getConfiguration()
            .setExecutionModel(Configuration.ExecutionModel.SEQUENTIAL)
            .setFirstMemory(AllocationStatus.DEVICE)
            .setMaximumBlockSize(64)
            .setMaximumGridSize(256)
            .enableDebug(true);

    System.out.println("Init called");
}
Example #29
Source File: LimitedContextPool.java From nd4j with Apache License 2.0
protected synchronized void fillPoolWithResources(int numResources, boolean restoreDevice) {
    List<Integer> devices = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices();

    int cDevice = 0;
    if (restoreDevice) {
        cDevice = AtomicAllocator.getInstance().getDeviceId();
    }

    NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps();

    for (Integer device : devices) {
        nativeOps.setDevice(new CudaPointer(device));
        pool.put(device, new LinkedBlockingQueue<CudaContext>());

        cublasHandle_t handle = createNewCublasHandle();
        cusolverDnHandle_t solverHandle = createNewSolverHandle();
        for (int cnt = 0; cnt < numResources; cnt++) {
            CudaContext context = createNewStream(device);
            context.initOldStream();
            getDeviceBuffers(context, device);
            context.setHandle(handle);
            context.setSolverHandle(solverHandle);

            context.syncOldStream();

            pool.get(device).add(context);
        }
    }

    if (restoreDevice) {
        nativeOps.setDevice(new CudaPointer(cDevice));
    }
}
Example #30
Source File: ProtectedCudaShapeInfoProvider.java From nd4j with Apache License 2.0
@Override
public Pair<DataBuffer, long[]> createShapeInformation(long[] shape, long[] stride, long offset, long elementWiseStride, char order) {
    // We enforce offset to 0 in shapeBuffer, since we need it for cache efficiency + we don't actually use offset value @ native side
    offset = 0;

    Integer deviceId = AtomicAllocator.getInstance().getDeviceId();

    LongShapeDescriptor descriptor = new LongShapeDescriptor(shape, stride, offset, elementWiseStride, order);

    if (!protector.containsDataBuffer(deviceId, descriptor)) {
        Pair<DataBuffer, long[]> buffer = null;
        synchronized (this) {
            if (!protector.containsDataBuffer(deviceId, descriptor)) {
                //log.info("Cache miss: {}", descriptor);
                buffer = super.createShapeInformation(shape, stride, offset, elementWiseStride, order);
                buffer.getFirst().setConstant(true);

                if (CudaEnvironment.getInstance().getConfiguration().getMemoryModel() == Configuration.MemoryModel.IMMEDIATE) {
                    Nd4j.getConstantHandler().moveToConstantSpace(buffer.getFirst());
                }

                //deviceCache.get(deviceId).put(descriptor, buffer);
                protector.persistDataBuffer(deviceId, descriptor, buffer);

                bytes.addAndGet(buffer.getFirst().length() * 4 * 2);

                cacheMiss.incrementAndGet();
            } else {
                buffer = protector.getDataBuffer(deviceId, descriptor);
            }
        }
        return buffer;
    } else {
        // log.info("Cache hit: {}", descriptor);
        cacheHit.incrementAndGet();
    }

    return protector.getDataBuffer(deviceId, descriptor); //deviceCache.get(deviceId).get(descriptor);
}