Java Code Examples for org.nd4j.linalg.factory.Nd4j#sizeOfDataType()
The following examples show how to use org.nd4j.linalg.factory.Nd4j#sizeOfDataType(). Each example is taken from an open-source project; the source file, project, and license are noted above it.
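Before the full examples, a minimal sketch of the method itself: sizeOfDataType() reports the width in bytes of one element of a given data type (or of the current default type, for the no-argument overload), and the examples below multiply that width by an element count to compute buffer sizes. The class name here is made up for illustration; the Nd4j calls are the same two overloads the examples use.

    import org.nd4j.linalg.api.buffer.DataType;
    import org.nd4j.linalg.factory.Nd4j;

    public class SizeOfDataTypeSketch {
        public static void main(String[] args) {
            // width of a single element, in bytes: 4 for FLOAT, 8 for DOUBLE
            int floatWidth = Nd4j.sizeOfDataType(DataType.FLOAT);
            int doubleWidth = Nd4j.sizeOfDataType(DataType.DOUBLE);

            // the no-argument overload uses the current default data type
            int defaultWidth = Nd4j.sizeOfDataType();

            // the typical pattern in the examples below: bytes needed for n elements
            long n = 5;
            long requiredBytes = n * floatWidth; // 20 bytes for 5 floats

            System.out.println(floatWidth + " / " + doubleWidth + " / " + defaultWidth + " / " + requiredBytes);
        }
    }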
Example 1
Source File: CpuThreshold.java From deeplearning4j with Apache License 2.0

    @Override
    public DataBuffer compress(DataBuffer buffer) {
        INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer,
                Nd4j.getShapeInfoProvider().createShapeInformation(new long[]{1, buffer.length()}, buffer.dataType()).getFirst());
        MatchCondition condition = new MatchCondition(temp, Conditions.absGreaterThanOrEqual(threshold));
        int cntAbs = Nd4j.getExecutioner().exec(condition).getInt(0);

        //log.info("density ratio: {}", String.format("%.2f", cntAbs * 100.0f / buffer.length()));

        if (cntAbs < 2)
            return null;

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 4;
        // first 3 elements contain header
        IntPointer pointer = new IntPointer(compressedLength);
        pointer.put(0, cntAbs);
        pointer.put(1, (int) buffer.length());
        pointer.put(2, Float.floatToIntBits(threshold));
        pointer.put(3, 0);

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm(getDescriptor());
        descriptor.setCompressionType(getCompressionType());

        CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataTypeEx.THRESHOLD, pointer, buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return cbuff;
    }
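As a side note on the layout above: compress() writes a four-int header (kept-element count, original length, threshold bits, and a format tag) ahead of the encoded data; the element encoding itself happens natively inside convertDataEx. A hedged sketch of reading that header back from the same IntPointer, assuming nothing has overwritten it:

    // header layout as written by the pointer.put(...) calls above
    int keptElements    = pointer.get(0);                       // cntAbs
    int originalElems   = pointer.get(1);                       // element count of the source buffer
    float usedThreshold = Float.intBitsToFloat(pointer.get(2)); // threshold, bit-cast back to float
    int formatTag       = pointer.get(3);                       // 0 in this codec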
Example 2
Source File: WorkspaceProviderTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testNestedWorkspacesOverlap1() {
        Nd4j.setDefaultDataTypes(DataType.FLOAT, DataType.FLOAT);
        Nd4j.getWorkspaceManager().setDefaultWorkspaceConfiguration(basicConfiguration);
        try (Nd4jWorkspace ws1 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                .notifyScopeEntered()) {
            INDArray array = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
            long reqMem = 5 * Nd4j.sizeOfDataType();
            assertEquals(reqMem + reqMem % 8, ws1.getPrimaryOffset());

            try (Nd4jWorkspace ws2 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS2")
                    .notifyScopeEntered()) {
                INDArray array2 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
                reqMem = 5 * Nd4j.sizeOfDataType();
                assertEquals(reqMem + reqMem % 8, ws1.getPrimaryOffset());
                assertEquals(reqMem + reqMem % 8, ws2.getPrimaryOffset());

                try (Nd4jWorkspace ws3 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                        .notifyScopeBorrowed()) {
                    assertTrue(ws1 == ws3);

                    INDArray array3 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});

                    assertEquals(reqMem + reqMem % 8, ws2.getPrimaryOffset());
                    assertEquals((reqMem + reqMem % 8) * 2, ws1.getPrimaryOffset());
                }
            }
        }

        assertNull(Nd4j.getMemoryManager().getCurrentWorkspace());
    }
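The reqMem + reqMem % 8 expression asserted above is the workspace's 8-byte alignment of each allocation. Worked through for this test (five floats at 4 bytes each), as a standalone sketch of the arithmetic only:

    long reqMem = 5 * 4;               // 5 floats -> 20 bytes requested
    long offset = reqMem + reqMem % 8; // 20 + 4 = 24, an 8-byte boundary
    // this shorthand matches the true round-up here because 20 % 8 == 4;
    // in general the padding to the next boundary is (8 - reqMem % 8) % 8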
Example 3
Source File: ProtectedCudaConstantHandler.java From nd4j with Apache License 2.0

    /**
     * This method returns a DataBuffer with content equal to the input array.
     *
     * PLEASE NOTE: This method assumes that you'll never ever change values within the result DataBuffer
     *
     * @param array
     * @return
     */
    @Override
    public DataBuffer getConstantBuffer(float[] array) {
        // logger.info("getConstantBuffer(float[]) called");
        ArrayDescriptor descriptor = new ArrayDescriptor(array);
        Integer deviceId = AtomicAllocator.getInstance().getDeviceId();

        ensureMaps(deviceId);

        if (!buffersCache.get(deviceId).containsKey(descriptor)) {
            // we create new databuffer
            //logger.info("Creating new constant buffer...");
            DataBuffer buffer = Nd4j.createBufferDetached(array);

            if (constantOffsets.get(deviceId).get() + (array.length * Nd4j.sizeOfDataType()) < MAX_CONSTANT_LENGTH) {
                buffer.setConstant(true);
                // now we move data to constant memory, and keep happy
                moveToConstantSpace(buffer);

                buffersCache.get(deviceId).put(descriptor, buffer);

                bytes.addAndGet(array.length * Nd4j.sizeOfDataType());
            }
            return buffer;
        }
        // else logger.info("Reusing constant buffer...");

        return buffersCache.get(deviceId).get(descriptor);
    }
Example 4
Source File: ProtectedCudaConstantHandler.java From nd4j with Apache License 2.0

    /**
     * This method returns a DataBuffer with content equal to the input array.
     *
     * PLEASE NOTE: This method assumes that you'll never ever change values within the result DataBuffer
     *
     * @param array
     * @return
     */
    @Override
    public DataBuffer getConstantBuffer(double[] array) {
        //logger.info("getConstantBuffer(double[]) called: {}", Arrays.toString(array));
        ArrayDescriptor descriptor = new ArrayDescriptor(array);
        Integer deviceId = AtomicAllocator.getInstance().getDeviceId();

        ensureMaps(deviceId);

        if (!buffersCache.get(deviceId).containsKey(descriptor)) {
            // we create new databuffer
            //logger.info("Creating new constant buffer...");
            DataBuffer buffer = Nd4j.createBufferDetached(array);

            if (constantOffsets.get(deviceId).get() + (array.length * Nd4j.sizeOfDataType()) < MAX_CONSTANT_LENGTH) {
                buffer.setConstant(true);
                // now we move data to constant memory, and keep happy
                moveToConstantSpace(buffer);

                buffersCache.get(deviceId).put(descriptor, buffer);

                bytes.addAndGet(array.length * Nd4j.sizeOfDataType());
            }
            return buffer;
        }
        //else logger.info("Reusing constant buffer...");

        return buffersCache.get(deviceId).get(descriptor);
    }
Example 5
Source File: WorkspaceProviderTests.java From nd4j with Apache License 2.0

    @Test
    public void testNestedWorkspacesOverlap1() throws Exception {
        Nd4j.getWorkspaceManager().setDefaultWorkspaceConfiguration(basicConfiguration);
        try (Nd4jWorkspace ws1 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                .notifyScopeEntered()) {
            INDArray array = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
            long reqMem = 5 * Nd4j.sizeOfDataType();
            assertEquals(reqMem + reqMem % 8, ws1.getHostOffset());

            try (Nd4jWorkspace ws2 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS2")
                    .notifyScopeEntered()) {
                INDArray array2 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});
                reqMem = 5 * Nd4j.sizeOfDataType();
                assertEquals(reqMem + reqMem % 8, ws1.getHostOffset());
                assertEquals(reqMem + reqMem % 8, ws2.getHostOffset());

                try (Nd4jWorkspace ws3 = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread("WS1")
                        .notifyScopeBorrowed()) {
                    assertTrue(ws1 == ws3);

                    INDArray array3 = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});

                    assertEquals(reqMem + reqMem % 8, ws2.getHostOffset());
                    assertEquals((reqMem + reqMem % 8) * 2, ws1.getHostOffset());
                }
            }
        }

        assertNull(Nd4j.getMemoryManager().getCurrentWorkspace());
    }
Example 6
Source File: CpuThreshold.java From nd4j with Apache License 2.0

    @Override
    public DataBuffer compress(DataBuffer buffer) {
        INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer,
                Nd4j.getShapeInfoProvider().createShapeInformation(new int[]{1, (int) buffer.length()}).getFirst());
        MatchCondition condition = new MatchCondition(temp, Conditions.absGreaterThanOrEqual(threshold));
        int cntAbs = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);

        //log.info("density ratio: {}", String.format("%.2f", cntAbs * 100.0f / buffer.length()));

        if (cntAbs < 2)
            return null;

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 4;
        // first 3 elements contain header
        IntPointer pointer = new IntPointer(compressedLength);
        pointer.put(0, cntAbs);
        pointer.put(1, (int) buffer.length());
        pointer.put(2, Float.floatToIntBits(threshold));
        pointer.put(3, 0);

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm(getDescriptor());
        descriptor.setCompressionType(getCompressionType());

        CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.THRESHOLD, pointer, buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return cbuff;
    }
Example 7
Source File: CudaFlexibleThreshold.java From nd4j with Apache License 2.0

    @Override
    public DataBuffer compress(DataBuffer buffer) {
        INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer,
                Nd4j.getShapeInfoProvider().createShapeInformation(new int[]{1, (int) buffer.length()}));
        double max = temp.amaxNumber().doubleValue();

        int cntAbs = temp.scan(Conditions.absGreaterThanOrEqual(max - (max * threshold))).intValue();

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 3;
        // first 3 elements contain header
        IntPointer pointer = new IntPointer(compressedLength);
        pointer.put(0, cntAbs);
        pointer.put(1, (int) buffer.length());
        pointer.put(2, Float.floatToIntBits(threshold)); // please note, this value will be overwritten anyway

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm(getDescriptor());
        descriptor.setCompressionType(getCompressionType());

        CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.FTHRESHOLD, pointer, buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return cbuff;
    }
Example 8
Source File: BasicWorkspaceTests.java From nd4j with Apache License 2.0

    @Test
    public void testAllocation2() throws Exception {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getAndActivateWorkspace(basicConfig, "testAllocation2");

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getHostOffset());

        INDArray array = Nd4j.create(5);

        // checking if allocation actually happened
        long reqMem = 5 * Nd4j.sizeOfDataType();
        assertEquals(reqMem + reqMem % 8, workspace.getHostOffset());

        array.assign(1.0f);

        assertEquals(5, array.sumNumber().doubleValue(), 0.01);

        workspace.close();
    }
Example 9
Source File: JcublasLapack.java From deeplearning4j with Apache License 2.0

    @Override
    public void sgetrf(int M, int N, INDArray A, INDArray IPIV, INDArray INFO) {
        INDArray a = A;

        if (Nd4j.dataType() != DataType.FLOAT)
            log.warn("FLOAT getrf called in DOUBLE environment");

        if (A.ordering() == 'c')
            a = A.dup('f');

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        val ctx = allocator.getDeviceContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getCublasStream()));
            if (result != 0)
                throw new BlasException("solverSetStream failed");

            // transfer the INDArray into GPU memory
            CublasPointer xAPointer = new CublasPointer(a, ctx);

            // this output - indicates how much memory we'll need for the real operation
            val worksizeBuffer = (BaseCudaDataBuffer) Nd4j.getDataBufferFactory().createInt(1);
            worksizeBuffer.lazyAllocateHostPointer();

            int stat = cusolverDnSgetrf_bufferSize(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    (IntPointer) worksizeBuffer.addressPointer() // we intentionally use host pointer here
            );

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf_bufferSize failed", stat);
            }

            int worksize = worksizeBuffer.getInt(0);
            // Now allocate memory for the workspace, the permutation matrix and a return code
            Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

            // Do the actual LU decomp
            stat = cusolverDnSgetrf(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    new CudaPointer(workspace).asFloatPointer(),
                    new CudaPointer(allocator.getPointer(IPIV, ctx)).asIntPointer(),
                    new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

            // we do sync to make sure getrf is finished
            //ctx.syncOldStream();

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf failed", stat);
            }
        }
        allocator.registerAction(ctx, a);
        allocator.registerAction(ctx, INFO);
        allocator.registerAction(ctx, IPIV);

        if (a != A)
            A.assign(a);
    }
Example 10
Source File: BaseCudaDataBuffer.java From deeplearning4j with Apache License 2.0

    public void read(DataInputStream s) {
        try {
            val savedMode = AllocationMode.valueOf(s.readUTF());
            allocationMode = AllocationMode.MIXED_DATA_TYPES;

            long locLength = 0;

            if (savedMode.ordinal() < 3)
                locLength = s.readInt();
            else
                locLength = s.readLong();

            boolean reallocate = locLength != length || indexer == null;
            length = locLength;

            val t = DataType.valueOf(s.readUTF());
            // log.info("Restoring buffer ["+t+"] of length ["+ length+"]");

            if (globalType == null && Nd4j.dataType() != null) {
                globalType = Nd4j.dataType();
            }

            if (t == DataType.COMPRESSED) {
                type = t;
                return;
            }

            this.elementSize = (byte) Nd4j.sizeOfDataType(t);
            this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, t), false);
            this.type = t;

            Nd4j.getDeallocatorService().pickObject(this);

            switch (type) {
                case DOUBLE: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asDoublePointer();
                        indexer = DoubleIndexer.create((DoublePointer) pointer);
                    }
                    break;
                case FLOAT: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asFloatPointer();
                        indexer = FloatIndexer.create((FloatPointer) pointer);
                    }
                    break;
                case HALF: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asShortPointer();
                        indexer = HalfIndexer.create((ShortPointer) pointer);
                    }
                    break;
                case LONG: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asLongPointer();
                        indexer = LongIndexer.create((LongPointer) pointer);
                    }
                    break;
                case INT: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asIntPointer();
                        indexer = IntIndexer.create((IntPointer) pointer);
                    }
                    break;
                case SHORT: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asShortPointer();
                        indexer = ShortIndexer.create((ShortPointer) pointer);
                    }
                    break;
                case UBYTE: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asBytePointer();
                        indexer = UByteIndexer.create((BytePointer) pointer);
                    }
                    break;
                case BYTE: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asBytePointer();
                        indexer = ByteIndexer.create((BytePointer) pointer);
                    }
                    break;
                case BOOL: {
                        this.pointer = new CudaPointer(allocationPoint.getHostPointer(), length).asBooleanPointer();
                        indexer = BooleanIndexer.create((BooleanPointer) pointer);
                    }
                    break;
                default:
                    throw new UnsupportedOperationException("Unsupported data type: " + type);
            }

            readContent(s, t, t);
            allocationPoint.tickHostWrite();

        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        // we call sync to copyback data to host
        AtomicAllocator.getInstance().getFlowController().synchronizeToDevice(allocationPoint);
        //allocator.synchronizeHostData(this);
    }
Example 11
Source File: SpecialWorkspaceTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testVariableTimeSeries2() {
        WorkspaceConfiguration configuration = WorkspaceConfiguration.builder().initialSize(0).overallocationLimit(3.0)
                .policyAllocation(AllocationPolicy.OVERALLOCATE).policySpill(SpillPolicy.REALLOCATE)
                .policyLearning(LearningPolicy.FIRST_LOOP).policyReset(ResetPolicy.ENDOFBUFFER_REACHED).build();

        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(configuration, "WS1");
        // workspace.enableDebug(true);

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(configuration, "WS1")) {
            Nd4j.create(500);
            Nd4j.create(500);
        }

        assertEquals(0, workspace.getStepNumber());

        long requiredMemory = 1000 * Nd4j.sizeOfDataType();
        long shiftedSize = ((long) (requiredMemory * 1.3)) + (8 - (((long) (requiredMemory * 1.3)) % 8));
        assertEquals(requiredMemory, workspace.getSpilledSize());
        assertEquals(shiftedSize, workspace.getInitialBlockSize());
        assertEquals(workspace.getInitialBlockSize() * 4, workspace.getCurrentSize());

        for (int i = 0; i < 100; i++) {
            try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(configuration, "WS1")) {
                Nd4j.create(500);
                Nd4j.create(500);
                Nd4j.create(500);
            }
        }

        assertEquals(workspace.getInitialBlockSize() * 4, workspace.getCurrentSize());

        assertEquals(0, workspace.getNumberOfPinnedAllocations());
        assertEquals(0, workspace.getNumberOfExternalAllocations());

        assertEquals(0, workspace.getSpilledSize());
        assertEquals(0, workspace.getPinnedSize());
    }
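The shiftedSize formula above grows the required memory by a 1.3 factor and then rounds up past the next 8-byte boundary (note it adds a full 8 bytes when the grown size is already aligned). Worked numbers for this test, as a sketch assuming the default FLOAT type at 4 bytes per element:

    long requiredMemory = 1000 * 4;               // two 500-element float arrays -> 4000 bytes
    long grown = (long) (requiredMemory * 1.3);   // 5200
    long shiftedSize = grown + (8 - (grown % 8)); // 5200 is already 8-aligned, so 5200 + 8 = 5208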
Example 12
Source File: BaseCudaDataBuffer.java From deeplearning4j with Apache License 2.0

    public BaseCudaDataBuffer(float[] data) {
        //super(data);
        this(data.length, Nd4j.sizeOfDataType(DataType.FLOAT), false);
        set(data, data.length, 0, 0);
    }
Example 13
Source File: WorkspaceProviderTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testVariableInput1() {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager()
                .getWorkspaceForCurrentThread(adsiConfiguration, "ADSI");

        INDArray array1 = null;
        INDArray array2 = null;

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // we allocate the first element smaller than subsequent ones
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
        }

        long requiredMemory = 8 * 128 * 100 * Nd4j.sizeOfDataType(DataType.DOUBLE);
        long shiftedSize = ((long) (requiredMemory * 1.3)) + (8 - (((long) (requiredMemory * 1.3)) % 8));
        assertEquals(shiftedSize, workspace.getInitialBlockSize());
        assertEquals(shiftedSize * 4, workspace.getCurrentSize());
        assertEquals(0, workspace.getPrimaryOffset());
        assertEquals(0, workspace.getDeviceOffset());

        assertEquals(1, workspace.getCyclesCount());
        assertEquals(0, workspace.getStepNumber());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating same shape
            array1 = Nd4j.create(8, 128, 100);
        }

        assertEquals(workspace.getInitialBlockSize(), workspace.getPrimaryOffset());
        assertEquals(workspace.getInitialBlockSize(), workspace.getDeviceOffset());

        assertEquals(2, workspace.getCyclesCount());
        assertEquals(0, workspace.getStepNumber());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating bigger shape
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 200);
        }

        // offsets should be intact, allocation happened as pinned
        assertEquals(workspace.getInitialBlockSize(), workspace.getPrimaryOffset());
        assertEquals(workspace.getInitialBlockSize(), workspace.getDeviceOffset());
        assertEquals(1, workspace.getNumberOfPinnedAllocations());

        assertEquals(3, workspace.getCyclesCount());
        assertEquals(0, workspace.getStepNumber());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating same shape
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
        }

        assertEquals(2, workspace.getNumberOfPinnedAllocations());
        assertEquals(0, workspace.getStepNumber());
        assertEquals(4, workspace.getCyclesCount());

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
            // allocating same shape
            array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
        }

        assertEquals(3, workspace.getNumberOfPinnedAllocations());
        assertEquals(1, workspace.getStepNumber());
        assertEquals(5, workspace.getCyclesCount());

        for (int i = 0; i < 12; i++) {
            try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(adsiConfiguration, "ADSI")) {
                // allocating same shape
                array1 = Nd4j.create(DataType.DOUBLE, 8, 128, 100);
            }
        }

        // Now we know that workspace was reallocated and offset was shifted to the end of workspace
        assertEquals(4, workspace.getStepNumber());

        requiredMemory = 8 * 128 * 200 * Nd4j.sizeOfDataType(DataType.DOUBLE);
        shiftedSize = ((long) (requiredMemory * 1.3)) + (8 - (((long) (requiredMemory * 1.3)) % 8));

        //assertEquals(shiftedSize * 4, workspace.getCurrentSize());
        assertEquals(workspace.getCurrentSize(), workspace.getPrimaryOffset());
        assertEquals(workspace.getCurrentSize(), workspace.getDeviceOffset());
    }
Example 14
Source File: NativeOpExecutioner.java From nd4j with Apache License 2.0

    @Override
    public INDArray thresholdEncode(INDArray input, double threshold, Integer boundary) {
        MatchCondition condition = new MatchCondition(input, Conditions.absGreaterThanOrEqual(threshold));
        int cntAbs = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);

        if (cntAbs < 2)
            return null;

        if (boundary != null)
            cntAbs = Math.min(cntAbs, boundary);

        DataBuffer buffer = input.data();

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 4;
        // first 3 elements contain header

        DataBuffer encodedBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null
                ? Nd4j.getDataBufferFactory().createInt(4 + cntAbs, false)
                : Nd4j.getDataBufferFactory().createInt(4 + cntAbs, false, Nd4j.getMemoryManager().getCurrentWorkspace());

        encodedBuffer.put(0, cntAbs);
        encodedBuffer.put(1, (int) buffer.length());
        encodedBuffer.put(2, Float.floatToIntBits((float) threshold));

        // format id
        encodedBuffer.put(3, ThresholdCompression.FLEXIBLE_ENCODING);

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm("THRESHOLD");
        descriptor.setCompressionType(CompressionType.LOSSLESS);

        //CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(AbstractCompressor.getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.THRESHOLD, encodedBuffer.addressPointer(), buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return Nd4j.createArrayFromShapeBuffer(encodedBuffer, input.shapeInfoDataBuffer());
    }
Example 15
Source File: JcublasLapack.java From nd4j with Apache License 2.0

    public int dsyev(char _jobz, char _uplo, int N, INDArray A, INDArray R) {

        int status = -1;

        int jobz = _jobz == 'V' ? CUSOLVER_EIG_MODE_VECTOR : CUSOLVER_EIG_MODE_NOVECTOR;
        int uplo = _uplo == 'L' ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER;

        if (Nd4j.dataType() != DataBuffer.Type.DOUBLE)
            log.warn("DOUBLE dsyev called in FLOAT environment");

        INDArray a = A;

        if (A.ordering() == 'c')
            a = A.dup('f');

        // FIXME: int cast
        int M = (int) A.rows();

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            status = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
            if (status == 0) {
                // transfer the INDArray into GPU memory
                CublasPointer xAPointer = new CublasPointer(a, ctx);
                CublasPointer xRPointer = new CublasPointer(R, ctx);

                // this output - indicates how much memory we'll need for the real operation
                DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
                status = cusolverDnDsyevd_bufferSize(solverDn, jobz, uplo, M,
                        (DoublePointer) xAPointer.getDevicePointer(), M,
                        (DoublePointer) xRPointer.getDevicePointer(),
                        (IntPointer) worksizeBuffer.addressPointer());

                if (status == CUSOLVER_STATUS_SUCCESS) {
                    int worksize = worksizeBuffer.getInt(0);

                    // allocate memory for the workspace, the non-converging row buffer and a return code
                    Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

                    INDArray INFO = Nd4j.createArrayFromShapeBuffer(Nd4j.getDataBufferFactory().createInt(1),
                            Nd4j.getShapeInfoProvider().createShapeInformation(new int[] {1, 1}));

                    // Do the actual decomp
                    status = cusolverDnDsyevd(solverDn, jobz, uplo, M,
                            (DoublePointer) xAPointer.getDevicePointer(), M,
                            (DoublePointer) xRPointer.getDevicePointer(),
                            new CudaPointer(workspace).asDoublePointer(), worksize,
                            new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

                    allocator.registerAction(ctx, INFO);
                    if (status == 0)
                        status = INFO.getInt(0);
                }
            }
        }
        if (status == 0) {
            allocator.registerAction(ctx, R);
            allocator.registerAction(ctx, a);

            if (a != A)
                A.assign(a);
        }
        return status;
    }
Example 16
Source File: JcublasLapack.java From nd4j with Apache License 2.0

    public int ssyev(char _jobz, char _uplo, int N, INDArray A, INDArray R) {

        int status = -1;

        int jobz = _jobz == 'V' ? CUSOLVER_EIG_MODE_VECTOR : CUSOLVER_EIG_MODE_NOVECTOR;
        int uplo = _uplo == 'L' ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER;

        if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
            log.warn("FLOAT ssyev called in DOUBLE environment");

        INDArray a = A;

        if (A.ordering() == 'c')
            a = A.dup('f');

        // FIXME: int cast
        int M = (int) A.rows();

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            status = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
            if (status == 0) {
                // transfer the INDArray into GPU memory
                CublasPointer xAPointer = new CublasPointer(a, ctx);
                CublasPointer xRPointer = new CublasPointer(R, ctx);

                // this output - indicates how much memory we'll need for the real operation
                DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
                status = cusolverDnSsyevd_bufferSize(solverDn, jobz, uplo, M,
                        (FloatPointer) xAPointer.getDevicePointer(), M,
                        (FloatPointer) xRPointer.getDevicePointer(),
                        (IntPointer) worksizeBuffer.addressPointer());

                if (status == CUSOLVER_STATUS_SUCCESS) {
                    int worksize = worksizeBuffer.getInt(0);

                    // allocate memory for the workspace, the non-converging row buffer and a return code
                    Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

                    INDArray INFO = Nd4j.createArrayFromShapeBuffer(Nd4j.getDataBufferFactory().createInt(1),
                            Nd4j.getShapeInfoProvider().createShapeInformation(new int[] {1, 1}));

                    // Do the actual decomp
                    status = cusolverDnSsyevd(solverDn, jobz, uplo, M,
                            (FloatPointer) xAPointer.getDevicePointer(), M,
                            (FloatPointer) xRPointer.getDevicePointer(),
                            new CudaPointer(workspace).asFloatPointer(), worksize,
                            new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

                    allocator.registerAction(ctx, INFO);
                    if (status == 0)
                        status = INFO.getInt(0);
                }
            }
        }
        if (status == 0) {
            allocator.registerAction(ctx, R);
            allocator.registerAction(ctx, a);

            if (a != A)
                A.assign(a);
        }
        return status;
    }
Example 17
Source File: BasicWorkspaceTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testAllocation5() {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().getAndActivateWorkspace(basicConfig, "testAllocation5");

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getPrimaryOffset());

        INDArray array = Nd4j.create(DOUBLE, new long[] {1, 5}, 'c');

        // checking if allocation actually happened
        long reqMemory = 5 * Nd4j.sizeOfDataType(DOUBLE);
        assertEquals(reqMemory + reqMemory % 8, workspace.getPrimaryOffset());

        array.assign(1.0f);

        INDArray dup = array.dup();

        assertEquals((reqMemory + reqMemory % 8) * 2, workspace.getPrimaryOffset());

        assertEquals(5, dup.sumNumber().doubleValue(), 0.01);

        workspace.close();
    }
Example 18
Source File: JcublasLapack.java From nd4j with Apache License 2.0

    @Override
    public void sgetrf(int M, int N, INDArray A, INDArray IPIV, INDArray INFO) {
        INDArray a = A;

        if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
            log.warn("FLOAT getrf called in DOUBLE environment");

        if (A.ordering() == 'c')
            a = A.dup('f');

        if (Nd4j.getExecutioner() instanceof GridExecutioner)
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

        // Get context for current thread
        CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

        // setup the solver handles for cuSolver calls
        cusolverDnHandle_t handle = ctx.getSolverHandle();
        cusolverDnContext solverDn = new cusolverDnContext(handle);

        // synchronized on the solver
        synchronized (handle) {
            int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
            if (result != 0)
                throw new BlasException("solverSetStream failed");

            // transfer the INDArray into GPU memory
            CublasPointer xAPointer = new CublasPointer(a, ctx);

            // this output - indicates how much memory we'll need for the real operation
            DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);

            int stat = cusolverDnSgetrf_bufferSize(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    (IntPointer) worksizeBuffer.addressPointer() // we intentionally use host pointer here
            );

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf_bufferSize failed", stat);
            }

            int worksize = worksizeBuffer.getInt(0);
            // Now allocate memory for the workspace, the permutation matrix and a return code
            Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

            // Do the actual LU decomp
            stat = cusolverDnSgetrf(solverDn, M, N,
                    (FloatPointer) xAPointer.getDevicePointer(), M,
                    new CudaPointer(workspace).asFloatPointer(),
                    new CudaPointer(allocator.getPointer(IPIV, ctx)).asIntPointer(),
                    new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

            // we do sync to make sure getrf is finished
            //ctx.syncOldStream();

            if (stat != CUSOLVER_STATUS_SUCCESS) {
                throw new BlasException("cusolverDnSgetrf failed", stat);
            }
        }
        allocator.registerAction(ctx, a);
        allocator.registerAction(ctx, INFO);
        allocator.registerAction(ctx, IPIV);

        if (a != A)
            A.assign(a);
    }
Example 19
Source File: BasicWorkspaceTests.java From deeplearning4j with Apache License 2.0

    @Test
    public void testLoops1() {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().createNewWorkspace(loopOverTimeConfig);

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getPrimaryOffset());

        workspace.notifyScopeEntered();

        INDArray arrayCold = Nd4j.create(DOUBLE, 10);

        assertEquals(0, workspace.getPrimaryOffset());
        assertEquals(0, workspace.getCurrentSize());

        arrayCold.assign(1.0f);

        assertEquals(10f, arrayCold.sumNumber().floatValue(), 0.01f);

        workspace.notifyScopeLeft();

        workspace.initializeWorkspace();
        long reqMemory = 11 * Nd4j.sizeOfDataType(arrayCold.dataType());
        assertEquals(reqMemory + reqMemory % 8, workspace.getCurrentSize());

        log.info("-----------------------");

        for (int x = 0; x < 10; x++) {
            assertEquals(0, workspace.getPrimaryOffset());

            workspace.notifyScopeEntered();

            INDArray array = Nd4j.create(DOUBLE, 10);

            long reqMem = 10 * Nd4j.sizeOfDataType(array.dataType());

            assertEquals(reqMem + reqMem % 8, workspace.getPrimaryOffset());

            array.addi(1.0);

            assertEquals(reqMem + reqMem % 8, workspace.getPrimaryOffset());

            assertEquals("Failed on iteration " + x, 10, array.sumNumber().doubleValue(), 0.01);

            workspace.notifyScopeLeft();

            assertEquals(0, workspace.getPrimaryOffset());
        }
    }
Example 20
Source File: BasicWorkspaceTests.java From nd4j with Apache License 2.0

    @Test
    public void testLoops1() throws Exception {
        Nd4jWorkspace workspace = (Nd4jWorkspace) Nd4j.getWorkspaceManager().createNewWorkspace(loopOverTimeConfig);

        Nd4j.getMemoryManager().setCurrentWorkspace(workspace);

        assertNotEquals(null, Nd4j.getMemoryManager().getCurrentWorkspace());

        assertEquals(0, workspace.getHostOffset());

        workspace.notifyScopeEntered();

        INDArray arrayCold = Nd4j.create(10);

        assertEquals(0, workspace.getHostOffset());
        assertEquals(0, workspace.getCurrentSize());

        arrayCold.assign(1.0f);

        assertEquals(10f, arrayCold.sumNumber().floatValue(), 0.01f);

        workspace.notifyScopeLeft();

        workspace.initializeWorkspace();
        long reqMemory = 11 * Nd4j.sizeOfDataType();
        assertEquals(reqMemory + reqMemory % 8, workspace.getCurrentSize());

        log.info("-----------------------");

        for (int x = 0; x < 10; x++) {
            assertEquals(0, workspace.getHostOffset());

            workspace.notifyScopeEntered();

            INDArray array = Nd4j.create(10);

            long reqMem = 10 * Nd4j.sizeOfDataType();

            assertEquals(reqMem + reqMem % 8, workspace.getHostOffset());

            array.addi(1.0f);

            assertEquals(reqMem + reqMem % 8, workspace.getHostOffset());

            assertEquals("Failed on iteration " + x, 10, array.sumNumber().doubleValue(), 0.01);

            workspace.notifyScopeLeft();

            assertEquals(0, workspace.getHostOffset());
        }
    }