Java Code Examples for org.nd4j.linalg.factory.Nd4j#getExecutioner()
The following examples show how to use org.nd4j.linalg.factory.Nd4j#getExecutioner().
The examples are taken from open-source projects; the project and source file for each one are listed above it.
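Before the project examples, here is a minimal sketch of the pattern they all share: obtain the backend-specific OpExecutioner via Nd4j.getExecutioner() and hand it an op to execute. This is an illustrative sketch only, not taken from any project below; the class name is hypothetical, and the import path for Mean is an assumption that varies between ND4J releases (older releases use org.nd4j.linalg.api.ops.impl.accum.Mean).

import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.api.ops.impl.reduce.floating.Mean; // assumed import path; differs in older ND4J versions
import org.nd4j.linalg.factory.Nd4j;

public class GetExecutionerSketch {
    public static void main(String[] args) {
        // Backend-specific op executioner (native CPU or CUDA, depending on the nd4j backend on the classpath).
        OpExecutioner executioner = Nd4j.getExecutioner();

        // Build an op and hand it to the executioner, as the test examples below do.
        INDArray x = Nd4j.linspace(1, 5, 5, DataType.DOUBLE);
        Mean mean = new Mean(x);
        executioner.exec(mean);

        // For reduction ops the scalar result is available after exec().
        System.out.println("mean = " + mean.getFinalResult()); // expected: 3.0
    }
}

The same executioner object is used for transform ops (Exp, Log, OldSoftMax) and pairwise ops (MulOp) in the examples that follow.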
Example 1
Source File: OpExecutionerTests.java From deeplearning4j with Apache License 2.0

@Test
public void testStridedExp() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray arr = Nd4j.linspace(1, 6, 6, DataType.DOUBLE).reshape(2, 3);
    INDArray slice = arr.slice(0);
    val expected = new double[(int) slice.length()];
    for (int i = 0; i < slice.length(); i++)
        expected[i] = (float) Math.exp(slice.getDouble(i));
    Exp exp = new Exp(slice);
    opExecutioner.exec(exp);
    assertEquals(getFailureMessage(), Nd4j.create(expected), slice);
}
Example 2
Source File: SparkDl4jMultiLayer.java From deeplearning4j with Apache License 2.0

/**
 * Fit the DataSet RDD
 *
 * @param trainingData the training data RDD to fitDataSet
 * @return the MultiLayerNetwork after training
 */
public MultiLayerNetwork fit(JavaRDD<DataSet> trainingData) {
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    trainingMaster.executeTraining(this, trainingData);
    network.incrementEpochCount();
    return network;
}
Example 3
Source File: OpExecutionerTestsC.java From nd4j with Apache License 2.0

@Test
public void testRowSoftmax() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray arr = Nd4j.linspace(1, 6, 6);
    OldSoftMax softMax = new OldSoftMax(arr);
    opExecutioner.exec(softMax);
    assertEquals(getFailureMessage(), 1.0, softMax.z().sumNumber().doubleValue(), 1e-1);
}
Example 4
Source File: OpExecutionerTestsC.java From deeplearning4j with Apache License 2.0

@Test
public void testStridedExp() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray arr = Nd4j.linspace(1, 6, 6, DataType.DOUBLE).reshape(2, 3);
    INDArray slice = arr.slice(0);
    val expected = new double[(int) slice.length()];
    for (int i = 0; i < slice.length(); i++)
        expected[i] = (float) Math.exp(slice.getDouble(i));
    Exp exp = new Exp(slice);
    opExecutioner.exec(exp);
    assertEquals(getFailureMessage(), Nd4j.create(expected), slice);
}
Example 5
Source File: OpExecutionerTestsC.java From nd4j with Apache License 2.0

@Test
public void testDescriptiveStatsDouble() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.linspace(1, 5, 5);

    Mean mean = new Mean(x);
    opExecutioner.exec(mean);
    assertEquals(3.0, mean.getFinalResult().doubleValue(), 1e-1);

    Variance variance = new Variance(x.dup(), true);
    opExecutioner.exec(variance);
    assertEquals(getFailureMessage(), 2.5, variance.getFinalResult().doubleValue(), 1e-1);
}
Example 6
Source File: OpExecutionerTestsC.java From nd4j with Apache License 2.0

@Test
public void testMaxMin() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.linspace(1, 5, 5);

    Max max = new Max(x);
    opExecutioner.exec(max);
    assertEquals(5, max.getFinalResult().doubleValue(), 1e-1);

    Min min = new Min(x);
    opExecutioner.exec(min);
    assertEquals(1, min.getFinalResult().doubleValue(), 1e-1);
}
Example 7
Source File: OpExecutionerTests.java From deeplearning4j with Apache License 2.0

@Test
public void testDescriptiveStatsDouble() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.linspace(1, 5, 5, DataType.DOUBLE);

    Mean mean = new Mean(x);
    opExecutioner.exec(mean);
    assertEquals(3.0, mean.getFinalResult().doubleValue(), 1e-1);

    Variance variance = new Variance(x.dup(), true);
    opExecutioner.exec(variance);
    assertEquals(getFailureMessage(), 2.5, variance.getFinalResult().doubleValue(), 1e-1);
}
Example 8
Source File: OpExecutionerTestsC.java From nd4j with Apache License 2.0

@Test
public void testMul() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.ones(5);
    INDArray xDup = x.dup();
    INDArray solution = Nd4j.valueArrayOf(5, 1.0);
    opExecutioner.exec(new OldMulOp(x, xDup, x));
    assertEquals(solution, x);
}
Example 9
Source File: OpExecutionerTests.java From nd4j with Apache License 2.0

@Test
public void testMaxMin() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.linspace(1, 5, 5);

    Max max = new Max(x);
    opExecutioner.exec(max);
    assertEquals(5, max.getFinalResult().doubleValue(), 1e-1);

    Min min = new Min(x);
    opExecutioner.exec(min);
    assertEquals(1, min.getFinalResult().doubleValue(), 1e-1);
}
Example 10
Source File: SparkComputationGraph.java From deeplearning4j with Apache License 2.0

/**
 * Fit the ComputationGraph with the given data set
 *
 * @param rdd Data to train on
 * @return Trained network
 */
public ComputationGraph fitMultiDataSet(JavaRDD<MultiDataSet> rdd) {
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    trainingMaster.executeTrainingMDS(this, rdd);
    network.incrementEpochCount();
    return network;
}
Example 11
Source File: OpExecutionerTestsC.java From deeplearning4j with Apache License 2.0

@Test
public void testMul() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.ones(5);
    INDArray xDup = x.dup();
    INDArray solution = Nd4j.valueArrayOf(5, 1.0);
    opExecutioner.exec(new MulOp(x, xDup, x));
    assertEquals(solution, x);
}
Example 12
Source File: OpExecutionerTests.java From nd4j with Apache License 2.0

@Test
public void testStridedLog() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray arr = Nd4j.linspace(1, 6, 6).reshape(2, 3);
    INDArray slice = arr.slice(0);
    Log log = new Log(slice);
    opExecutioner.exec(log);
    INDArray assertion = Nd4j.create(Nd4j.createBuffer(new float[] {0.f, 1.09861229f, 1.60943791f}));
    assertEquals(getFailureMessage(), assertion, slice);
}
Example 13
Source File: OpExecutionerTests.java From deeplearning4j with Apache License 2.0

@Test
public void testDescriptiveStats() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.linspace(1, 5, 5, DataType.DOUBLE);

    Mean mean = new Mean(x);
    opExecutioner.exec(mean);
    assertEquals(getFailureMessage(), 3.0, mean.getFinalResult().doubleValue(), 1e-1);

    Variance variance = new Variance(x.dup(), true);
    opExecutioner.exec(variance);
    assertEquals(getFailureMessage(), 2.5, variance.getFinalResult().doubleValue(), 1e-1);
}
Example 14
Source File: OpExecutionerTestsC.java From deeplearning4j with Apache License 2.0

@Test
public void testDescriptiveStats() {
    OpExecutioner opExecutioner = Nd4j.getExecutioner();
    INDArray x = Nd4j.linspace(1, 5, 5, DataType.DOUBLE);

    Mean mean = new Mean(x);
    opExecutioner.exec(mean);
    assertEquals(getFailureMessage(), 3.0, mean.getFinalResult().doubleValue(), 1e-1);

    Variance variance = new Variance(x.dup(), true);
    opExecutioner.exec(variance);
    assertEquals(getFailureMessage(), 2.5, variance.getFinalResult().doubleValue(), 1e-1);
}
Example 15
Source File: JCublasNDArrayFactory.java From nd4j with Apache License 2.0

@Override
public INDArray specialConcat(int dimension, INDArray... toConcat) {
    if (toConcat.length == 1)
        return toConcat[0];

    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    PointerPointer shapeInfoPointers = new PointerPointer(toConcat.length);
    PointerPointer dataPointers = new PointerPointer(toConcat.length);
    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = (CudaContext) allocator.getDeviceContext().getContext();

    int sumAlongDim = 0;

    val outputShape = ArrayUtil.copy(toConcat[0].shape());

    for (int i = 0; i < toConcat.length; i++) {
        if (toConcat[i].isCompressed())
            Nd4j.getCompressor().decompressi(toConcat[i]);

        allocator.synchronizeHostData(toConcat[i]);
        shapeInfoPointers.put(i, allocator.getHostPointer(toConcat[i].shapeInfoDataBuffer()));
        dataPointers.put(i, allocator.getHostPointer(toConcat[i].data()));
        sumAlongDim += toConcat[i].size(dimension);

        for (int j = 0; j < toConcat[i].rank(); j++)
            if (j != dimension && toConcat[i].size(j) != outputShape[j]) {
                throw new IllegalArgumentException(
                                "Illegal concatenation at array " + i + " and shape element " + j);
            }
    }

    outputShape[dimension] = sumAlongDim;

    PointerPointer dummy = new PointerPointer(new Pointer[] {null});

    INDArray ret = Nd4j.createUninitialized(outputShape, Nd4j.order());

    if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.specialConcatDouble(dummy, dimension, toConcat.length, dataPointers, shapeInfoPointers,
                        (DoublePointer) ret.data().addressPointer(),
                        (LongPointer) ret.shapeInfoDataBuffer().addressPointer(),
                        new PointerPointer(new Pointer[] {null}), new PointerPointer(new Pointer[] {null}));
    } else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.specialConcatFloat(dummy, dimension, toConcat.length, dataPointers, shapeInfoPointers,
                        (FloatPointer) ret.data().addressPointer(),
                        (LongPointer) ret.shapeInfoDataBuffer().addressPointer(),
                        new PointerPointer(new Pointer[] {null}), new PointerPointer(new Pointer[] {null}));
    } else if (ret.data().dataType() == DataBuffer.Type.HALF) {
        nativeOps.specialConcatHalf(dummy, dimension, toConcat.length, dataPointers, shapeInfoPointers,
                        (ShortPointer) ret.data().addressPointer(),
                        (LongPointer) ret.shapeInfoDataBuffer().addressPointer(),
                        new PointerPointer(new Pointer[] {null}), new PointerPointer(new Pointer[] {null}));
    } else {
        throw new ND4JIllegalStateException("Unknown dataType: " + ret.data().dataType());
    }

    AllocationPoint point = allocator.getAllocationPoint(ret);

    val perfD = PerformanceTracker.getInstance().helperStartTransaction();

    nativeOps.memcpyAsync(point.getDevicePointer(), point.getHostPointer(),
                    ret.lengthLong() * Nd4j.sizeOfDataType(ret.data().dataType()),
                    CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream());

    context.getSpecialStream().synchronize();

    PerformanceTracker.getInstance().helperRegisterTransaction(point.getDeviceId(), perfD,
                    point.getNumberOfBytes(), MemcpyDirection.HOST_TO_DEVICE);

    point.tickHostRead();
    point.tickDeviceWrite();

    return ret;
}
Example 16
Source File: CudnnBatchNormalizationHelper.java From deeplearning4j with Apache License 2.0

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, long[] shape, INDArray gamma,
                INDArray beta, INDArray dGammaView, INDArray dBetaView, double eps, CNN2DFormat format,
                LayerWorkspaceMgr layerWorkspaceMgr) {
    boolean nchw = format == CNN2DFormat.NCHW;
    this.eps = eps;
    int cudnnTensorFormat = nchw ? CUDNN_TENSOR_NCHW : CUDNN_TENSOR_NHWC;
    int chIdx = nchw ? 1 : 3;
    int hIdx = nchw ? 2 : 1;
    int wIdx = nchw ? 3 : 2;

    val miniBatch = (int) input.size(0);
    val depth = (int) input.size(chIdx);
    val inH = (int) input.size(hIdx);
    val inW = (int) input.size(wIdx);

    final boolean isHalf = (input.dataType() == DataType.HALF);
    INDArray gammaOrig = null;
    INDArray dGammaViewOrig = null;
    INDArray dBetaViewOrig = null;
    if(isHalf) {    //Convert FP16 to FP32 if required (CuDNN BN doesn't support FP16 for these params, only for input/output)
        gammaOrig = gamma;
        dGammaViewOrig = dGammaView;
        dBetaViewOrig = dBetaView;
        /*
        From CuDNN docs: bnScale, resultBnScaleDiff, resultBnBiasDiff, savedMean, savedInvVariance
        "Note: The data type of this tensor descriptor must be 'float' for FP16 and FP32 input tensors, and 'double' for FP64 input tensors."
        >> Last 2 are the meanCache and varCache; first 3 are below
         */
        gamma = gamma.castTo(DataType.FLOAT);
        dGammaView = dGammaView.castTo(DataType.FLOAT);
        dBetaView = dBetaView.castTo(DataType.FLOAT);
    }

    Gradient retGradient = new DefaultGradient();

    if (!Shape.hasDefaultStridesForShape(epsilon)) {
        // apparently not supported by cuDNN
        epsilon = epsilon.dup('c');
    }

    val srcStride = ArrayUtil.toInts(input.stride());
    val deltaStride = ArrayUtil.toInts(epsilon.stride());

    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, (int) miniBatch, (int) depth,
                    (int) inH, (int) inW, (int) srcStride[0], (int) srcStride[chIdx], (int) srcStride[hIdx],
                    (int) srcStride[wIdx]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, (int) miniBatch, (int) depth,
                    (int) inH, (int) inW, (int) deltaStride[0], (int) deltaStride[chIdx], (int) deltaStride[hIdx],
                    (int) deltaStride[wIdx]));

    long[] nextEpsShape = nchw ? new long[] {miniBatch, depth, inH, inW} : new long[] {miniBatch, inH, inW, depth};
    INDArray nextEpsilon = layerWorkspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), nextEpsShape, 'c');
    val dstStride = ArrayUtil.toInts(nextEpsilon.stride());

    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW,
                    dstStride[0], dstStride[chIdx], dstStride[hIdx], dstStride[wIdx]));
    checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, cudnnTensorFormat,
                    toCudnnDataType(gamma.data().dataType()), (int) shape[0], (int) shape[1],
                    shape.length > 2 ? (int) shape[2] : 1, shape.length > 3 ? (int) shape[3] : 1));

    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, epsilon, nextEpsilon, gamma,
                    dGammaView, dBetaView);
    Pointer srcData = allocator.getPointer(input, context);
    Pointer epsData = allocator.getPointer(epsilon, context);
    Pointer dstData = allocator.getPointer(nextEpsilon, context);
    Pointer gammaData = allocator.getPointer(gamma, context);
    Pointer dGammaData = allocator.getPointer(dGammaView, context);
    Pointer dBetaData = allocator.getPointer(dBetaView, context);
    Pointer meanCacheData = allocator.getPointer(meanCache, context);
    Pointer varCacheData = allocator.getPointer(varCache, context);

    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getCublasStream())));
    checkCudnn(cudnnBatchNormalizationBackward(cudnnContext, batchNormMode, alpha, this.beta, alpha, alpha,
                    cudnnContext.srcTensorDesc, srcData, cudnnContext.deltaTensorDesc, epsData,
                    cudnnContext.dstTensorDesc, dstData, cudnnContext.gammaBetaTensorDesc, gammaData, dGammaData,
                    dBetaData, eps, meanCacheData, varCacheData));

    allocator.getFlowController().registerActionAllWrite(context, input, epsilon, nextEpsilon, gamma, dGammaView, dBetaView);

    retGradient.setGradientFor(BatchNormalizationParamInitializer.GAMMA, dGammaView);
    retGradient.setGradientFor(BatchNormalizationParamInitializer.BETA, dBetaView);

    context.syncOldStream();

    //Convert back and assign, if required:
    if(isHalf){
        gammaOrig.assign(gamma.castTo(DataType.HALF));
        dGammaViewOrig.assign(dGammaView.castTo(DataType.HALF));
        dBetaViewOrig.assign(dBetaView.castTo(DataType.HALF));
    }

    return new Pair<>(retGradient, nextEpsilon);
}
Example 17
Source File: JcublasLapack.java From nd4j with Apache License 2.0

public int dsyev( char _jobz, char _uplo, int N, INDArray A, INDArray R ) {

    int status = -1 ;

    int jobz = _jobz == 'V' ? CUSOLVER_EIG_MODE_VECTOR : CUSOLVER_EIG_MODE_NOVECTOR ;
    int uplo = _uplo == 'L' ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER ;

    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE)
        log.warn("DOUBLE dsyev called in FLOAT environment");

    INDArray a = A;

    if (A.ordering() == 'c')
        a = A.dup('f');

    // FIXME: int cast
    int M = (int) A.rows() ;

    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    // Get context for current thread
    CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

    // setup the solver handles for cuSolver calls
    cusolverDnHandle_t handle = ctx.getSolverHandle();
    cusolverDnContext solverDn = new cusolverDnContext(handle);

    // synchronized on the solver
    synchronized (handle) {
        status = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
        if( status == 0 ) {
            // transfer the INDArray into GPU memory
            CublasPointer xAPointer = new CublasPointer(a, ctx);
            CublasPointer xRPointer = new CublasPointer(R, ctx);

            // this output - indicates how much memory we'll need for the real operation
            DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);

            status = cusolverDnDsyevd_bufferSize(
                        solverDn, jobz, uplo, M,
                        (DoublePointer) xAPointer.getDevicePointer(), M,
                        (DoublePointer) xRPointer.getDevicePointer(),
                        (IntPointer) worksizeBuffer.addressPointer() ) ;

            if (status == CUSOLVER_STATUS_SUCCESS) {
                int worksize = worksizeBuffer.getInt(0);

                // allocate memory for the workspace, the non-converging row buffer and a return code
                Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

                INDArray INFO = Nd4j.createArrayFromShapeBuffer(Nd4j.getDataBufferFactory().createInt(1),
                                Nd4j.getShapeInfoProvider().createShapeInformation(new int[] {1, 1}));

                // Do the actual decomp
                status = cusolverDnDsyevd(solverDn, jobz, uplo, M,
                        (DoublePointer) xAPointer.getDevicePointer(), M,
                        (DoublePointer) xRPointer.getDevicePointer(),
                        new CudaPointer(workspace).asDoublePointer(), worksize,
                        new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

                allocator.registerAction(ctx, INFO);
                if( status == 0 ) status = INFO.getInt(0) ;
            }
        }
    }
    if( status == 0 ) {
        allocator.registerAction(ctx, R);
        allocator.registerAction(ctx, a);

        if (a != A)
            A.assign(a);
    }
    return status ;
}
Example 18
Source File: JcublasLapack.java From nd4j with Apache License 2.0

public int ssyev( char _jobz, char _uplo, int N, INDArray A, INDArray R ) {

    int status = -1 ;

    int jobz = _jobz == 'V' ? CUSOLVER_EIG_MODE_VECTOR : CUSOLVER_EIG_MODE_NOVECTOR ;
    int uplo = _uplo == 'L' ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER ;

    if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
        log.warn("FLOAT ssyev called in DOUBLE environment");

    INDArray a = A;

    if (A.ordering() == 'c')
        a = A.dup('f');

    // FIXME: int cast
    int M = (int) A.rows() ;

    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    // Get context for current thread
    CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

    // setup the solver handles for cuSolver calls
    cusolverDnHandle_t handle = ctx.getSolverHandle();
    cusolverDnContext solverDn = new cusolverDnContext(handle);

    // synchronized on the solver
    synchronized (handle) {
        status = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
        if( status == 0 ) {
            // transfer the INDArray into GPU memory
            CublasPointer xAPointer = new CublasPointer(a, ctx);
            CublasPointer xRPointer = new CublasPointer(R, ctx);

            // this output - indicates how much memory we'll need for the real operation
            DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);

            status = cusolverDnSsyevd_bufferSize(
                        solverDn, jobz, uplo, M,
                        (FloatPointer) xAPointer.getDevicePointer(), M,
                        (FloatPointer) xRPointer.getDevicePointer(),
                        (IntPointer) worksizeBuffer.addressPointer() ) ;

            if (status == CUSOLVER_STATUS_SUCCESS) {
                int worksize = worksizeBuffer.getInt(0);

                // allocate memory for the workspace, the non-converging row buffer and a return code
                Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

                INDArray INFO = Nd4j.createArrayFromShapeBuffer(Nd4j.getDataBufferFactory().createInt(1),
                                Nd4j.getShapeInfoProvider().createShapeInformation(new int[] {1, 1}));

                // Do the actual decomp
                status = cusolverDnSsyevd(solverDn, jobz, uplo, M,
                        (FloatPointer) xAPointer.getDevicePointer(), M,
                        (FloatPointer) xRPointer.getDevicePointer(),
                        new CudaPointer(workspace).asFloatPointer(), worksize,
                        new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());

                allocator.registerAction(ctx, INFO);
                if( status == 0 ) status = INFO.getInt(0) ;
            }
        }
    }
    if( status == 0 ) {
        allocator.registerAction(ctx, R);
        allocator.registerAction(ctx, a);

        if (a != A)
            A.assign(a);
    }
    return status ;
}
Example 19
Source File: JCublasNDArrayFactory.java From nd4j with Apache License 2.0

@Override
public INDArray pullRows(INDArray source, INDArray destination, int sourceDimension, int[] indexes) {
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    if (indexes == null || indexes.length < 1)
        throw new IllegalStateException("Indexes can't be null or zero-length");

    long[] shape = null;
    if (sourceDimension == 1)
        shape = new long[] {indexes.length, source.shape()[sourceDimension]};
    else if (sourceDimension == 0)
        shape = new long[] {source.shape()[sourceDimension], indexes.length};
    else
        throw new UnsupportedOperationException("2D input is expected");

    INDArray ret = destination;
    if(ret == null){
        ret = Nd4j.createUninitialized(shape, order);
    } else {
        if(!Arrays.equals(shape, destination.shape())){
            throw new IllegalStateException("Cannot pull rows into destination array: expected destination array of"
                            + " shape " + Arrays.toString(shape) + " but got destination array of shape "
                            + Arrays.toString(destination.shape()));
        }
    }

    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(ret, source);

    Pointer x = AtomicAllocator.getInstance().getPointer(source, context);
    Pointer xShape = AtomicAllocator.getInstance().getPointer(source.shapeInfoDataBuffer(), context);
    Pointer z = AtomicAllocator.getInstance().getPointer(ret, context);
    Pointer zShape = AtomicAllocator.getInstance().getPointer(ret.shapeInfoDataBuffer(), context);

    PointerPointer extras = new PointerPointer(AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
                    context.getOldStream(), allocator.getDeviceIdPointer());

    val tempIndexes = new CudaLongDataBuffer(indexes.length);
    AtomicAllocator.getInstance().memcpyBlocking(tempIndexes, new LongPointer(ArrayUtil.toLongArray(indexes)),
                    indexes.length * 8, 0);

    Pointer pIndex = AtomicAllocator.getInstance().getPointer(tempIndexes, context);

    TADManager tadManager = Nd4j.getExecutioner().getTADManager();

    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(source, new int[] {sourceDimension});
    Pair<DataBuffer, DataBuffer> zTadBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] {sourceDimension});

    Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    Pointer zTadShapeInfo = AtomicAllocator.getInstance().getPointer(zTadBuffers.getFirst(), context);

    DataBuffer offsets = tadBuffers.getSecond();
    Pointer tadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context);

    Pointer zTadOffsets = AtomicAllocator.getInstance().getPointer(zTadBuffers.getSecond(), context);

    if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.pullRowsDouble(extras, (DoublePointer) x, (LongPointer) xShape, (DoublePointer) z,
                        (LongPointer) zShape, indexes.length, (LongPointer) pIndex, (LongPointer) tadShapeInfo,
                        new LongPointerWrapper(tadOffsets), (LongPointer) zTadShapeInfo,
                        new LongPointerWrapper(zTadOffsets));
    } else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.pullRowsFloat(extras, (FloatPointer) x, (LongPointer) xShape, (FloatPointer) z,
                        (LongPointer) zShape, indexes.length, (LongPointer) pIndex, (LongPointer) tadShapeInfo,
                        new LongPointerWrapper(tadOffsets), (LongPointer) zTadShapeInfo,
                        new LongPointerWrapper(zTadOffsets));
    } else {
        nativeOps.pullRowsHalf(extras, (ShortPointer) x, (LongPointer) xShape, (ShortPointer) z,
                        (LongPointer) zShape, indexes.length, (LongPointer) pIndex, (LongPointer) tadShapeInfo,
                        new LongPointerWrapper(tadOffsets), (LongPointer) zTadShapeInfo,
                        new LongPointerWrapper(zTadOffsets));
    }

    allocator.registerAction(context, ret, source);

    return ret;
}
Example 20
Source File: JCublasNDArrayFactory.java From deeplearning4j with Apache License 2.0

@Override
public INDArray specialConcat(int dimension, INDArray... toConcat) {
    if (toConcat.length == 1)
        return toConcat[0];

    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    PointerPointer shapeInfoPointers = new PointerPointer(toConcat.length);
    PointerPointer dataPointers = new PointerPointer(toConcat.length);
    AtomicAllocator allocator = AtomicAllocator.getInstance();
    val context = allocator.getDeviceContext();

    int sumAlongDim = 0;

    val outputShape = ArrayUtil.copy(toConcat[0].shape());

    for (int i = 0; i < toConcat.length; i++) {
        ((BaseCudaDataBuffer) toConcat[i].data()).lazyAllocateHostPointer();

        if (toConcat[i].isCompressed())
            Nd4j.getCompressor().decompressi(toConcat[i]);

        allocator.synchronizeHostData(toConcat[i]);
        shapeInfoPointers.put(i, allocator.getHostPointer(toConcat[i].shapeInfoDataBuffer()));
        dataPointers.put(i, allocator.getHostPointer(toConcat[i].data()));
        sumAlongDim += toConcat[i].size(dimension);

        for (int j = 0; j < toConcat[i].rank(); j++)
            if (j != dimension && toConcat[i].size(j) != outputShape[j]) {
                throw new IllegalArgumentException(
                                "Illegal concatenation at array " + i + " and shape element " + j);
            }
    }

    outputShape[dimension] = sumAlongDim;

    val ret = Nd4j.createUninitialized(toConcat[0].dataType(), outputShape, Nd4j.order());
    ((BaseCudaDataBuffer) ret.data()).lazyAllocateHostPointer();

    nativeOps.specialConcat(null, dimension, toConcat.length, dataPointers, shapeInfoPointers,
                    ret.data().addressPointer(),
                    (LongPointer) ret.shapeInfoDataBuffer().addressPointer(),
                    null, null);

    if (nativeOps.lastErrorCode() != 0)
        throw new RuntimeException(nativeOps.lastErrorMessage());

    AllocationPoint point = allocator.getAllocationPoint(ret);

    val perfD = PerformanceTracker.getInstance().helperStartTransaction();

    nativeOps.memcpyAsync(point.getDevicePointer(), point.getHostPointer(),
                    ret.length() * Nd4j.sizeOfDataType(ret.data().dataType()),
                    CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream());

    context.getSpecialStream().synchronize();

    if (nativeOps.lastErrorCode() != 0)
        throw new RuntimeException(nativeOps.lastErrorMessage());

    PerformanceTracker.getInstance().helperRegisterTransaction(point.getDeviceId(), perfD,
                    point.getNumberOfBytes(), MemcpyDirection.HOST_TO_DEVICE);

    point.tickHostRead();
    point.tickDeviceWrite();

    return ret;
}