org.nd4j.linalg.api.ops.performance.PerformanceTracker Java Examples

The following examples show how to use org.nd4j.linalg.api.ops.performance.PerformanceTracker. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NoOp.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a "compressed" buffer by copying the source region verbatim into a newly allocated
 * {@link BytePointer}. The descriptor records identical original and compressed lengths, and the
 * copy is reported to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param srcType     data type of the source (not read by this implementation)
 * @param srcPointer  pointer to the data to copy; per the note below, always host memory
 * @param length      number of elements
 * @param elementSize size of one element, in bytes
 * @return buffer wrapping the copied bytes and the descriptor that describes them
 */
@Override
protected CompressedDataBuffer compressPointer(DataTypeEx srcType, Pointer srcPointer, int length,
                                               int elementSize) {
    // widen BEFORE multiplying: the int product wraps around once the payload exceeds 2 GiB
    final long totalBytes = (long) length * elementSize;

    CompressionDescriptor descriptor = new CompressionDescriptor();
    descriptor.setCompressionType(getCompressionType());
    descriptor.setOriginalLength(totalBytes);
    descriptor.setCompressionAlgorithm(getDescriptor());
    descriptor.setOriginalElementSize(elementSize);
    descriptor.setCompressedLength(totalBytes);
    descriptor.setNumberOfElements(length);

    BytePointer ptr = new BytePointer(totalBytes);

    val perfD = PerformanceTracker.getInstance().helperStartTransaction();

    // this Pointer.memcpy is used intentionally. This method operates on host memory ALWAYS
    Pointer.memcpy(ptr, srcPointer, totalBytes);

    PerformanceTracker.getInstance().helperRegisterTransaction(0, perfD, totalBytes, MemcpyDirection.HOST_TO_HOST);

    return new CompressedDataBuffer(ptr, descriptor);
}
 
Example #2
Source File: PerformanceTrackerTests.java    From nd4j with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a large float array and then expects the tracker to report a positive
 * {@code HOST_TO_DEVICE} bandwidth under key 0. Returns immediately unless the executioner's
 * class name contains "cuda".
 */
@Test
public void testTrackerGpu_1() {
    final boolean cudaBackend =
            Nd4j.getExecutioner().getClass().getCanonicalName().toLowerCase().contains("cuda");
    if (!cudaBackend) {
        return;
    }

    // the array itself is never read; presumably its creation is what produces the measured transfer
    Nd4j.create(new float[100000000], new int[]{10000, 10000});

    val bandwidthByDevice = PerformanceTracker.getInstance().getCurrentBandwidth();

    // H2D figure for device 0
    val hostToDevice = bandwidthByDevice.get(0).get(MemcpyDirection.HOST_TO_DEVICE);
    log.info("H2D bandwidth: {}", bandwidthByDevice);

    assertTrue(hostToDevice > 0);
}
 
Example #3
Source File: PerformanceTrackerTests.java    From nd4j with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a large float array and then expects the tracker to report a positive
 * {@code HOST_TO_HOST} bandwidth under key 0. Returns immediately unless the executioner's
 * class name contains "native".
 */
@Test
public void testTrackerCpu_1() {
    final boolean nativeBackend =
            Nd4j.getExecutioner().getClass().getCanonicalName().toLowerCase().contains("native");
    if (!nativeBackend) {
        return;
    }

    // the array itself is never read; presumably its creation is what produces the measured transfer
    Nd4j.create(new float[100000000], new int[]{10000, 10000});

    val bandwidthByDevice = PerformanceTracker.getInstance().getCurrentBandwidth();

    // H2H figure stored under key 0
    val hostToHost = bandwidthByDevice.get(0).get(MemcpyDirection.HOST_TO_HOST);
    log.info("H2H bandwidth: {}", bandwidthByDevice);

    assertTrue(hostToHost > 0);
}
 
Example #4
Source File: NoOp.java    From nd4j with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a "compressed" buffer by copying the source region verbatim into a newly allocated
 * {@link BytePointer}. The descriptor records identical original and compressed lengths, and the
 * copy is reported to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param srcType     data type of the source (not read by this implementation)
 * @param srcPointer  pointer to the data to copy; per the note below, always host memory
 * @param length      number of elements
 * @param elementSize size of one element, in bytes
 * @return buffer wrapping the copied bytes and the descriptor that describes them
 */
@Override
protected CompressedDataBuffer compressPointer(DataBuffer.TypeEx srcType, Pointer srcPointer, int length,
                int elementSize) {
    // widen BEFORE multiplying: the int product wraps around once the payload exceeds 2 GiB
    final long totalBytes = (long) length * elementSize;

    CompressionDescriptor descriptor = new CompressionDescriptor();
    descriptor.setCompressionType(getCompressionType());
    descriptor.setOriginalLength(totalBytes);
    descriptor.setCompressionAlgorithm(getDescriptor());
    descriptor.setOriginalElementSize(elementSize);
    descriptor.setCompressedLength(totalBytes);
    descriptor.setNumberOfElements(length);

    BytePointer ptr = new BytePointer(totalBytes);

    val perfD = PerformanceTracker.getInstance().helperStartTransaction();

    // this Pointer.memcpy is used intentionally. This method operates on host memory ALWAYS
    Pointer.memcpy(ptr, srcPointer, totalBytes);

    PerformanceTracker.getInstance().helperRegisterTransaction(0, perfD, totalBytes, MemcpyDirection.HOST_TO_HOST);

    return new CompressedDataBuffer(ptr, descriptor);
}
 
Example #5
Source File: BaseNDArray.java    From nd4j with Apache License 2.0 6 votes vote down vote up
/**
 *
 * @param data
 * @param shape
 * @param stride
 * @param offset
 * @param ordering
 */
public BaseNDArray(float[] data, int[] shape, int[] stride, long offset, char ordering) {
    setShapeInformation(Nd4j.getShapeInfoProvider().createShapeInformation(shape, stride, offset,
            Shape.elementWiseStride(shape, stride, ordering == 'f'), ordering));
    if (data != null && data.length > 0) {

        val perfD = PerformanceTracker.getInstance().helperStartTransaction();

        this.data = internalCreateBuffer(data, offset);

        PerformanceTracker.getInstance().helperRegisterTransaction(0, perfD, data.length * Nd4j.sizeOfDataType(), MemcpyDirection.HOST_TO_HOST);

        if (offset >= data.length)
            throw new IllegalArgumentException("invalid offset: must be < data.length");
    }

    init(shape, stride);
}
 
Example #6
Source File: PerformanceTrackerTests.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a large float array and then expects the tracker to report a positive
 * {@code HOST_TO_HOST} bandwidth under key 0. Returns immediately unless the executioner's
 * class name contains "native". Currently ignored.
 */
@Test
@Ignore
public void testTrackerCpu_1() {
    final boolean nativeBackend =
            Nd4j.getExecutioner().getClass().getCanonicalName().toLowerCase().contains("native");
    if (!nativeBackend) {
        return;
    }

    // the array itself is never read; presumably its creation is what produces the measured transfer
    Nd4j.create(new float[100000000], new int[]{10000, 10000});

    val bandwidthByDevice = PerformanceTracker.getInstance().getCurrentBandwidth();

    // H2H figure stored under key 0
    val hostToHost = bandwidthByDevice.get(0).get(MemcpyDirection.HOST_TO_HOST);
    log.info("H2H bandwidth: {}", bandwidthByDevice);

    assertTrue(hostToHost > 0);
}
 
Example #7
Source File: PerformanceTrackerTests.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a large float array and then expects the tracker to report a positive
 * {@code HOST_TO_DEVICE} bandwidth under key 0. Returns immediately unless the executioner's
 * class name contains "cuda". Currently ignored.
 */
@Test
@Ignore("useless these days")
public void testTrackerGpu_1() {
    final boolean cudaBackend =
            Nd4j.getExecutioner().getClass().getCanonicalName().toLowerCase().contains("cuda");
    if (!cudaBackend) {
        return;
    }

    // the array itself is never read; presumably its creation is what produces the measured transfer
    Nd4j.create(new float[100000000], new int[]{10000, 10000});

    val bandwidthByDevice = PerformanceTracker.getInstance().getCurrentBandwidth();

    // H2D figure for device 0
    val hostToDevice = bandwidthByDevice.get(0).get(MemcpyDirection.HOST_TO_DEVICE);
    log.info("H2D bandwidth: {}", bandwidthByDevice);

    assertTrue(hostToDevice > 0);
}
 
Example #8
Source File: SynchronousFlowController.java    From nd4j with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the host side of an allocation to its device side and reports the transfer to the
 * {@link PerformanceTracker} as {@code HOST_TO_DEVICE}.
 *
 * <p>Does nothing when the point is constant, when {@code isActualOnDeviceSide()} is already
 * true, or when the allocation status is not {@code DEVICE}.
 *
 * @param point allocation to synchronize
 * @throws IllegalStateException if {@code memcpyAsync} returns 0
 */
@Override
public void synchronizeToDevice(AllocationPoint point) {
    // short-circuit order matches the original nesting: constant, then freshness, then status
    if (point.isConstant()
            || point.isActualOnDeviceSide()
            || point.getAllocationStatus() != AllocationStatus.DEVICE) {
        return;
    }

    final CudaContext context = (CudaContext) allocator.getDeviceContext().getContext();
    final long startedAt = PerformanceTracker.getInstance().helperStartTransaction();

    final boolean copyFailed = nativeOps.memcpyAsync(
            point.getDevicePointer(),
            point.getHostPointer(),
            AllocationUtils.getRequiredMemory(point.getShape()),
            CudaConstants.cudaMemcpyHostToDevice,
            context.getSpecialStream()) == 0;
    if (copyFailed) {
        throw new IllegalStateException("MemcpyAsync failed: " + point.getShape());
    }

    commitTransfer(context.getSpecialStream());
    point.tickDeviceRead();

    PerformanceTracker.getInstance().helperRegisterTransaction(
            point.getDeviceId(), startedAt, point.getNumberOfBytes(), MemcpyDirection.HOST_TO_DEVICE);
}
 
Example #9
Source File: BaseNDArray.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 *
 * @param data
 * @param shape
 * @param stride
 * @param offset
 * @param ordering
 */
public BaseNDArray(float[] data, int[] shape, int[] stride, long offset, char ordering) {
    Shape.assertValidOrder(ordering);
    setShapeInformation(Nd4j.getShapeInfoProvider().createShapeInformation(ArrayUtil.toLongArray(shape), ArrayUtil.toLongArray(stride),
            Shape.elementWiseStride(shape, stride, ordering == 'f'), ordering, DataType.FLOAT, data != null && data.length > 0 ? false : true));
    if (data != null && data.length > 0) {

        val perfD = PerformanceTracker.getInstance().helperStartTransaction();

        this.data = internalCreateBuffer(data, offset);

        PerformanceTracker.getInstance().helperRegisterTransaction(0, perfD, data.length * Nd4j.sizeOfDataType(DataType.FLOAT), MemcpyDirection.HOST_TO_HOST);

        if (offset >= data.length)
            throw new IllegalArgumentException("invalid offset: must be < data.length");
    }

    init(shape, stride);
}
 
Example #10
Source File: NativeOpExecutioner.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Extends the inherited environment description with CPU-backend details: backend name,
 * OpenMP and BLAS thread counts, BLAS vendor, free host memory and, when a tracker is
 * available, the current memory bandwidth figures.
 *
 * @return the populated properties object obtained from the superclass
 */
@Override
public Properties getEnvironmentInformation() {
    final Properties env = super.getEnvironmentInformation();

    env.put(Nd4jEnvironment.BACKEND_KEY, "CPU");
    env.put(Nd4jEnvironment.OMP_THREADS_KEY, loop.ompGetMaxThreads());
    env.put(Nd4jEnvironment.BLAS_THREADS_KEY, Nd4j.factory().blas().getMaxThreads());
    env.put(Nd4jEnvironment.BLAS_VENDOR_KEY, Nd4j.factory().blas().getBlasVendor().toString());
    env.put(Nd4jEnvironment.HOST_FREE_MEMORY_KEY, Pointer.maxBytes() - Pointer.totalBytes());

    /*
    Bandwidth is optional. Environment information is logged while ND4J is still initializing,
    but PerformanceTracker needs ND4J initialization to be finished before it can be initialized
    itself. So getInstance() can legitimately return null when this method is reached through
    OpExecutioner.printEnvironmentInformation() during ND4J class initialization, even though it
    reads a static final field (which may simply not be assigned yet).
     */
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    if (tracker != null) {
        env.put(Nd4jEnvironment.MEMORY_BANDWIDTH_KEY, tracker.getCurrentBandwidth());
    }

    return env;
}
 
Example #11
Source File: BaseCudaDataBuffer.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Meant for creating another view of a buffer.
 *
 * <p>Besides delegating to the superclass, this constructor allocates an {@code OpaqueDataBuffer},
 * hands it the existing pointer as its primary buffer, wraps it in an {@code AllocationPoint},
 * registers this object with the deallocator service, and then issues an asynchronous
 * host-to-device copy of {@code length * getElementSize()} bytes on the context's special stream.
 * The copy is reported to the {@link PerformanceTracker} and the stream is synchronized before
 * the constructor returns.
 *
 * @param pointer the underlying buffer to create a view from
 * @param indexer the indexer for the pointer
 * @param length  the length of the view
 */
public BaseCudaDataBuffer(Pointer pointer, Indexer indexer, long length) {
    super(pointer, indexer, length);

    // allocating interop buffer
    this.ptrDataBuffer = OpaqueDataBuffer.allocateDataBuffer(length, type, false);

    // passing existing pointer to native holder
    this.ptrDataBuffer.setPrimaryBuffer(pointer, length);

    //cuda specific bits
    this.allocationPoint = new AllocationPoint(ptrDataBuffer, length * elementSize);
    // NOTE(review): `this` is handed out before the constructor has finished — confirm the service tolerates that
    Nd4j.getDeallocatorService().pickObject(this);

    // now we're getting context and copying our stuff to device
    val context = AtomicAllocator.getInstance().getDeviceContext();

    // value later passed to helperRegisterTransaction as the transaction's start marker
    val perfD = PerformanceTracker.getInstance().helperStartTransaction();

    // asynchronous copy: the call is issued here, the stream is only synchronized at the end of the constructor
    NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), pointer, length * getElementSize(), CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream());

    // NOTE(review): the start marker is halved here (perfD / 2), unlike the other call sites that pass it
    // unchanged — confirm this is intentional.
    // NOTE(review): the transaction is registered BEFORE the stream is synchronized, so the recorded
    // duration presumably does not include the copy itself — confirm.
    PerformanceTracker.getInstance().helperRegisterTransaction(allocationPoint.getDeviceId(), perfD / 2, allocationPoint.getNumberOfBytes(), MemcpyDirection.HOST_TO_DEVICE);
    context.getSpecialStream().synchronize();
}
 
Example #12
Source File: IOTiming.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Times reading one record and turning it into an array, and collects the tracker's current
 * bandwidth figures for device 0.
 *
 * <p>Two intervals are measured with {@link System#nanoTime()}: the call to
 * {@code reader.next()} and the call to {@code function.createFromRecord(...)}.
 *
 * @param reader      record reader; initialized here from {@code inputStream}
 * @param inputStream stream the record is read from
 * @param function    converts the record that was read into an array
 * @return statistics holding both durations plus the host-to-device and device-to-host bandwidth
 * @throws Exception if initializing the reader, reading, or array creation fails
 */
public static TimingStatistics timeNDArrayCreation(RecordReader reader,
                                                   InputStream inputStream,
                                                   INDArrayCreationFunction function) throws Exception {

    reader.initialize(new InputStreamInputSplit(inputStream));

    final long readStart = System.nanoTime();
    List<Writable> next = reader.next();
    final long etlDiff = System.nanoTime() - readStart;

    // only the elapsed time matters here; the created array is not used afterwards
    final long creationStart = System.nanoTime();
    function.createFromRecord(next);
    final long endCreationDiff = System.nanoTime() - creationStart;

    Map<Integer, Map<MemcpyDirection, Long>> currentBandwidth = PerformanceTracker.getInstance().getCurrentBandwidth();
    val hostToDevice = currentBandwidth.get(0).get(MemcpyDirection.HOST_TO_DEVICE);
    // was read with HOST_TO_DEVICE as well, so both statistics reported the same figure
    val deviceToHost = currentBandwidth.get(0).get(MemcpyDirection.DEVICE_TO_HOST);

    return TimingStatistics.builder()
            .diskReadingTimeNanos(etlDiff)
            .bandwidthNanosHostToDevice(hostToDevice)
            .bandwidthDeviceToHost(deviceToHost)
            .ndarrayCreationTimeNanos(endCreationDiff)
            .build();
}
 
Example #13
Source File: BaseNDArray.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} at the given offset via {@code Nd4j.createBuffer} and
 * reports the operation to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data   source values
 * @param offset offset forwarded to {@code Nd4j.createBuffer}
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(float[] data, long offset) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data, offset);

    // reported size: element count times Nd4j.sizeOfDataType()
    final long reportedBytes = data.length * Nd4j.sizeOfDataType();
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #14
Source File: PerformanceTrackerTests.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a single memory transaction of 5000 bytes over 10000 nanoseconds and checks the
 * value returned by the tracker.
 */
@Test
public void testPerformanceTracker_3() {
    // 5000 bytes in 10000 ns (10 microseconds) should come out at about 500 bytes per microsecond
    assertEquals(500, PerformanceTracker.getInstance().addMemoryTransaction(0, 10000, 5000));
}
 
Example #15
Source File: BaseNDArray.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} at the given offset via {@code Nd4j.createBuffer} and
 * reports the operation to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data   source values
 * @param offset offset forwarded to {@code Nd4j.createBuffer}
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(double[] data, long offset) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data, offset);

    // reported size: element count times Nd4j.sizeOfDataType()
    final long reportedBytes = data.length * Nd4j.sizeOfDataType();
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #16
Source File: BaseNDArray.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} at the given offset via {@code Nd4j.createBuffer} and
 * reports the operation to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data   source values
 * @param offset offset forwarded to {@code Nd4j.createBuffer}
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(int[] data, long offset) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data, offset);

    // reported size: element count times Nd4j.sizeOfDataType()
    final long reportedBytes = data.length * Nd4j.sizeOfDataType();
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #17
Source File: BaseNDArray.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} via {@code Nd4j.createBuffer} and reports the operation
 * to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data source values
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(int[] data) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data);

    // reported size: element count times Nd4j.sizeOfDataType()
    final long reportedBytes = data.length * Nd4j.sizeOfDataType();
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #18
Source File: BaseNDArray.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} via {@code Nd4j.createBuffer} and reports the operation
 * to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data source values
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(double[] data) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data);

    // reported size: element count times Nd4j.sizeOfDataType()
    final long reportedBytes = data.length * Nd4j.sizeOfDataType();
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #19
Source File: BaseNDArray.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} via {@code Nd4j.createBuffer} and reports the operation
 * to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data source values
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(float[] data) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data);

    // reported size: element count times Nd4j.sizeOfDataType()
    final long reportedBytes = data.length * Nd4j.sizeOfDataType();
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #20
Source File: PerformanceTrackerTests.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a single memory transaction of 5000 bytes over 100 nanoseconds and checks the
 * value returned by the tracker.
 */
@Test
public void testPerformanceTracker_1() {
    // 5000 bytes in 100 ns (0.1 microsecond) should come out at about 50000 bytes per microsecond
    assertEquals(50000, PerformanceTracker.getInstance().addMemoryTransaction(0, 100, 5000));
}
 
Example #21
Source File: BasicMemoryManager.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the full contents of {@code srcBuffer} into {@code dstBuffer} with
 * {@code Pointer.memcpy} and reports the copy to the {@link PerformanceTracker} as a
 * host-to-host transaction.
 *
 * @param dstBuffer buffer whose address pointer is the copy target
 * @param srcBuffer buffer whose address pointer is the copy source; its length times its
 *                  element size determines how many bytes are copied
 */
@Override
public void memcpy(DataBuffer dstBuffer, DataBuffer srcBuffer) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    // the same byte count drives both the copy and the reported transaction
    final long byteCount = srcBuffer.length() * srcBuffer.getElementSize();
    Pointer.memcpy(dstBuffer.addressPointer(), srcBuffer.addressPointer(), byteCount);

    tracker.helperRegisterTransaction(0, startedAt, byteCount, MemcpyDirection.HOST_TO_HOST);
}
 
Example #22
Source File: PerformanceTrackerTests.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a single host-to-host memory transaction of 5000 bytes over 10 nanoseconds and
 * checks the value returned by the tracker.
 */
@Test
public void testPerformanceTracker_2() {
    // 5000 bytes in 10 ns (0.01 microsecond) should come out at about 500000 bytes per microsecond
    assertEquals(500000,
            PerformanceTracker.getInstance().addMemoryTransaction(0, 10, 5000, MemcpyDirection.HOST_TO_HOST));
}
 
Example #23
Source File: PerformanceTrackerTests.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a single memory transaction of 5000 bytes over 100 nanoseconds and checks the
 * value returned by the tracker.
 */
@Test
public void testPerformanceTracker_1() {
    // 5000 bytes in 100 ns (0.1 microsecond) should come out at about 50000 bytes per microsecond
    assertEquals(50000, PerformanceTracker.getInstance().addMemoryTransaction(0, 100, 5000));
}
 
Example #24
Source File: PerformanceTrackerTests.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a single host-to-host memory transaction of 5000 bytes over 10 nanoseconds and
 * checks the value returned by the tracker.
 */
@Test
public void testPerformanceTracker_2() {
    // 5000 bytes in 10 ns (0.01 microsecond) should come out at about 500000 bytes per microsecond
    assertEquals(500000,
            PerformanceTracker.getInstance().addMemoryTransaction(0, 10, 5000, MemcpyDirection.HOST_TO_HOST));
}
 
Example #25
Source File: PerformanceTrackerTests.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a single memory transaction of 5000 bytes over 10000 nanoseconds and checks the
 * value returned by the tracker.
 */
@Test
public void testPerformanceTracker_3() {
    // 5000 bytes in 10000 ns (10 microseconds) should come out at about 500 bytes per microsecond
    assertEquals(500, PerformanceTracker.getInstance().addMemoryTransaction(0, 10000, 5000));
}
 
Example #26
Source File: NativeOpExecutioner.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Extends the inherited environment description with CPU-backend details: backend name,
 * OpenMP and BLAS thread counts, BLAS vendor, free host memory and the current memory
 * bandwidth figures.
 *
 * @return the populated properties object obtained from the superclass
 */
@Override
public Properties getEnvironmentInformation() {
    final Properties env = super.getEnvironmentInformation();

    env.put(Nd4jEnvironment.BACKEND_KEY, "CPU");
    env.put(Nd4jEnvironment.OMP_THREADS_KEY, loop.ompGetMaxThreads());
    env.put(Nd4jEnvironment.BLAS_THREADS_KEY, Nd4j.factory().blas().getMaxThreads());
    env.put(Nd4jEnvironment.BLAS_VENDOR_KEY, Nd4j.factory().blas().getBlasVendor().toString());
    env.put(Nd4jEnvironment.HOST_FREE_MEMORY_KEY, Pointer.maxBytes() - Pointer.totalBytes());

    // bandwidth figures come straight from the performance tracker
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    env.put(Nd4jEnvironment.MEMORY_BANDWIDTH_KEY, tracker.getCurrentBandwidth());

    return env;
}
 
Example #27
Source File: BasicMemoryManager.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the full contents of {@code srcBuffer} into {@code dstBuffer} with
 * {@code Pointer.memcpy} and reports the copy to the {@link PerformanceTracker} as a
 * host-to-host transaction.
 *
 * @param dstBuffer buffer whose address pointer is the copy target
 * @param srcBuffer buffer whose address pointer is the copy source; its length times its
 *                  element size determines how many bytes are copied
 */
@Override
public void memcpy(DataBuffer dstBuffer, DataBuffer srcBuffer) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    // the same byte count drives both the copy and the reported transaction
    final long byteCount = srcBuffer.length() * srcBuffer.getElementSize();
    Pointer.memcpy(dstBuffer.addressPointer(), srcBuffer.addressPointer(), byteCount);

    tracker.helperRegisterTransaction(0, startedAt, byteCount, MemcpyDirection.HOST_TO_HOST);
}
 
Example #28
Source File: BaseNDArray.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} via {@code Nd4j.createBuffer} and reports the operation
 * to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data source values
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(float[] data) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data);

    // reported size: element count times the size of the created buffer's data type
    final long reportedBytes = data.length * Nd4j.sizeOfDataType(created.dataType());
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #29
Source File: BaseNDArray.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} via {@code Nd4j.createBuffer} and reports the operation
 * to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data source values
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(double[] data) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data);

    // reported size: element count times the size of the created buffer's data type
    final long reportedBytes = data.length * Nd4j.sizeOfDataType(created.dataType());
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}
 
Example #30
Source File: BaseNDArray.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a buffer from {@code data} via {@code Nd4j.createBuffer} and reports the operation
 * to the {@link PerformanceTracker} as a host-to-host transaction.
 *
 * @param data source values
 * @return the buffer returned by {@code Nd4j.createBuffer}
 */
protected static DataBuffer internalCreateBuffer(int[] data) {
    final PerformanceTracker tracker = PerformanceTracker.getInstance();
    final long startedAt = tracker.helperStartTransaction();

    final DataBuffer created = Nd4j.createBuffer(data);

    // reported size: element count times the size of the created buffer's data type
    final long reportedBytes = data.length * Nd4j.sizeOfDataType(created.dataType());
    tracker.helperRegisterTransaction(0, startedAt, reportedBytes, MemcpyDirection.HOST_TO_HOST);

    return created;
}