Java Code Examples for org.nd4j.linalg.dataset.DataSet#copy()
The following examples show how to use org.nd4j.linalg.dataset.DataSet#copy().
You can go to the original project or source file by following the links above each example.
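Before the individual examples, here is a minimal, self-contained sketch of the pattern these tests rely on: DataSet#copy() returns a new DataSet backed by duplicated arrays, so in-place operations on the copy (such as pre-processing) do not modify the original. This sketch is illustrative only; the class name DataSetCopyExample and the array shapes are made up for the example and do not come from any of the projects below.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class DataSetCopyExample {
    public static void main(String[] args) {
        // Build a small dataset: 4 examples, 3 features, 2 label columns.
        INDArray features = Nd4j.rand(4, 3);
        INDArray labels = Nd4j.zeros(4, 2);
        DataSet original = new DataSet(features, labels);

        // Take an independent copy, then mutate the copy's features in place.
        DataSet working = original.copy();
        working.getFeatures().muli(100);

        // The original's features are untouched, so this prints "false".
        System.out.println(original.getFeatures().equals(working.getFeatures()));
    }
}

This is the same reason the tests below always call preProcessor.preProcess(...) on d.copy() rather than on d itself: the source DataSet can be reused across loop iterations.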
Example 1
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 6 votes |
@Test
public void allMajority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMajorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        //if all majority default is to mask all time steps
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        assertEquals(Nd4j.zeros(dToPreProcess.getLabelsMaskArray().shape()),
                dToPreProcess.getLabelsMaskArray());

        //change default and check distribution which should be 1-targetMinorityDist
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 2
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 6 votes |
@Test
public void allMinority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMinorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        //all minority classes present - check that no time steps are masked
        assertEquals(Nd4j.ones(minibatchSize, shortSeq), dToPreProcess.getLabelsMaskArray());

        //check behavior with override minority - now these are seen as all majority classes
        preProcessor.overrideMinorityDefault();
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 3
Source File: KFoldIterator.java From nd4j with Apache License 2.0 | 6 votes |
/**
 * Create an iterator given the dataset and a value of k (optional, defaults to 10).
 * If the number of samples in the dataset is not a multiple of k, the last fold will have
 * fewer samples, with the rest all having the same number of samples.
 *
 * @param k          number of folds (optional, defaults to 10)
 * @param singleFold DataSet to split into k folds
 */
public KFoldIterator(int k, DataSet singleFold) {
    this.k = k;
    this.singleFold = singleFold.copy();
    if (k <= 1)
        throw new IllegalArgumentException();
    if (singleFold.numExamples() % k != 0) {
        if (k != 2) {
            this.batch = singleFold.numExamples() / (k - 1);
            this.lastBatch = singleFold.numExamples() % (k - 1);
        } else {
            this.lastBatch = singleFold.numExamples() / 2;
            this.batch = this.lastBatch + 1;
        }
    } else {
        this.batch = singleFold.numExamples() / k;
        this.lastBatch = singleFold.numExamples() / k;
    }
}
Example 4
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void allMajority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMajorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        //if all majority default is to mask all time steps
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray exp = Nd4j.zeros(dToPreProcess.getLabelsMaskArray().shape());
        INDArray lm = dToPreProcess.getLabelsMaskArray();
        assertEquals(exp, lm);

        //change default and check distribution which should be 1-targetMinorityDist
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .castTo(Nd4j.defaultFloatingPointType())
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 5
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void allMinority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMinorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        //all minority classes present - check that no time steps are masked
        assertEquals(Nd4j.ones(minibatchSize, shortSeq), dToPreProcess.getLabelsMaskArray());

        //check behavior with override minority - now these are seen as all majority classes
        preProcessor.overrideMinorityDefault();
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .castTo(Nd4j.defaultFloatingPointType())
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 6
Source File: LoneTest.java From nd4j with Apache License 2.0 | 5 votes |
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);

    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);
}
Example 7
Source File: LoneTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);

    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);
}
Example 8
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDist() {
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);

    DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 2;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        INDArray labelsBefore = dataSetToPreProcess.getLabels().dup();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        assertEquals(labelsBefore, labels);

        //check masks are zero where there are no time steps
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in window, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution
            INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2, 0), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5, 0), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 9
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDistOneHot() {
    //preprocessor should give 30% minority class for every "window"
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);
    preProcessor.overrideMinorityDefault();

    //construct a dataset with known distribution of minority class and varying time steps
    DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 10;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();

        //check masks are zero where there were no time steps
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in the window length, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution after accounting for masks
            INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2, 0), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5, 0), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 10
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDist() {
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);

    DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 2;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        INDArray labelsBefore = dataSetToPreProcess.getLabels().dup();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        assertEquals(labelsBefore, labels);

        //check masks are zero where there are no time steps
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in window, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution
            INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 11
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDistOneHot() {
    //preprocessor should give 30% minority class for every "window"
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);
    preProcessor.overrideMinorityDefault();

    //construct a dataset with known distribution of minority class and varying time steps
    DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 10;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();

        //check masks are zero where there were no time steps
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in the window length, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution after accounting for masks
            INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 12
Source File: ModelSerializerTest.java From deeplearning4j with Apache License 2.0 | 3 votes |
@Test
public void testSaveRestoreNormalizerFromInputStream() throws Exception {
    DataSet dataSet = trivialDataSet();

    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    ComputationGraph cg = simpleComputationGraph();
    cg.init();

    File tempFile = tempDir.newFile();
    ModelSerializer.writeModel(cg, tempFile, true);
    ModelSerializer.addNormalizerToModel(tempFile, norm);

    FileInputStream fis = new FileInputStream(tempFile);

    NormalizerStandardize restored = ModelSerializer.restoreNormalizerFromInputStream(fis);

    assertNotEquals(null, restored);

    DataSet dataSet2 = dataSet.copy();
    norm.preProcess(dataSet2);
    assertNotEquals(dataSet.getFeatures(), dataSet2.getFeatures());

    restored.revert(dataSet2);
    assertEquals(dataSet.getFeatures(), dataSet2.getFeatures());
}