org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize Java Examples
The following examples show how to use org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize, a DataSet pre-processor that standardizes features to zero mean and unit variance. Each example is extracted from an open source project; the source file, project, and license are noted above it.
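Most of the examples follow the same pattern: fit the normalizer to collect per-feature statistics, then either transform DataSets directly or attach the normalizer to an iterator as a pre-processor. Below is a minimal sketch of both variants; the class and method names are illustrative and not taken from any of the projects listed here.

import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;

public class NormalizerStandardizeSketch {

    // Variant 1: fit on the training split only, then transform both splits in place
    public static void standardize(DataSet trainData, DataSet testData) {
        DataNormalization normalizer = new NormalizerStandardize();
        normalizer.fit(trainData);       // collect mean/std from the training data; does not modify it
        normalizer.transform(trainData); // standardize in place: (x - mean) / std
        normalizer.transform(testData);  // reuse the *training* statistics on the test data
    }

    // Variant 2: fit over an iterator, then attach it so every batch is standardized on the fly
    public static void standardize(DataSetIterator iterator) {
        DataNormalization normalizer = new NormalizerStandardize();
        normalizer.fit(iterator);             // one pass over the iterator to accumulate statistics
        iterator.reset();
        iterator.setPreProcessor(normalizer); // batches returned from now on are standardized
    }
}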
Example #1
Source File: IrisFileDataSource.java From FederatedAndroidTrainer with MIT License
private void createDataSource() throws IOException, InterruptedException {
    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter);
    recordReader.initialize(new InputStreamInputSplit(dataFile));

    //Second: the RecordReaderDataSetIterator handles conversion to DataSet objects, ready for use in neural network
    int labelIndex = 4;  //5 values in each row of the iris.txt CSV: 4 input features followed by an integer label (class) index. Labels are the 5th value (index 4) in each row
    int numClasses = 3;  //3 classes (types of iris flowers) in the iris data set. Classes have integer values 0, 1 or 2

    DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numClasses);
    DataSet allData = iterator.next();
    allData.shuffle();
    SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.80);  //Use 80% of data for training

    trainingData = testAndTrain.getTrain();
    testData = testAndTrain.getTest();

    //We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(trainingData);        //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(trainingData);  //Apply normalization to the training data
    normalizer.transform(testData);      //Apply normalization to the test data. This is using statistics calculated from the *training* set
}
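Fitting the normalizer on the training split only, then reusing those statistics for the test split, keeps test-set information from leaking into the pre-processing step.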
Example #2
Source File: PreProcessor3D4DTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
Example #3
Source File: PreProcessor3D4DTest.java From nd4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
Example #4
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
Example #5
Source File: ModelSerializerTest.java From deeplearning4j with Apache License 2.0
@Test
public void testJavaSerde_1() throws Exception {
    int nIn = 5;
    int nOut = 6;

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
            .graphBuilder()
            .addInputs("in")
            .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).build(), "in")
            .setOutputs("0")
            .validateOutputLayerConfig(false)
            .build();

    ComputationGraph net = new ComputationGraph(conf);
    net.init();

    DataSet dataSet = trivialDataSet();
    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    val b = SerializationUtils.serialize(net);

    ComputationGraph restored = SerializationUtils.deserialize(b);

    assertEquals(net, restored);
}
Example #6
Source File: ModelSerializerTest.java From deeplearning4j with Apache License 2.0
@Test
public void testJavaSerde_2() throws Exception {
    int nIn = 5;
    int nOut = 6;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
            .list()
            .layer(0, new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    DataSet dataSet = trivialDataSet();
    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    val b = SerializationUtils.serialize(net);

    MultiLayerNetwork restored = SerializationUtils.deserialize(b);

    assertEquals(net, restored);
}
Example #7
Source File: RPTreeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRPTree() throws Exception {
    DataSetIterator mnist = new MnistDataSetIterator(150, 150);
    RPTree rpTree = new RPTree(784, 50);
    DataSet d = mnist.next();
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(d);
    normalizerStandardize.transform(d.getFeatures());
    INDArray data = d.getFeatures();
    rpTree.buildTree(data);
    assertEquals(4, rpTree.getLeaves().size());
    assertEquals(0, rpTree.getRoot().getDepth());

    List<Integer> candidates = rpTree.getCandidates(data.getRow(0));
    assertFalse(candidates.isEmpty());
    assertEquals(10, rpTree.query(data.slice(0), 10).length());
    System.out.println(candidates.size());

    rpTree.addNodeAtIndex(150, data.getRow(0));
}
Example #8
Source File: DiabetesFileDataSource.java From FederatedAndroidTrainer with MIT License
private void createDataSource() throws IOException, InterruptedException {
    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter);
    recordReader.initialize(new InputStreamInputSplit(dataFile));

    //Second: the RecordReaderDataSetIterator handles conversion to DataSet objects, ready for use in neural network
    int labelIndex = 11;

    DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, labelIndex, true);
    DataSet allData = iterator.next();
    SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.80);  //Use 80% of data for training

    trainingData = testAndTrain.getTrain();
    testData = testAndTrain.getTest();

    //We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(trainingData);        //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(trainingData);  //Apply normalization to the training data
    normalizer.transform(testData);      //Apply normalization to the test data. This is using statistics calculated from the *training* set
}
Example #9
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
Example #10
Source File: StandardizeSerializerStrategy.java From deeplearning4j with Apache License 2.0
@Override
public NormalizerStandardize restore(@NonNull InputStream stream) throws IOException {
    DataInputStream dis = new DataInputStream(stream);

    boolean fitLabels = dis.readBoolean();

    NormalizerStandardize result = new NormalizerStandardize(Nd4j.read(dis), Nd4j.read(dis));
    result.fitLabel(fitLabels);
    if (fitLabels) {
        result.setLabelStats(Nd4j.read(dis), Nd4j.read(dis));
    }

    return result;
}
Example #11
Source File: DataSet.java From nd4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
Example #12
Source File: StandardizeSerializerStrategy.java From deeplearning4j with Apache License 2.0
@Override
public void write(@NonNull NormalizerStandardize normalizer, @NonNull OutputStream stream) throws IOException {
    try (DataOutputStream dos = new DataOutputStream(stream)) {
        dos.writeBoolean(normalizer.isFitLabel());

        Nd4j.write(normalizer.getMean(), dos);
        Nd4j.write(normalizer.getStd(), dos);

        if (normalizer.isFitLabel()) {
            Nd4j.write(normalizer.getLabelMean(), dos);
            Nd4j.write(normalizer.getLabelStd(), dos);
        }
        dos.flush();
    }
}
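Together with the restore method in Example #10, this defines the binary layout of a serialized NormalizerStandardize: the fitLabel flag, the feature mean and standard deviation arrays, and, only when labels were fitted, the label mean and standard deviation arrays. The reads in restore must mirror this write order exactly.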
Example #13
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    //Check that we don't get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    //Check that the transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
Example #14
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is basically constant with a small std deviation, and the constant is large.
    // Check that the algorithm can handle it.
    double tolerancePerc = 1; //Within 1%
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new float[] {x, y, (float) z}).castTo(Nd4j.defaultFloatingPointType()).reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
Example #15
Source File: StandardizeSerializerStrategy.java From nd4j with Apache License 2.0
@Override
public NormalizerStandardize restore(@NonNull InputStream stream) throws IOException {
    DataInputStream dis = new DataInputStream(stream);

    boolean fitLabels = dis.readBoolean();

    NormalizerStandardize result = new NormalizerStandardize(Nd4j.read(dis), Nd4j.read(dis));
    result.fitLabel(fitLabels);
    if (fitLabels) {
        result.setLabelStats(Nd4j.read(dis), Nd4j.read(dis));
    }

    return result;
}
Example #16
Source File: PreProcessor3D4DTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce3d() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();

    int timeSteps = 15;
    int samples = 100;
    //multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] {1, -2, 3}).reshape(3, 1);
    INDArray featureScaleB = Nd4j.create(new double[] {2, 2, 3}).reshape(3, 1);

    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);

    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean.castTo(DataType.FLOAT), myNormalizer.getMean().castTo(DataType.FLOAT));
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin.castTo(DataType.FLOAT), myMinMaxScaler.getMin().castTo(DataType.FLOAT));
    assertEquals(caseB.expectedMax.castTo(DataType.FLOAT), myMinMaxScaler.getMax().castTo(DataType.FLOAT));

    //Same test with an iterator: values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean().castTo(DataType.FLOAT), caseA.expectedMean.castTo(DataType.FLOAT));
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin().castTo(DataType.FLOAT), caseB.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getMax().castTo(DataType.FLOAT), caseB.expectedMax.castTo(DataType.FLOAT));
}
Example #17
Source File: StandardizeSerializerStrategy.java From nd4j with Apache License 2.0
@Override
public void write(@NonNull NormalizerStandardize normalizer, @NonNull OutputStream stream) throws IOException {
    try (DataOutputStream dos = new DataOutputStream(stream)) {
        dos.writeBoolean(normalizer.isFitLabel());

        Nd4j.write(normalizer.getMean(), dos);
        Nd4j.write(normalizer.getStd(), dos);

        if (normalizer.isFitLabel()) {
            Nd4j.write(normalizer.getLabelMean(), dos);
            Nd4j.write(normalizer.getLabelStd(), dos);
        }
        dos.flush();
    }
}
Example #18
Source File: NormalizerTests.java From nd4j with Apache License 2.0
@Before
public void randomData() {
    Nd4j.getRandom().setSeed(12345);
    batchSize = 13;
    batchCount = 20;
    lastBatch = batchSize / 2;
    INDArray origFeatures = Nd4j.rand(batchCount * batchSize + lastBatch, 10);
    INDArray origLabels = Nd4j.rand(batchCount * batchSize + lastBatch, 3);
    data = new DataSet(origFeatures, origLabels);
    stdScaler = new NormalizerStandardize();
    minMaxScaler = new NormalizerMinMaxScaler();
}
Example #19
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    //Check that we don't get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    //Check that the transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
Example #20
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testDifferentBatchSizes() {
    // Create a 6x1 matrix of the numbers 1 through 6
    INDArray values = Nd4j.linspace(1, 6, 6, DataType.DOUBLE).reshape(1, -1).transpose();
    DataSet dataSet = new DataSet(values, values);

    // Test fitting a DataSet
    NormalizerStandardize norm1 = new NormalizerStandardize();
    norm1.fit(dataSet);
    assertEquals(3.5f, norm1.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm1.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with equal batch sizes
    DataSetIterator testIter1 = new TestDataSetIterator(dataSet, 3); // Will yield 2 batches of 3 rows
    NormalizerStandardize norm2 = new NormalizerStandardize();
    norm2.fit(testIter1);
    assertEquals(3.5f, norm2.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm2.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with varying batch sizes
    DataSetIterator testIter2 = new TestDataSetIterator(dataSet, 4); // Will yield a batch of 4 and a batch of 2 rows
    NormalizerStandardize norm3 = new NormalizerStandardize();
    norm3.fit(testIter2);
    assertEquals(3.5f, norm3.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm3.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with batches of single rows
    DataSetIterator testIter3 = new TestDataSetIterator(dataSet, 1); // Will yield 6 batches of 1 row
    NormalizerStandardize norm4 = new NormalizerStandardize();
    norm4.fit(testIter3);
    assertEquals(3.5f, norm4.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm4.getStd().getFloat(0), 1e-4);
}
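All four normalizers arrive at the same mean and standard deviation because fitting over an iterator accumulates running statistics across batches, so the batch size does not affect the result.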
Example #21
Source File: DataSetIteratorHelper.java From Java-Deep-Learning-Cookbook with MIT License
private static DataSetIteratorSplitter createDataSetSplitter() throws IOException, InterruptedException {
    final RecordReader recordReader = DataSetIteratorHelper.generateReader(new ClassPathResource("Churn_Modelling.csv").getFile());
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader, batchSize)
            .classification(labelIndex, numClasses)
            .build();
    final DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(dataNormalization);
    final DataSetIteratorSplitter dataSetIteratorSplitter = new DataSetIteratorSplitter(dataSetIterator, 1250, 0.8);
    return dataSetIteratorSplitter;
}
Example #22
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is basically constant with a small std deviation, and the constant is large.
    // Check that the algorithm can handle it.
    double tolerancePerc = 1; //Within 1%
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
Example #23
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testDifferentBatchSizes() {
    // Create a 6x1 matrix of the numbers 1 through 6
    INDArray values = Nd4j.linspace(1, 6, 6).transpose();
    DataSet dataSet = new DataSet(values, values);

    // Test fitting a DataSet
    NormalizerStandardize norm1 = new NormalizerStandardize();
    norm1.fit(dataSet);
    assertEquals(3.5f, norm1.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm1.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with equal batch sizes
    DataSetIterator testIter1 = new TestDataSetIterator(dataSet, 3); // Will yield 2 batches of 3 rows
    NormalizerStandardize norm2 = new NormalizerStandardize();
    norm2.fit(testIter1);
    assertEquals(3.5f, norm2.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm2.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with varying batch sizes
    DataSetIterator testIter2 = new TestDataSetIterator(dataSet, 4); // Will yield a batch of 4 and a batch of 2 rows
    NormalizerStandardize norm3 = new NormalizerStandardize();
    norm3.fit(testIter2);
    assertEquals(3.5f, norm3.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm3.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with batches of single rows
    DataSetIterator testIter3 = new TestDataSetIterator(dataSet, 1); // Will yield 6 batches of 1 row
    NormalizerStandardize norm4 = new NormalizerStandardize();
    norm4.fit(testIter3);
    assertEquals(3.5f, norm4.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm4.getStd().getFloat(0), 1e-4);
}
Example #24
Source File: DataSet.java From deeplearning4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
Example #25
Source File: EvaluationToolsTests.java From deeplearning4j with Apache License 2.0
@Test
public void testRocMultiToHtml() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(150, 150);

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    NormalizerStandardize ns = new NormalizerStandardize();
    DataSet ds = iter.next();
    ns.fit(ds);
    ns.transform(ds);

    for (int i = 0; i < 30; i++) {
        net.fit(ds);
    }

    for (int numSteps : new int[] {20, 0}) {
        ROCMultiClass roc = new ROCMultiClass(numSteps);
        iter.reset();

        INDArray f = ds.getFeatures();
        INDArray l = ds.getLabels();
        INDArray out = net.output(f);
        roc.eval(l, out);

        String str = EvaluationTools.rocChartToHtml(roc, Arrays.asList("setosa", "versicolor", "virginica"));
        //System.out.println(str);
    }
}
Example #26
Source File: PreProcessor3D4DTest.java From nd4j with Apache License 2.0
@Test
public void testBruteForce3d() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();

    int timeSteps = 15;
    int samples = 100;
    //multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] {1, -2, 3}).reshape(3, 1);
    INDArray featureScaleB = Nd4j.create(new double[] {2, 2, 3}).reshape(3, 1);

    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);

    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());

    //Same test with an iterator: values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean(), caseA.expectedMean);
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin(), caseB.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), caseB.expectedMax);
}
Example #27
Source File: OCNNOutputLayerTest.java From deeplearning4j with Apache License 2.0
public DataSetIterator getNormalizedIterator() {
    DataSetIterator dataSetIterator = new IrisDataSetIterator(150, 150);
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.reset();
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return dataSetIterator;
}
Example #28
Source File: CustomerRetentionPredictionApi.java From Java-Deep-Learning-Cookbook with MIT License
public static INDArray generateOutput(File inputFile, String modelFilePath) throws IOException, InterruptedException {
    final File modelFile = new File(modelFilePath);
    final MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(modelFile);
    final RecordReader recordReader = generateReader(inputFile);
    //final INDArray array = RecordConverter.toArray(recordReader.next());
    final NormalizerStandardize normalizerStandardize = ModelSerializer.restoreNormalizerFromFile(modelFile);
    //normalizerStandardize.transform(array);
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader, 1).build();
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return network.output(dataSetIterator);
}
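Note that this example refits the restored normalizer on the inference data, which overwrites the statistics that were saved alongside the model; the more common pattern is to set the restored normalizer as the iterator's pre-processor without calling fit() again.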
Example #29
Source File: NormalizationTests.java From DataVec with Apache License 2.0
@Test
public void testMeanStdZeros() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    List<Row> rows = Normalization.stdDevMeanColumns(dataFrame, dataFrame.get().columns());
    INDArray assertion = DataFrames.toMatrix(rows);
    //compare standard deviation
    assertTrue(standardScaler.getStd().equalsWithEps(assertion.getRow(0), 1e-1));
    //compare mean
    assertTrue(standardScaler.getMean().equalsWithEps(assertion.getRow(1), 1e-1));
}
Example #30
Source File: HyperParameterTuning.java From Java-Deep-Learning-Cookbook with MIT License
public DataSetIteratorSplitter dataSplit(DataSetIterator iterator) throws IOException, InterruptedException {
    DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(iterator);
    iterator.setPreProcessor(dataNormalization);
    DataSetIteratorSplitter splitter = new DataSetIteratorSplitter(iterator, 1000, 0.8);
    return splitter;
}