Java Code Examples for org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize#fit()
The following examples show how to use org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize#fit().
Each example links to its original project and source file.
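Before the examples, a minimal sketch of the basic workflow may help. fit() scans the data once to collect per-feature statistics (mean and standard deviation), and transform() then standardizes features in place using those statistics. The class and variable names below are illustrative, not taken from any project on this page; fit() also accepts a DataSetIterator, and a fitted normalizer can be attached to an iterator via setPreProcessor(), as several of the examples demonstrate.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
import org.nd4j.linalg.factory.Nd4j;

public class NormalizerStandardizeSketch {
    public static void main(String[] args) {
        // Toy dataset: 100 samples, 3 features; labels are unused here.
        INDArray features = Nd4j.randn(100, 3);
        INDArray labels = Nd4j.zeros(100, 1);
        DataSet trainData = new DataSet(features, labels);

        NormalizerStandardize normalizer = new NormalizerStandardize();
        normalizer.fit(trainData);       // collect per-feature mean and std from the data
        normalizer.transform(trainData); // standardize in place: (x - mean) / std
    }
}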
Example 1
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
Example 2
Source File: PreProcessor3D4DTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
Example 3
Source File: CustomerRetentionPredictionApi.java From Java-Deep-Learning-Cookbook with MIT License
public static INDArray generateOutput(File inputFile, String modelFilePath) throws IOException, InterruptedException {
    final File modelFile = new File(modelFilePath);
    final MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(modelFile);
    final RecordReader recordReader = generateReader(inputFile);
    //final INDArray array = RecordConverter.toArray(recordReader.next());
    final NormalizerStandardize normalizerStandardize = ModelSerializer.restoreNormalizerFromFile(modelFile);
    //normalizerStandardize.transform(array);
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader, 1).build();
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return network.output(dataSetIterator);
}
Example 4
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is basically constant with a small std deviation, and the constant is large.
    // Checking that the algorithm can handle it.
    double tolerancePerc = 1; // within 1%
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new float[] {x, y, (float) z})
                    .castTo(Nd4j.defaultFloatingPointType()).reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    // this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
Example 5
Source File: DataSet.java From nd4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
Example 6
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    // Checking if we get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    // Checking if we get NaNs, because std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));

    // Checking to see if transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    // Checking if we get NaNs, because std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
Example 7
Source File: RPTreeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRpTreeMaxNodes() throws Exception {
    DataSetIterator mnist = new MnistDataSetIterator(150, 150);
    RPForest rpTree = new RPForest(4, 4, "euclidean");
    DataSet d = mnist.next();
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(d);
    rpTree.fit(d.getFeatures());
    for (RPTree tree : rpTree.getTrees()) {
        for (RPNode node : tree.getLeaves()) {
            assertTrue(node.getIndices().size() <= rpTree.getMaxSize());
        }
    }
}
Example 8
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is basically constant with a small std deviation, and the constant is large.
    // Checking that the algorithm can handle it.
    double tolerancePerc = 1; // within 1%
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    // this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
Example 9
Source File: DataSet.java From deeplearning4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
Example 10
Source File: NormalizerStandardizeLabelsTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers.
       The obtained values are compared to the theoretical mean and std dev.
     */
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);

    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = featureSet.dup().getColumns(0);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean = Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z})
                    .reshape(1, -1).castTo(Nd4j.defaultFloatingPointType());
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(0);
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd = Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z})
                    .reshape(1, -1).castTo(Nd4j.defaultFloatingPointType());
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(0);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    INDArray stdlabelDeltaPerc =
                    Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).mul(100).getDouble(0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Example 11
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers.
       The obtained values are compared to the theoretical mean and std dev.
     */
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples, DataType.DOUBLE).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);

    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean = Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z})
                    .reshape(1, -1);
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd = Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z})
                    .reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Example 12
Source File: NormalizationTests.java From deeplearning4j with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }

    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.show();
    Normalization.zeromeanUnitVariance(dataFrame).show();
    Normalization.normalize(dataFrame).show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));
}
Example 13
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers.
       The obtained values are compared to the theoretical mean and std dev.
     */
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);

    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean =
                    Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z});
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd =
                    Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Example 14
Source File: PreProcessor3D4DTest.java From nd4j with Apache License 2.0
@Test
public void testBruteForce3dMaskLabels() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fitLabel(true);

    //generating a dataset with consecutive numbers as feature values. Dataset also has masks
    int samples = 100;
    INDArray featureScale = Nd4j.create(new float[] {1, 2, 10}).reshape(3, 1);
    int timeStepsU = 5;
    Construct3dDataSet sampleU = new Construct3dDataSet(featureScale, timeStepsU, samples, 1);
    int timeStepsV = 3;
    Construct3dDataSet sampleV = new Construct3dDataSet(featureScale, timeStepsV, samples, sampleU.newOrigin);
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(sampleU.sampleDataSet);
    dataSetList.add(sampleV.sampleDataSet);

    DataSet fullDataSetA = DataSet.merge(dataSetList);
    DataSet fullDataSetAA = fullDataSetA.copy();
    //This should be the same datasets as above without a mask
    Construct3dDataSet fullDataSetNoMask =
                    new Construct3dDataSet(featureScale, timeStepsU + timeStepsV, samples, 1);

    //preprocessors - label and feature values are the same
    myNormalizer.fit(fullDataSetA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getStd(), fullDataSetNoMask.expectedStd);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelStd(), fullDataSetNoMask.expectedStd);

    myMinMaxScaler.fit(fullDataSetAA);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);

    //Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(fullDataSetA, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(fullDataSetAA, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    assertTrue(Transforms.abs(myNormalizer.getStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber()
                    .floatValue() < 0.01);
    assertTrue(Transforms.abs(myNormalizer.getLabelStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber()
                    .floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);
}
Example 15
Source File: ModelSerializerTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRestoreUnsavedNormalizerFromInputStream() throws Exception {
    DataSet dataSet = trivialDataSet();

    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    ComputationGraph cg = simpleComputationGraph();
    cg.init();

    File tempFile = tempDir.newFile();
    ModelSerializer.writeModel(cg, tempFile, true);

    FileInputStream fis = new FileInputStream(tempFile);

    NormalizerStandardize restored = ModelSerializer.restoreNormalizerFromInputStream(fis);

    assertEquals(null, restored);
}
Example 16
Source File: GradientCheckTests.java From deeplearning4j with Apache License 2.0
@Test
public void testAutoEncoder() {
    //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
    //Need to run gradient through updater, so that L2 can be applied

    Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
    boolean[] characteristic = {false, true}; //If true: run some backprop steps first

    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};

    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatures();
    INDArray labels = ds.getLabels();

    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(ds);
    norm.transform(ds);

    double[] l2vals = {0.2, 0.0, 0.2};
    double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i]

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];

                    Nd4j.getRandom().setSeed(12345);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                                    .dataType(DataType.DOUBLE)
                                    .updater(new NoOp())
                                    .l2(l2).l1(l1)
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                    .seed(12345L)
                                    .dist(new NormalDistribution(0, 1))
                                    .list()
                                    .layer(0, new AutoEncoder.Builder().nIn(4).nOut(3)
                                                    .activation(afn).build())
                                    .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                    .activation(outputActivation).build())
                                    .build();

                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();

                    String msg;
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn="
                                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1
                                        + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < scoreBefore);
                    }

                    msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf
                                    + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                    + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                    if (PRINT_RESULTS) {
                        System.out.println(msg);
                        // for (int j = 0; j < mln.getnLayers(); j++)
                        //     System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                    assertTrue(msg, gradOK);
                    TestUtils.testModelSerialization(mln);
                }
            }
        }
    }
}
Example 17
Source File: NormalizerStandardizeLabelsTest.java From nd4j with Apache License 2.0
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers.
       The obtained values are compared to the theoretical mean and std dev.
     */
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);

    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = featureSet.dup().getColumns(new int[] {0});
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean =
                    Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z});
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(new int[] {0});
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd =
                    Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z});
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(new int[] {0});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    INDArray stdlabelDeltaPerc =
                    Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).mul(100).getDouble(0, 0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 0.1; // 0.1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Example 18
Source File: NormalizationTests.java From DataVec with Apache License 2.0
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    Normalization.zeromeanUnitVariance(dataFrame).get().show();
    Normalization.normalize(dataFrame).get().show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));
}
Example 19
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testTransform() {
    /* Random dataset is generated such that
       AX + B where X is from a normal distribution with mean 0 and std 1
       The mean of the above will be B and the std A
       Obtained mean and std dev are compared to theoretical
       Transformed values should be the same as X with the same seed.
     */
    long randSeed = 12345;

    int nFeatures = 2;
    int nSamples = 6400;
    int bsize = 8;
    int a = 5;
    int b = 100;
    INDArray sampleMean, sampleStd, sampleMeanDelta, sampleStdDelta, delta, deltaPerc;
    double maxDeltaPerc, sampleMeanSEM;

    genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    DataSet genRandExpected = normData.theoreticalTransform;
    genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
    genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    DataSetIterator normIterator = normData.getIter(bsize);
    DataSetIterator genRandExpectedIter = new TestDataSetIterator(genRandExpected, bsize);
    DataSetIterator expectedIterator = expectedData.getIter(bsize);
    DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);

    myNormalizer.fit(normIterator);

    double tolerancePerc = 0.10; //within 0.1%
    sampleMean = myNormalizer.getMean();
    sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean));
    assertTrue(sampleMeanDelta.mul(100).div(normData.theoreticalMean).max().getDouble(0) < tolerancePerc);
    //sanity check to see if it's within the theoretical standard error of mean
    sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max().getDouble(0);
    assertTrue(sampleMeanSEM < 2.6); //99% of the time it should be within this many SEMs

    tolerancePerc = 1; //within 1% - std dev value
    sampleStd = myNormalizer.getStd();
    sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd));

    double actualmaxDiff = sampleStdDelta.div(normData.theoreticalStd).max().mul(100).getDouble(0);
    assertTrue(actualmaxDiff < tolerancePerc);

    tolerancePerc = 1; //within 1%
    normIterator.setPreProcessor(myNormalizer);
    while (normIterator.hasNext()) {
        INDArray before = beforeTransformIterator.next().getFeatures();
        INDArray origBefore = genRandExpectedIter.next().getFeatures();
        INDArray after = normIterator.next().getFeatures();
        INDArray expected = expectedIterator.next().getFeatures();
        delta = Transforms.abs(after.sub(expected));
        deltaPerc = delta.div(Transforms.abs(before.sub(expected)));
        deltaPerc.muli(100);
        maxDeltaPerc = deltaPerc.max(0, 1).getDouble(0);
        /*
        System.out.println("=== BEFORE ===");
        System.out.println(before);
        System.out.println("=== ORIG BEFORE ===");
        System.out.println(origBefore);
        System.out.println("=== AFTER ===");
        System.out.println(after);
        System.out.println("=== SHOULD BE ===");
        System.out.println(expected);
        System.out.println("% diff, " + maxDeltaPerc);
        */
        assertTrue(maxDeltaPerc < tolerancePerc);
    }
}
Example 20
Source File: PreProcessor3D4DTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce3dMaskLabels() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fitLabel(true);

    //generating a dataset with consecutive numbers as feature values. Dataset also has masks
    int samples = 100;
    INDArray featureScale = Nd4j.create(new double[] {1, 2, 10}).reshape(3, 1);
    int timeStepsU = 5;
    Construct3dDataSet sampleU = new Construct3dDataSet(featureScale, timeStepsU, samples, 1);
    int timeStepsV = 3;
    Construct3dDataSet sampleV = new Construct3dDataSet(featureScale, timeStepsV, samples, sampleU.newOrigin);
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(sampleU.sampleDataSet);
    dataSetList.add(sampleV.sampleDataSet);

    DataSet fullDataSetA = DataSet.merge(dataSetList);
    DataSet fullDataSetAA = fullDataSetA.copy();
    //This should be the same datasets as above without a mask
    Construct3dDataSet fullDataSetNoMask =
                    new Construct3dDataSet(featureScale, timeStepsU + timeStepsV, samples, 1);

    //preprocessors - label and feature values are the same
    myNormalizer.fit(fullDataSetA);
    assertEquals(myNormalizer.getMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getStd().castTo(DataType.FLOAT), fullDataSetNoMask.expectedStd.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getLabelMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getLabelStd().castTo(DataType.FLOAT), fullDataSetNoMask.expectedStd.castTo(DataType.FLOAT));

    myMinMaxScaler.fit(fullDataSetAA);
    assertEquals(myMinMaxScaler.getMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));

    //Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(fullDataSetA, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(fullDataSetAA, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));
    assertEquals(myNormalizer.getLabelMean().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMean.castTo(DataType.FLOAT));

    double diff1 = Transforms.abs(myNormalizer.getStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().doubleValue();
    double diff2 = Transforms.abs(myNormalizer.getLabelStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().doubleValue();
    assertTrue(diff1 < 0.01);
    assertTrue(diff2 < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMin().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getLabelMax().castTo(DataType.FLOAT), fullDataSetNoMask.expectedMax.castTo(DataType.FLOAT));
}