org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize Java Examples
The following examples show how to use org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize, a DataSet pre-processor that standardizes features to zero mean and unit variance. Each example is extracted from an open source project; the source file, project, and license are noted above it.
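Most of the examples follow the same pattern: fit the normalizer to collect per-feature statistics, then either transform DataSets directly or attach the normalizer to an iterator as a pre-processor. Below is a minimal sketch of both variants; the class and method names are illustrative and not taken from any of the projects listed here.

import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;

public class NormalizerStandardizeSketch {

    // Variant 1: fit on the training split only, then transform both splits in place
    public static void standardize(DataSet trainData, DataSet testData) {
        DataNormalization normalizer = new NormalizerStandardize();
        normalizer.fit(trainData);       // collect mean/std from the training data; does not modify it
        normalizer.transform(trainData); // standardize in place: (x - mean) / std
        normalizer.transform(testData);  // reuse the *training* statistics on the test data
    }

    // Variant 2: fit over an iterator, then attach it so every batch is standardized on the fly
    public static void standardize(DataSetIterator iterator) {
        DataNormalization normalizer = new NormalizerStandardize();
        normalizer.fit(iterator);             // one pass over the iterator to accumulate statistics
        iterator.reset();
        iterator.setPreProcessor(normalizer); // batches returned from now on are standardized
    }
}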
Example #1
Source File: IrisFileDataSource.java From FederatedAndroidTrainer with MIT License
private void createDataSource() throws IOException, InterruptedException {
    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter);
    recordReader.initialize(new InputStreamInputSplit(dataFile));

    //Second: the RecordReaderDataSetIterator handles conversion to DataSet objects, ready for use in neural network
    int labelIndex = 4;  //5 values in each row of the iris.txt CSV: 4 input features followed by an integer label (class) index. Labels are the 5th value (index 4) in each row
    int numClasses = 3;  //3 classes (types of iris flowers) in the iris data set. Classes have integer values 0, 1 or 2

    DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numClasses);
    DataSet allData = iterator.next();
    allData.shuffle();
    SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.80);  //Use 80% of data for training

    trainingData = testAndTrain.getTrain();
    testData = testAndTrain.getTest();

    //We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(trainingData);        //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(trainingData);  //Apply normalization to the training data
    normalizer.transform(testData);      //Apply normalization to the test data. This is using statistics calculated from the *training* set
}
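Fitting the normalizer on the training split only, then reusing those statistics for the test split, keeps test-set information from leaking into the pre-processing step.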
Example #2
Source File: PreProcessor3D4DTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
Example #3
Source File: PreProcessor3D4DTest.java From nd4j with Apache License 2.0
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());

    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
Example #4
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
Example #5
Source File: ModelSerializerTest.java From deeplearning4j with Apache License 2.0
@Test
public void testJavaSerde_1() throws Exception {
    int nIn = 5;
    int nOut = 6;

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
            .graphBuilder()
            .addInputs("in")
            .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).build(), "in")
            .setOutputs("0")
            .validateOutputLayerConfig(false)
            .build();

    ComputationGraph net = new ComputationGraph(conf);
    net.init();

    DataSet dataSet = trivialDataSet();
    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    val b = SerializationUtils.serialize(net);

    ComputationGraph restored = SerializationUtils.deserialize(b);

    assertEquals(net, restored);
}
Example #6
Source File: ModelSerializerTest.java From deeplearning4j with Apache License 2.0
@Test
public void testJavaSerde_2() throws Exception {
    int nIn = 5;
    int nOut = 6;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01)
            .list()
            .layer(0, new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    DataSet dataSet = trivialDataSet();
    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    val b = SerializationUtils.serialize(net);

    MultiLayerNetwork restored = SerializationUtils.deserialize(b);

    assertEquals(net, restored);
}
Example #7
Source File: RPTreeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRPTree() throws Exception {
    DataSetIterator mnist = new MnistDataSetIterator(150, 150);
    RPTree rpTree = new RPTree(784, 50);
    DataSet d = mnist.next();
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(d);
    normalizerStandardize.transform(d.getFeatures());
    INDArray data = d.getFeatures();
    rpTree.buildTree(data);
    assertEquals(4, rpTree.getLeaves().size());
    assertEquals(0, rpTree.getRoot().getDepth());

    List<Integer> candidates = rpTree.getCandidates(data.getRow(0));
    assertFalse(candidates.isEmpty());
    assertEquals(10, rpTree.query(data.slice(0), 10).length());
    System.out.println(candidates.size());

    rpTree.addNodeAtIndex(150, data.getRow(0));
}
Example #8
Source File: DiabetesFileDataSource.java From FederatedAndroidTrainer with MIT License
private void createDataSource() throws IOException, InterruptedException {
    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter);
    recordReader.initialize(new InputStreamInputSplit(dataFile));

    //Second: the RecordReaderDataSetIterator handles conversion to DataSet objects, ready for use in neural network
    int labelIndex = 11;

    DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, labelIndex, true);
    DataSet allData = iterator.next();
    SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.80);  //Use 80% of data for training

    trainingData = testAndTrain.getTrain();
    testData = testAndTrain.getTest();

    //We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(trainingData);        //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(trainingData);  //Apply normalization to the training data
    normalizer.transform(testData);      //Apply normalization to the test data. This is using statistics calculated from the *training* set
}
Example #9
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRevert() {
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    //System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    //System.out.println(transformed.getFeatures());
    INDArray delta = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures()))
                    .div(sampleDataSet.getFeatures());
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0);
    assertTrue(maxdeltaPerc < tolerancePerc);
}
Example #10
Source File: StandardizeSerializerStrategy.java From deeplearning4j with Apache License 2.0
@Override
public NormalizerStandardize restore(@NonNull InputStream stream) throws IOException {
    DataInputStream dis = new DataInputStream(stream);

    boolean fitLabels = dis.readBoolean();

    NormalizerStandardize result = new NormalizerStandardize(Nd4j.read(dis), Nd4j.read(dis));
    result.fitLabel(fitLabels);
    if (fitLabels) {
        result.setLabelStats(Nd4j.read(dis), Nd4j.read(dis));
    }

    return result;
}
Example #11
Source File: DataSet.java From nd4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
Example #12
Source File: StandardizeSerializerStrategy.java From deeplearning4j with Apache License 2.0
@Override
public void write(@NonNull NormalizerStandardize normalizer, @NonNull OutputStream stream) throws IOException {
    try (DataOutputStream dos = new DataOutputStream(stream)) {
        dos.writeBoolean(normalizer.isFitLabel());

        Nd4j.write(normalizer.getMean(), dos);
        Nd4j.write(normalizer.getStd(), dos);

        if (normalizer.isFitLabel()) {
            Nd4j.write(normalizer.getLabelMean(), dos);
            Nd4j.write(normalizer.getLabelStd(), dos);
        }
        dos.flush();
    }
}
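Together with the restore method in Example #10, this defines the binary layout of a serialized NormalizerStandardize: the fitLabel flag, the feature mean and standard deviation arrays, and, only when labels were fitted, the label mean and standard deviation arrays. The reads in restore must mirror this write order exactly.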
Example #13
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    //Check that we don't get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    //Check that the transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
Example #14
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is basically constant with a small std deviation, and the constant is large.
    // Check that the algorithm can handle it.
    double tolerancePerc = 1; //Within 1%
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new float[] {x, y, (float) z}).castTo(Nd4j.defaultFloatingPointType()).reshape(1, -1);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
Example #15
Source File: StandardizeSerializerStrategy.java From nd4j with Apache License 2.0
@Override
public NormalizerStandardize restore(@NonNull InputStream stream) throws IOException {
    DataInputStream dis = new DataInputStream(stream);

    boolean fitLabels = dis.readBoolean();

    NormalizerStandardize result = new NormalizerStandardize(Nd4j.read(dis), Nd4j.read(dis));
    result.fitLabel(fitLabels);
    if (fitLabels) {
        result.setLabelStats(Nd4j.read(dis), Nd4j.read(dis));
    }

    return result;
}
Example #16
Source File: PreProcessor3D4DTest.java From deeplearning4j with Apache License 2.0
@Test
public void testBruteForce3d() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();

    int timeSteps = 15;
    int samples = 100;
    //multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] {1, -2, 3}).reshape(3, 1);
    INDArray featureScaleB = Nd4j.create(new double[] {2, 2, 3}).reshape(3, 1);

    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);

    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean.castTo(DataType.FLOAT), myNormalizer.getMean().castTo(DataType.FLOAT));
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin.castTo(DataType.FLOAT), myMinMaxScaler.getMin().castTo(DataType.FLOAT));
    assertEquals(caseB.expectedMax.castTo(DataType.FLOAT), myMinMaxScaler.getMax().castTo(DataType.FLOAT));

    //Same test with an iterator: values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean().castTo(DataType.FLOAT), caseA.expectedMean.castTo(DataType.FLOAT));
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin().castTo(DataType.FLOAT), caseB.expectedMin.castTo(DataType.FLOAT));
    assertEquals(myMinMaxScaler.getMax().castTo(DataType.FLOAT), caseB.expectedMax.castTo(DataType.FLOAT));
}
Example #17
Source File: StandardizeSerializerStrategy.java From nd4j with Apache License 2.0
@Override
public void write(@NonNull NormalizerStandardize normalizer, @NonNull OutputStream stream) throws IOException {
    try (DataOutputStream dos = new DataOutputStream(stream)) {
        dos.writeBoolean(normalizer.isFitLabel());

        Nd4j.write(normalizer.getMean(), dos);
        Nd4j.write(normalizer.getStd(), dos);

        if (normalizer.isFitLabel()) {
            Nd4j.write(normalizer.getLabelMean(), dos);
            Nd4j.write(normalizer.getLabelStd(), dos);
        }
        dos.flush();
    }
}
Example #18
Source File: NormalizerTests.java From nd4j with Apache License 2.0
@Before
public void randomData() {
    Nd4j.getRandom().setSeed(12345);
    batchSize = 13;
    batchCount = 20;
    lastBatch = batchSize / 2;
    INDArray origFeatures = Nd4j.rand(batchCount * batchSize + lastBatch, 10);
    INDArray origLabels = Nd4j.rand(batchCount * batchSize + lastBatch, 3);
    data = new DataSet(origFeatures, origLabels);
    stdScaler = new NormalizerStandardize();
    minMaxScaler = new NormalizerMinMaxScaler();
}
Example #19
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testConstant() {
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    //Check that we don't get NaNs
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    //Check that the transformed values are close enough to zero
    assertEquals(Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0), 0,
                    constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    //Check that we don't get NaNs, since the std dev is zero
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    assertEquals(Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0), 0,
                    constant * tolerancePerc / 100.0);
}
Example #20
Source File: NormalizerStandardizeTest.java From deeplearning4j with Apache License 2.0
@Test
public void testDifferentBatchSizes() {
    // Create a 6x1 matrix of the numbers 1 through 6
    INDArray values = Nd4j.linspace(1, 6, 6, DataType.DOUBLE).reshape(1, -1).transpose();
    DataSet dataSet = new DataSet(values, values);

    // Test fitting a DataSet
    NormalizerStandardize norm1 = new NormalizerStandardize();
    norm1.fit(dataSet);
    assertEquals(3.5f, norm1.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm1.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with equal batch sizes
    DataSetIterator testIter1 = new TestDataSetIterator(dataSet, 3); // Will yield 2 batches of 3 rows
    NormalizerStandardize norm2 = new NormalizerStandardize();
    norm2.fit(testIter1);
    assertEquals(3.5f, norm2.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm2.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with varying batch sizes
    DataSetIterator testIter2 = new TestDataSetIterator(dataSet, 4); // Will yield a batch of 4 and a batch of 2 rows
    NormalizerStandardize norm3 = new NormalizerStandardize();
    norm3.fit(testIter2);
    assertEquals(3.5f, norm3.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm3.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with batches of single rows
    DataSetIterator testIter3 = new TestDataSetIterator(dataSet, 1); // Will yield 6 batches of 1 row
    NormalizerStandardize norm4 = new NormalizerStandardize();
    norm4.fit(testIter3);
    assertEquals(3.5f, norm4.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm4.getStd().getFloat(0), 1e-4);
}
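All four normalizers arrive at the same mean and standard deviation because fitting over an iterator accumulates running statistics across batches, so the batch size does not affect the result.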
Example #21
Source File: DataSetIteratorHelper.java From Java-Deep-Learning-Cookbook with MIT License
private static DataSetIteratorSplitter createDataSetSplitter() throws IOException, InterruptedException {
    final RecordReader recordReader = DataSetIteratorHelper.generateReader(new ClassPathResource("Churn_Modelling.csv").getFile());
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader, batchSize)
            .classification(labelIndex, numClasses)
            .build();
    final DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(dataNormalization);
    final DataSetIteratorSplitter dataSetIteratorSplitter = new DataSetIteratorSplitter(dataSetIterator, 1250, 0.8);
    return dataSetIteratorSplitter;
}
Example #22
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testUnderOverflow() {
    // This dataset is basically constant with a small std deviation, and the constant is large.
    // Check that the algorithm can handle it.
    double tolerancePerc = 1; //Within 1%
    double toleranceAbs = 0.0005;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.mul(100).div(theoreticalMean);
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    //this just has to not barf
    //myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
}
Example #23
Source File: NormalizerStandardizeTest.java From nd4j with Apache License 2.0
@Test
public void testDifferentBatchSizes() {
    // Create a 6x1 matrix of the numbers 1 through 6
    INDArray values = Nd4j.linspace(1, 6, 6).transpose();
    DataSet dataSet = new DataSet(values, values);

    // Test fitting a DataSet
    NormalizerStandardize norm1 = new NormalizerStandardize();
    norm1.fit(dataSet);
    assertEquals(3.5f, norm1.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm1.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with equal batch sizes
    DataSetIterator testIter1 = new TestDataSetIterator(dataSet, 3); // Will yield 2 batches of 3 rows
    NormalizerStandardize norm2 = new NormalizerStandardize();
    norm2.fit(testIter1);
    assertEquals(3.5f, norm2.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm2.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with varying batch sizes
    DataSetIterator testIter2 = new TestDataSetIterator(dataSet, 4); // Will yield a batch of 4 and a batch of 2 rows
    NormalizerStandardize norm3 = new NormalizerStandardize();
    norm3.fit(testIter2);
    assertEquals(3.5f, norm3.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm3.getStd().getFloat(0), 1e-4);

    // Test fitting an iterator with batches of single rows
    DataSetIterator testIter3 = new TestDataSetIterator(dataSet, 1); // Will yield 6 batches of 1 row
    NormalizerStandardize norm4 = new NormalizerStandardize();
    norm4.fit(testIter3);
    assertEquals(3.5f, norm4.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, norm4.getStd().getFloat(0), 1e-4);
}
Example #24
Source File: DataSet.java From deeplearning4j with Apache License 2.0
@Override
public void normalize() {
    //FeatureUtil.normalizeMatrix(getFeatures());
    NormalizerStandardize inClassPreProcessor = new NormalizerStandardize();
    inClassPreProcessor.fit(this);
    inClassPreProcessor.transform(this);
}
Example #25
Source File: EvaluationToolsTests.java From deeplearning4j with Apache License 2.0
@Test
public void testRocMultiToHtml() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(150, 150);

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    NormalizerStandardize ns = new NormalizerStandardize();
    DataSet ds = iter.next();
    ns.fit(ds);
    ns.transform(ds);

    for (int i = 0; i < 30; i++) {
        net.fit(ds);
    }

    for (int numSteps : new int[] {20, 0}) {
        ROCMultiClass roc = new ROCMultiClass(numSteps);
        iter.reset();

        INDArray f = ds.getFeatures();
        INDArray l = ds.getLabels();
        INDArray out = net.output(f);
        roc.eval(l, out);

        String str = EvaluationTools.rocChartToHtml(roc, Arrays.asList("setosa", "versicolor", "virginica"));
        //System.out.println(str);
    }
}
Example #26
Source File: PreProcessor3D4DTest.java From nd4j with Apache License 2.0
@Test
public void testBruteForce3d() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();

    int timeSteps = 15;
    int samples = 100;
    //multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] {1, -2, 3}).reshape(3, 1);
    INDArray featureScaleB = Nd4j.create(new double[] {2, 2, 3}).reshape(3, 1);

    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);

    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());

    //Same test with an iterator: values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);

    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean(), caseA.expectedMean);
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);

    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin(), caseB.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), caseB.expectedMax);
}
Example #27
Source File: OCNNOutputLayerTest.java From deeplearning4j with Apache License 2.0
public DataSetIterator getNormalizedIterator() {
    DataSetIterator dataSetIterator = new IrisDataSetIterator(150, 150);
    NormalizerStandardize normalizerStandardize = new NormalizerStandardize();
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.reset();
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return dataSetIterator;
}
Example #28
Source File: CustomerRetentionPredictionApi.java From Java-Deep-Learning-Cookbook with MIT License
public static INDArray generateOutput(File inputFile, String modelFilePath) throws IOException, InterruptedException {
    final File modelFile = new File(modelFilePath);
    final MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(modelFile);
    final RecordReader recordReader = generateReader(inputFile);
    //final INDArray array = RecordConverter.toArray(recordReader.next());
    final NormalizerStandardize normalizerStandardize = ModelSerializer.restoreNormalizerFromFile(modelFile);
    //normalizerStandardize.transform(array);
    final DataSetIterator dataSetIterator = new RecordReaderDataSetIterator.Builder(recordReader, 1).build();
    normalizerStandardize.fit(dataSetIterator);
    dataSetIterator.setPreProcessor(normalizerStandardize);
    return network.output(dataSetIterator);
}
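Note that this example refits the restored normalizer on the inference data, which overwrites the statistics that were saved alongside the model; the more common pattern is to set the restored normalizer as the iterator's pre-processor without calling fit() again.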
Example #29
Source File: NormalizationTests.java From DataVec with Apache License 2.0
@Test
public void testMeanStdZeros() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    List<Row> rows = Normalization.stdDevMeanColumns(dataFrame, dataFrame.get().columns());
    INDArray assertion = DataFrames.toMatrix(rows);
    //compare standard deviation
    assertTrue(standardScaler.getStd().equalsWithEps(assertion.getRow(0), 1e-1));
    //compare mean
    assertTrue(standardScaler.getMean().equalsWithEps(assertion.getRow(1), 1e-1));
}
Example #30
Source File: HyperParameterTuning.java From Java-Deep-Learning-Cookbook with MIT License
public DataSetIteratorSplitter dataSplit(DataSetIterator iterator) throws IOException, InterruptedException {
    DataNormalization dataNormalization = new NormalizerStandardize();
    dataNormalization.fit(iterator);
    iterator.setPreProcessor(dataNormalization);
    DataSetIteratorSplitter splitter = new DataSetIteratorSplitter(iterator, 1000, 0.8);
    return splitter;
}