Java Code Examples for org.nd4j.linalg.dataset.DataSet#copy()
The following examples show how to use org.nd4j.linalg.dataset.DataSet#copy().
You can go to the original project or source file by following the links above each example.
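Before the individual examples, here is a minimal, self-contained sketch of the pattern these tests rely on: DataSet#copy() returns a new DataSet backed by duplicated arrays, so in-place operations on the copy (such as pre-processing) do not modify the original. This sketch is illustrative only; the class name DataSetCopyExample and the array shapes are made up for the example and do not come from any of the projects below.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;

public class DataSetCopyExample {
    public static void main(String[] args) {
        // Build a small dataset: 4 examples, 3 features, 2 label columns.
        INDArray features = Nd4j.rand(4, 3);
        INDArray labels = Nd4j.zeros(4, 2);
        DataSet original = new DataSet(features, labels);

        // Take an independent copy, then mutate the copy's features in place.
        DataSet working = original.copy();
        working.getFeatures().muli(100);

        // The original's features are untouched, so this prints "false".
        System.out.println(original.getFeatures().equals(working.getFeatures()));
    }
}

This is the same reason the tests below always call preProcessor.preProcess(...) on d.copy() rather than on d itself: the source DataSet can be reused across loop iterations.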
Example 1
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 6 votes |
@Test
public void allMajority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMajorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        //if all majority default is to mask all time steps
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        assertEquals(Nd4j.zeros(dToPreProcess.getLabelsMaskArray().shape()),
                dToPreProcess.getLabelsMaskArray());

        //change default and check distribution which should be 1-targetMinorityDist
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 2
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 6 votes |
@Test
public void allMinority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMinorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        //all minority classes present - check that no time steps are masked
        assertEquals(Nd4j.ones(minibatchSize, shortSeq), dToPreProcess.getLabelsMaskArray());

        //check behavior with override minority - now these are seen as all majority classes
        preProcessor.overrideMinorityDefault();
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 3
Source File: KFoldIterator.java From nd4j with Apache License 2.0 | 6 votes |
/**
 * Create an iterator given the dataset and a value of k (optional, defaults to 10).
 * If the number of samples in the dataset is not a multiple of k, the last fold will have
 * fewer samples, with the rest all having the same number of samples.
 *
 * @param k          number of folds (optional, defaults to 10)
 * @param singleFold DataSet to split into k folds
 */
public KFoldIterator(int k, DataSet singleFold) {
    this.k = k;
    this.singleFold = singleFold.copy();
    if (k <= 1)
        throw new IllegalArgumentException();
    if (singleFold.numExamples() % k != 0) {
        if (k != 2) {
            this.batch = singleFold.numExamples() / (k - 1);
            this.lastBatch = singleFold.numExamples() % (k - 1);
        } else {
            this.lastBatch = singleFold.numExamples() / 2;
            this.batch = this.lastBatch + 1;
        }
    } else {
        this.batch = singleFold.numExamples() / k;
        this.lastBatch = singleFold.numExamples() / k;
    }
}
Example 4
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void allMajority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMajorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        //if all majority default is to mask all time steps
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray exp = Nd4j.zeros(dToPreProcess.getLabelsMaskArray().shape());
        INDArray lm = dToPreProcess.getLabelsMaskArray();
        assertEquals(exp, lm);

        //change default and check distribution which should be 1-targetMinorityDist
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .castTo(Nd4j.defaultFloatingPointType())
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 5
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void allMinority() {
    float[] someTargets = new float[] {0.01f, 0.1f, 0.5f};
    DataSet d = allMinorityDataSet(false);
    DataSet dToPreProcess;
    for (int i = 0; i < someTargets.length; i++) {
        UnderSamplingByMaskingPreProcessor preProcessor =
                new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2);
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        //all minority classes present - check that no time steps are masked
        assertEquals(Nd4j.ones(minibatchSize, shortSeq), dToPreProcess.getLabelsMaskArray());

        //check behavior with override minority - now these are seen as all majority classes
        preProcessor.overrideMinorityDefault();
        preProcessor.donotMaskAllMajorityWindows();
        dToPreProcess = d.copy();
        preProcessor.preProcess(dToPreProcess);
        INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq);
        assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i])
                .castTo(Nd4j.defaultFloatingPointType())
                .equalsWithEps(percentagesNow, tolerancePerc));
    }
}
Example 6
Source File: LoneTest.java From nd4j with Apache License 2.0 | 5 votes |
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);

    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);
}
Example 7
Source File: LoneTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void maskWhenMerge() {
    DataSet dsA = new DataSet(Nd4j.linspace(1, 15, 15).reshape(1, 3, 5), Nd4j.zeros(1, 3, 5));
    DataSet dsB = new DataSet(Nd4j.linspace(1, 9, 9).reshape(1, 3, 3), Nd4j.zeros(1, 3, 3));
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(dsA);
    dataSetList.add(dsB);

    DataSet fullDataSet = DataSet.merge(dataSetList);
    assertTrue(fullDataSet.getFeaturesMaskArray() != null);

    DataSet fullDataSetCopy = fullDataSet.copy();
    assertTrue(fullDataSetCopy.getFeaturesMaskArray() != null);
}
Example 8
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDist() {
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);

    DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 2;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        INDArray labelsBefore = dataSetToPreProcess.getLabels().dup();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        assertEquals(labelsBefore, labels);

        //check masks are zero where there are no time steps
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in window, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution
            INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2, 0), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5, 0), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 9
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDistOneHot() {
    //preprocessor should give 30% minority class for every "window"
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);
    preProcessor.overrideMinorityDefault();

    //construct a dataset with known distribution of minority class and varying time steps
    DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 10;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();

        //check masks are zero where there were no time steps
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in the window length, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution after accounting for masks
            INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2, 0), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4, 0), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5, 0), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 10
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDist() {
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);

    DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 2;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        INDArray labelsBefore = dataSetToPreProcess.getLabels().dup();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        assertEquals(labelsBefore, labels);

        //check masks are zero where there are no time steps
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in window, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution
            INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 11
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test
public void mixedDistOneHot() {
    //preprocessor should give 30% minority class for every "window"
    UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window);
    preProcessor.overrideMinorityDefault();

    //construct a dataset with known distribution of minority class and varying time steps
    DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true);

    //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution
    int loop = 10;
    for (int i = 0; i < loop; i++) {
        //preprocess dataset
        DataSet dataSetToPreProcess = dataSet.copy();
        preProcessor.preProcess(dataSetToPreProcess);
        INDArray labels = dataSetToPreProcess.getLabels();
        INDArray masks = dataSetToPreProcess.getLabelsMaskArray();

        //check masks are zero where there were no time steps
        INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq));
        assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros);

        //check distribution of masks in the window length, going backwards from last time step
        for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) {
            //collect mask and labels
            int maxIndex = min(longSeq, j * window);
            int minIndex = min(0, maxIndex - window);
            INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex));
            INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(minIndex, maxIndex));

            //calc minority class distribution after accounting for masks
            INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all())
                    .mul(maskWindow);
            INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1));

            if (j < shortSeq / window) {
                assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist,
                        minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist,
                        minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist
                assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8,
                        minorityDist.getFloat(2), tolerancePerc); //should be unchanged as it was already above target dist
            }
            assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist,
                    minorityDist.getFloat(3), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist,
                    minorityDist.getFloat(4), tolerancePerc); //should now be close to target dist
            assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8,
                    minorityDist.getFloat(5), tolerancePerc); //should be unchanged as it was already above target dist
        }
    }
}
Example 12
Source File: ModelSerializerTest.java From deeplearning4j with Apache License 2.0 | 3 votes |
@Test
public void testSaveRestoreNormalizerFromInputStream() throws Exception {
    DataSet dataSet = trivialDataSet();

    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(dataSet);

    ComputationGraph cg = simpleComputationGraph();
    cg.init();

    File tempFile = tempDir.newFile();
    ModelSerializer.writeModel(cg, tempFile, true);
    ModelSerializer.addNormalizerToModel(tempFile, norm);

    FileInputStream fis = new FileInputStream(tempFile);

    NormalizerStandardize restored = ModelSerializer.restoreNormalizerFromInputStream(fis);

    assertNotEquals(null, restored);

    DataSet dataSet2 = dataSet.copy();
    norm.preProcess(dataSet2);
    assertNotEquals(dataSet.getFeatures(), dataSet2.getFeatures());

    restored.revert(dataSet2);
    assertEquals(dataSet.getFeatures(), dataSet2.getFeatures());
}