Java Code Examples for org.nd4j.linalg.dataset.DataSet#getLabelsMaskArray()
The following examples show how to use
org.nd4j.linalg.dataset.DataSet#getLabelsMaskArray().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test public void allMajority() { float[] someTargets = new float[] {0.01f, 0.1f, 0.5f}; DataSet d = allMajorityDataSet(false); DataSet dToPreProcess; for (int i = 0; i < someTargets.length; i++) { //if all majority default is to mask all time steps UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(someTargets[i], shortSeq / 2); dToPreProcess = d.copy(); preProcessor.preProcess(dToPreProcess); INDArray exp = Nd4j.zeros(dToPreProcess.getLabelsMaskArray().shape()); INDArray lm = dToPreProcess.getLabelsMaskArray(); assertEquals(exp, lm); //change default and check distribution which should be 1-targetMinorityDist preProcessor.donotMaskAllMajorityWindows(); dToPreProcess = d.copy(); preProcessor.preProcess(dToPreProcess); INDArray percentagesNow = dToPreProcess.getLabelsMaskArray().sum(1).div(shortSeq); assertTrue(Nd4j.valueArrayOf(percentagesNow.shape(), 1 - someTargets[i]).castTo(Nd4j.defaultFloatingPointType()).equalsWithEps(percentagesNow, tolerancePerc)); } }
Example 2
Source File: TransferLearningHelper.java From deeplearning4j with Apache License 2.0 | 6 votes |
/** * During training frozen vertices/layers can be treated as "featurizing" the input * The forward pass through these frozen layer/vertices can be done in advance and the dataset saved to disk to iterate * quickly on the smaller unfrozen part of the model * Currently does not support datasets with feature masks * * @param input multidataset to feed into the computation graph with frozen layer vertices * @return a multidataset with input features that are the outputs of the frozen layer vertices and the original labels. */ public DataSet featurize(DataSet input) { if (isGraph) { //trying to featurize for a computation graph if (origGraph.getNumInputArrays() > 1 || origGraph.getNumOutputArrays() > 1) { throw new IllegalArgumentException( "Input or output size to a computation graph is greater than one. Requires use of a MultiDataSet."); } else { if (input.getFeaturesMaskArray() != null) { throw new IllegalArgumentException( "Currently cannot support featurizing datasets with feature masks"); } MultiDataSet inbW = new MultiDataSet(new INDArray[] {input.getFeatures()}, new INDArray[] {input.getLabels()}, null, new INDArray[] {input.getLabelsMaskArray()}); MultiDataSet ret = featurize(inbW); return new DataSet(ret.getFeatures()[0], input.getLabels(), ret.getLabelsMaskArrays()[0], input.getLabelsMaskArray()); } } else { if (input.getFeaturesMaskArray() != null) throw new UnsupportedOperationException("Feature masks not supported with featurizing currently"); return new DataSet(origMLN.feedForwardToLayer(frozenInputLayer + 1, input.getFeatures(), false) .get(frozenInputLayer + 1), input.getLabels(), null, input.getLabelsMaskArray()); } }
Example 3
Source File: DataSetDescriptor.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Creates a descriptor for the given data set.
 *
 * <p>Features and labels are always wrapped in an {@code ArrayDescriptor}. Each mask is wrapped
 * only when the data set has one; otherwise the corresponding field is {@code null}. The
 * pre-processed flag is copied from the data set.
 *
 * @param ds data set to describe
 * @throws Exception declared by this constructor; the visible code does not show which call
 *         may raise it
 */
public DataSetDescriptor(DataSet ds) throws Exception {
    this.features = new ArrayDescriptor(ds.getFeatures());
    this.labels = new ArrayDescriptor(ds.getLabels());

    INDArray fMask = ds.getFeaturesMaskArray();
    this.featuresMask = (fMask != null) ? new ArrayDescriptor(fMask) : null;

    INDArray lMask = ds.getLabelsMaskArray();
    this.labelsMask = (lMask != null) ? new ArrayDescriptor(lMask) : null;

    this.preProcessed = ds.isPreProcessed();
}
Example 4
Source File: DL4JSentimentAnalysisExample.java From Java-for-Data-Science with MIT License | 4 votes |
/**
 * Entry point of the sentiment analysis example.
 *
 * <p>Calls {@code getModelData()}, builds a two-layer recurrent network (a GravesLSTM layer
 * followed by an RNN output layer), loads word vectors from {@code GNEWS_VECTORS_PATH}, and then
 * for each epoch fits the network on the training iterator and prints evaluation statistics
 * computed over the test iterator.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if any step of data preparation, loading or training fails
 */
public static void main(String[] args) throws Exception {
    getModelData();

    System.out.println("Total memory = " + Runtime.getRuntime().totalMemory());

    int batchSize = 50;
    int vectorSize = 300;
    int nEpochs = 5;
    int truncateReviewsToLength = 300;

    MultiLayerConfiguration sentimentNN = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .updater(Updater.RMSPROP)
            .regularization(true).l2(1e-5)
            .weightInit(WeightInit.XAVIER)
            .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
            .gradientNormalizationThreshold(1.0)
            .learningRate(0.0018)
            .list()
            .layer(0, new GravesLSTM.Builder()
                    .nIn(vectorSize).nOut(200)
                    .activation("softsign")
                    .build())
            .layer(1, new RnnOutputLayer.Builder()
                    .activation("softmax")
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .nIn(200).nOut(2)
                    .build())
            .pretrain(false).backprop(true)
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(sentimentNN);
    net.init();
    net.setListeners(new ScoreIterationListener(1));

    File vectorsFile = new File(GNEWS_VECTORS_PATH);
    WordVectors wordVectors = WordVectorSerializer.loadGoogleModel(vectorsFile, true, false);

    // Both example iterators are wrapped in an AsyncDataSetIterator; the test side uses batches of 100.
    SentimentExampleIterator trainExamples = new SentimentExampleIterator(
            EXTRACT_DATA_PATH, wordVectors, batchSize, truncateReviewsToLength, true);
    DataSetIterator trainData = new AsyncDataSetIterator(trainExamples, 1);
    SentimentExampleIterator testExamples = new SentimentExampleIterator(
            EXTRACT_DATA_PATH, wordVectors, 100, truncateReviewsToLength, false);
    DataSetIterator testData = new AsyncDataSetIterator(testExamples, 1);

    for (int epoch = 0; epoch < nEpochs; epoch++) {
        net.fit(trainData);
        trainData.reset();

        // Evaluate on the whole test iterator after every epoch.
        Evaluation evaluation = new Evaluation();
        while (testData.hasNext()) {
            DataSet batch = testData.next();
            INDArray batchFeatures = batch.getFeatureMatrix();
            INDArray batchLabels = batch.getLabels();
            INDArray featuresMask = batch.getFeaturesMaskArray();
            INDArray labelsMask = batch.getLabelsMaskArray();

            INDArray predicted = net.output(batchFeatures, false, featuresMask, labelsMask);
            evaluation.evalTimeSeries(batchLabels, predicted, labelsMask);
        }
        testData.reset();

        System.out.println(evaluation.stats());
    }
}
Example 5
Source File: RnnSequenceClassifier.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
/** * The method to use when making predictions for test instances. * * @param insts the instances to get predictions for * @return the class probability estimates (if the class is nominal) or the numeric predictions * (if it is numeric) * @throws Exception if something goes wrong at prediction time */ @Override public double[][] distributionsForInstances(Instances insts) throws Exception { log.info("Calc. dist for {} instances", insts.numInstances()); // Do we only have a ZeroR model? if (zeroR != null) { return zeroR.distributionsForInstances(insts); } // Process input data to have the same filters applied as the training data insts = applyFilters(insts); // Get predictions final DataSetIterator it = getDataSetIterator(insts, CacheMode.NONE); double[][] preds = new double[insts.numInstances()][insts.numClasses()]; if (it.resetSupported()) { it.reset(); } int offset = 0; boolean next = it.hasNext(); // Get predictions batch-wise while (next) { final DataSet ds = Utils.getNext(it); final INDArray features = ds.getFeatures(); final INDArray labelsMask = ds.getLabelsMaskArray(); INDArray lastTimeStepIndices; if (labelsMask != null) { lastTimeStepIndices = Nd4j.argMax(labelsMask, 1); } else { lastTimeStepIndices = Nd4j.zeros(features.size(0), 1); } INDArray predBatch = model.outputSingle(features); int currentBatchSize = (int) predBatch.size(0); for (int i = 0; i < currentBatchSize; i++) { int thisTimeSeriesLastIndex = lastTimeStepIndices.getInt(i); INDArray thisExampleProbabilities = predBatch.get( NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(thisTimeSeriesLastIndex)); for (int j = 0; j < insts.numClasses(); j++) { preds[i + offset][j] = thisExampleProbabilities.getDouble(j); } } offset += currentBatchSize; // add batchsize as offset boolean iteratorHasInstancesLeft = offset < insts.numInstances(); next = it.hasNext() || iteratorHasInstancesLeft; } // Fix classes for (int i = 0; i < preds.length; i++) { if (preds[i].length > 1) { 
weka.core.Utils.normalize(preds[i]); } else { // Rescale numeric classes with the computed coefficients in the initialization phase preds[i][0] = preds[i][0] * x1 + x0; } } return preds; }
Example 6
Source File: RnnSequenceClassifier.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
/** * The method to use when making predictions for test instances. * * @param insts the instances to get predictions for * @return the class probability estimates (if the class is nominal) or the numeric predictions * (if it is numeric) * @throws Exception if something goes wrong at prediction time */ @Override public double[][] distributionsForInstances(Instances insts) throws Exception { log.info("Calc. dist for {} instances", insts.numInstances()); // Do we only have a ZeroR model? if (zeroR != null) { return zeroR.distributionsForInstances(insts); } // Process input data to have the same filters applied as the training data insts = applyFilters(insts); // Get predictions final DataSetIterator it = getDataSetIterator(insts, CacheMode.NONE); double[][] preds = new double[insts.numInstances()][insts.numClasses()]; if (it.resetSupported()) { it.reset(); } int offset = 0; boolean next = it.hasNext(); // Get predictions batch-wise while (next) { final DataSet ds = Utils.getNext(it); final INDArray features = ds.getFeatures(); final INDArray labelsMask = ds.getLabelsMaskArray(); INDArray lastTimeStepIndices; if (labelsMask != null) { lastTimeStepIndices = Nd4j.argMax(labelsMask, 1); } else { lastTimeStepIndices = Nd4j.zeros(features.size(0), 1); } INDArray predBatch = model.outputSingle(features); int currentBatchSize = (int) predBatch.size(0); for (int i = 0; i < currentBatchSize; i++) { int thisTimeSeriesLastIndex = lastTimeStepIndices.getInt(i); INDArray thisExampleProbabilities = predBatch.get( NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(thisTimeSeriesLastIndex)); for (int j = 0; j < insts.numClasses(); j++) { preds[i + offset][j] = thisExampleProbabilities.getDouble(j); } } offset += currentBatchSize; // add batchsize as offset boolean iteratorHasInstancesLeft = offset < insts.numInstances(); next = it.hasNext() || iteratorHasInstancesLeft; } // Fix classes for (int i = 0; i < preds.length; i++) { if (preds[i].length > 1) { 
weka.core.Utils.normalize(preds[i]); } else { // Rescale numeric classes with the computed coefficients in the initialization phase preds[i][0] = preds[i][0] * x1 + x0; } } return preds; }
Example 7
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 4 votes |
@Test public void mixedDist() { UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window); DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false); //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution int loop = 2; for (int i = 0; i < loop; i++) { //preprocess dataset DataSet dataSetToPreProcess = dataSet.copy(); INDArray labelsBefore = dataSetToPreProcess.getLabels().dup(); preProcessor.preProcess(dataSetToPreProcess); INDArray labels = dataSetToPreProcess.getLabels(); assertEquals(labelsBefore, labels); //check masks are zero where there are no time steps INDArray masks = dataSetToPreProcess.getLabelsMaskArray(); INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq)); assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros); //check distribution of masks in window, going backwards from last time step for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) { //collect mask and labels int maxIndex = min(longSeq, j * window); int minIndex = min(0, maxIndex - window); INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex)); INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.interval(minIndex, maxIndex)); //calc minority class distribution INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1)); if (j < shortSeq / window) { assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist, minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist, minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2, 0), tolerancePerc); //should be 
unchanged as it was already above target dist } assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5, 0), tolerancePerc); //should be unchanged as it was already above target dist } } }
Example 8
Source File: UnderSamplingPreProcessorTest.java From nd4j with Apache License 2.0 | 4 votes |
@Test public void mixedDistOneHot() { //preprocessor should give 30% minority class for every "window" UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window); preProcessor.overrideMinorityDefault(); //construct a dataset with known distribution of minority class and varying time steps DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true); //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution int loop = 10; for (int i = 0; i < loop; i++) { //preprocess dataset DataSet dataSetToPreProcess = dataSet.copy(); preProcessor.preProcess(dataSetToPreProcess); INDArray labels = dataSetToPreProcess.getLabels(); INDArray masks = dataSetToPreProcess.getLabelsMaskArray(); //check masks are zero where there were no time steps INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq)); assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros); //check distribution of masks in the window length, going backwards from last time step for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) { //collect mask and labels int maxIndex = min(longSeq, j * window); int minIndex = min(0, maxIndex - window); INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex)); INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex)); //calc minority class distribution after accounting for masks INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all()) .mul(maskWindow); INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all()) .mul(maskWindow); INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1)); if (j < shortSeq / window) { assertEquals("Failed on window " + j + " batch 0, loop 
" + i, targetDist, minorityDist.getFloat(0, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist, minorityDist.getFloat(1, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2, 0), tolerancePerc); //should be unchanged as it was already above target dist } assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4, 0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5, 0), tolerancePerc); //should be unchanged as it was already above target dist } } }
Example 9
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void mixedDist() { UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window); DataSet dataSet = knownDistVariedDataSet(new float[] {0.1f, 0.2f, 0.8f}, false); //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution int loop = 2; for (int i = 0; i < loop; i++) { //preprocess dataset DataSet dataSetToPreProcess = dataSet.copy(); INDArray labelsBefore = dataSetToPreProcess.getLabels().dup(); preProcessor.preProcess(dataSetToPreProcess); INDArray labels = dataSetToPreProcess.getLabels(); assertEquals(labelsBefore, labels); //check masks are zero where there are no time steps INDArray masks = dataSetToPreProcess.getLabelsMaskArray(); INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq)); assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros); //check distribution of masks in window, going backwards from last time step for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) { //collect mask and labels int maxIndex = min(longSeq, j * window); int minIndex = min(0, maxIndex - window); INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex)); INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.interval(minIndex, maxIndex)); //calc minority class distribution INDArray minorityDist = labelWindow.mul(maskWindow).sum(1).div(maskWindow.sum(1)); if (j < shortSeq / window) { assertEquals("Failed on window " + j + " batch 0, loop " + i, targetDist, minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist, minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2), tolerancePerc); //should be unchanged as 
it was already above target dist } assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5), tolerancePerc); //should be unchanged as it was already above target dist } } }
Example 10
Source File: UnderSamplingPreProcessorTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void mixedDistOneHot() { //preprocessor should give 30% minority class for every "window" UnderSamplingByMaskingPreProcessor preProcessor = new UnderSamplingByMaskingPreProcessor(targetDist, window); preProcessor.overrideMinorityDefault(); //construct a dataset with known distribution of minority class and varying time steps DataSet dataSet = knownDistVariedDataSet(new float[] {0.9f, 0.8f, 0.2f}, true); //Call preprocess for the same dataset multiple times to mimic calls with .next() and checks total distribution int loop = 10; for (int i = 0; i < loop; i++) { //preprocess dataset DataSet dataSetToPreProcess = dataSet.copy(); preProcessor.preProcess(dataSetToPreProcess); INDArray labels = dataSetToPreProcess.getLabels(); INDArray masks = dataSetToPreProcess.getLabelsMaskArray(); //check masks are zero where there were no time steps INDArray shouldBeAllZeros = masks.get(NDArrayIndex.interval(0, 3), NDArrayIndex.interval(shortSeq, longSeq)); assertEquals(Nd4j.zeros(shouldBeAllZeros.shape()), shouldBeAllZeros); //check distribution of masks in the window length, going backwards from last time step for (int j = (int) Math.ceil((double) longSeq / window); j > 0; j--) { //collect mask and labels int maxIndex = min(longSeq, j * window); int minIndex = min(0, maxIndex - window); INDArray maskWindow = masks.get(NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex)); INDArray labelWindow = labels.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(minIndex, maxIndex)); //calc minority class distribution after accounting for masks INDArray minorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(0), NDArrayIndex.all()) .mul(maskWindow); INDArray majorityClass = labelWindow.get(NDArrayIndex.all(), NDArrayIndex.point(1), NDArrayIndex.all()) .mul(maskWindow); INDArray minorityDist = minorityClass.sum(1).div(majorityClass.add(minorityClass).sum(1)); if (j < shortSeq / window) { assertEquals("Failed on window " + j + " batch 0, loop 
" + i, targetDist, minorityDist.getFloat(0), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 1, loop " + i, targetDist, minorityDist.getFloat(1), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 2, loop " + i, 0.8, minorityDist.getFloat(2), tolerancePerc); //should be unchanged as it was already above target dist } assertEquals("Failed on window " + j + " batch 3, loop " + i, targetDist, minorityDist.getFloat(3), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 4, loop " + i, targetDist, minorityDist.getFloat(4), tolerancePerc); //should now be close to target dist assertEquals("Failed on window " + j + " batch 5, loop " + i, 0.8, minorityDist.getFloat(5), tolerancePerc); //should be unchanged as it was already above target dist } } }