weka.filters.unsupervised.instance.RemovePercentage Java Examples
The following examples show how to use
weka.filters.unsupervised.instance.RemovePercentage.
You can vote up the examples you find useful or vote down those you don't, and you can visit the original project or source file by following the links above each example. Related API usage can be found in the sidebar.
Example #1
Source File: RnnSequenceClassifierTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
@Test public void testImdbClassification() throws Exception { // Init data data = DatasetLoader.loadImdb(); // Define layers LSTM lstm1 = new LSTM(); lstm1.setNOut(3); lstm1.setActivationFunction(new ActivationTanH()); RnnOutputLayer rnnOut = new RnnOutputLayer(); // Network config NeuralNetConfiguration nnc = new NeuralNetConfiguration(); nnc.setL2(1e-5); nnc.setGradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue); nnc.setGradientNormalizationThreshold(1.0); Adam opt = new Adam(); opt.setLearningRate(0.02); nnc.setUpdater(opt); // Config classifier clf.setLayers(lstm1, rnnOut); clf.setNeuralNetConfiguration(nnc); clf.settBPTTbackwardLength(20); clf.settBPTTforwardLength(20); clf.setQueueSize(0); // Randomize data data.randomize(new Random(42)); // Reduce datasize RemovePercentage rp = new RemovePercentage(); rp.setPercentage(95); rp.setInputFormat(data); data = Filter.useFilter(data, rp); TestUtil.holdout(clf, data, 5, tii); }
Example #2
Source File: RnnSequenceClassifierTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/**
 * Trains a single-LSTM RNN sequence classifier on a 5% IMDB subset
 * (95% of instances removed via {@code RemovePercentage}) and evaluates
 * it with {@code TestUtil.holdout}. This listing is byte-identical to
 * Example #1 on this page.
 *
 * <p>NOTE(review): relies on fields {@code data}, {@code clf} and {@code tii}
 * declared in the enclosing test class — not visible in this excerpt.
 */
@Test public void testImdbClassification() throws Exception { // Init data data = DatasetLoader.loadImdb(); // Define layers LSTM lstm1 = new LSTM(); lstm1.setNOut(3); lstm1.setActivationFunction(new ActivationTanH()); RnnOutputLayer rnnOut = new RnnOutputLayer(); // Network config NeuralNetConfiguration nnc = new NeuralNetConfiguration(); nnc.setL2(1e-5); nnc.setGradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue); nnc.setGradientNormalizationThreshold(1.0); Adam opt = new Adam(); opt.setLearningRate(0.02); nnc.setUpdater(opt); // Config classifier clf.setLayers(lstm1, rnnOut); clf.setNeuralNetConfiguration(nnc); clf.settBPTTbackwardLength(20); clf.settBPTTforwardLength(20); clf.setQueueSize(0); // Randomize data data.randomize(new Random(42)); // Reduce datasize RemovePercentage rp = new RemovePercentage(); rp.setPercentage(95); rp.setInputFormat(data); data = Filter.useFilter(data, rp); TestUtil.holdout(clf, data, 5, tii); }
Example #3
Source File: RnnSequenceClassifierTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
@Test public void testConfigRotation() throws Exception { Map<String, String> failedConfigs = new HashMap<>(); tii = new RnnTextEmbeddingInstanceIterator(); tii.setWordVectorLocation(modelSlim); data = DatasetLoader.loadAnger(); // Reduce datasize RemovePercentage rp = new RemovePercentage(); rp.setPercentage(98); rp.setInputFormat(data); data = Filter.useFilter(data, rp); RnnOutputLayer out = new RnnOutputLayer(); out.setLossFn(new LossMSE()); out.setActivationFunction(new ActivationIdentity()); final Dl4jWordsFromFile wff = new Dl4jWordsFromFile(); wff.setStopwords(new File("src/test/resources/stopwords/english.txt")); // Iterate stopwords for (Dl4jAbstractStopwords sw : new Dl4jAbstractStopwords[]{new Dl4jRainbow(), new Dl4jNull(), wff}) { tii.setStopwords(sw); final StemmingPreProcessor spp = new StemmingPreProcessor(); spp.setStemmer(new SnowballStemmer()); // Iterate TokenPreProcess for (TokenPreProcess tpp : new TokenPreProcess[]{ new CommonPreProcessor(), new EndingPreProcessor(), new LowCasePreProcessor(), spp }) { tii.setTokenPreProcess(tpp); // Iterate tokenizer faktory for (TokenizerFactory tf : new TokenizerFactory[]{ new DefaultTokenizerFactory(), new CharacterNGramTokenizerFactory(), new TweetNLPTokenizerFactory(), }) { tii.setTokenizerFactory(tf); // Create clean classifier clf = new RnnSequenceClassifier(); clf.setNumEpochs(1); clf.setLayers(out); clf.setInstanceIterator(tii); clf.settBPTTforwardLength(3); clf.settBPTTbackwardLength(3); String conf = "\n - TokenPreProcess: " + tpp.getClass().getSimpleName() + "\n - TokenizerFactory: " + tf.getClass().getSimpleName() + "\n - StopWords: " + sw.getClass().getSimpleName(); log.info(conf); try { clf.buildClassifier(data); } catch (Exception e) { failedConfigs.put(conf, e.toString()); } } } } // Check if anything failed if (!failedConfigs.isEmpty()) { final String err = failedConfigs .keySet() .stream() .map(s -> "Config failed: " + s + "\nException: " + failedConfigs.get(s)) 
.collect(Collectors.joining("\n")); Assert.fail("Some of the configs failed:\n" + err); } }
Example #4
Source File: Dl4jMlpTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
@Test public void testTextCnnClassification() throws Exception { CnnTextEmbeddingInstanceIterator cnnTextIter = new CnnTextEmbeddingInstanceIterator(); cnnTextIter.setTrainBatchSize(128); cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors()); clf.setInstanceIterator(cnnTextIter); cnnTextIter.initialize(); final WordVectors wordVectors = cnnTextIter.getWordVectors(); int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length; ConvolutionLayer conv1 = new ConvolutionLayer(); conv1.setKernelSize(new int[]{4, vectorSize}); conv1.setNOut(10); conv1.setStride(new int[]{1, vectorSize}); conv1.setConvolutionMode(ConvolutionMode.Same); conv1.setActivationFunction(new ActivationReLU()); BatchNormalization bn1 = new BatchNormalization(); ConvolutionLayer conv2 = new ConvolutionLayer(); conv2.setKernelSize(new int[]{3, vectorSize}); conv2.setNOut(10); conv2.setStride(new int[]{1, vectorSize}); conv2.setConvolutionMode(ConvolutionMode.Same); conv2.setActivationFunction(new ActivationReLU()); BatchNormalization bn2 = new BatchNormalization(); ConvolutionLayer conv3 = new ConvolutionLayer(); conv3.setKernelSize(new int[]{2, vectorSize}); conv3.setNOut(10); conv3.setStride(new int[]{1, vectorSize}); conv3.setConvolutionMode(ConvolutionMode.Same); conv3.setActivationFunction(new ActivationReLU()); BatchNormalization bn3 = new BatchNormalization(); GlobalPoolingLayer gpl = new GlobalPoolingLayer(); OutputLayer out = new OutputLayer(); // clf.setLayers(conv1, bn1, conv2, bn2, conv3, bn3, gpl, out); clf.setLayers(conv1, conv2, conv3, gpl, out); // clf.setNumEpochs(50); clf.setCacheMode(CacheMode.MEMORY); final EpochListener l = new EpochListener(); l.setN(1); clf.setIterationListener(l); clf.setEarlyStopping(new EarlyStopping(10, 15)); clf.setDebug(true); // NNC NeuralNetConfiguration nnc = new NeuralNetConfiguration(); nnc.setL2(1e-3); final Dropout dropout = new Dropout(); dropout.setP(0.2); nnc.setDropout(dropout); 
clf.setNeuralNetConfiguration(nnc); // Data final Instances data = DatasetLoader.loadImdb(); data.randomize(new Random(42)); RemovePercentage rp = new RemovePercentage(); rp.setInputFormat(data); rp.setPercentage(98); final Instances dataFiltered = Filter.useFilter(data, rp); TestUtil.holdout(clf, dataFiltered); }
Example #5
Source File: TestUtil.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
/** * Creates a relational test dataset */ public static Instances makeTestDatasetRelational( int seed, int numInstances, int numClasses, int classType, int classIndex, int numRelationalNominal, int numRelationalString, int numRelationalNumeric, int numInstancesRelational) throws Exception { TestInstances testset = new TestInstances(); testset.setSeed(seed); testset.setNumInstances(numInstances); testset.setNumClasses(numClasses); testset.setClassType(classType); testset.setClassIndex(classIndex); testset.setNumClasses(numClasses); testset.setMultiInstance(false); testset.setNumNominal(0); testset.setNumNumeric(0); testset.setNumString(0); testset.setNumDate(0); testset.setNumRelational(1); testset.setNumRelationalNominal(numRelationalNominal); testset.setNumRelationalString(numRelationalString); testset.setNumRelationalNumeric(numRelationalNumeric); testset.setNumInstancesRelational(numInstancesRelational); final Instances generated = testset.generate(); // Remove random instances Random rand = new Random(42); for (Instance datum : generated) { final Instances rel = datum.relationalValue(0); RemovePercentage rp = new RemovePercentage(); rp.setInputFormat(rel); rp.setPercentage(rand.nextDouble() * 100); final Instances rel2 = Filter.useFilter(rel, rp); final int i = generated.attribute(0).addRelation(rel2); datum.setValue(0, i); } return generated; }
Example #6
Source File: RnnSequenceClassifierTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
@Test public void testConfigRotation() throws Exception { Map<String, String> failedConfigs = new HashMap<>(); tii = new RnnTextEmbeddingInstanceIterator(); tii.setWordVectorLocation(modelSlim); data = DatasetLoader.loadAnger(); // Reduce datasize RemovePercentage rp = new RemovePercentage(); rp.setPercentage(98); rp.setInputFormat(data); data = Filter.useFilter(data, rp); RnnOutputLayer out = new RnnOutputLayer(); out.setLossFn(new LossMSE()); out.setActivationFunction(new ActivationIdentity()); final Dl4jWordsFromFile wff = new Dl4jWordsFromFile(); wff.setStopwords(new File("src/test/resources/stopwords/english.txt")); // Iterate stopwords for (Dl4jAbstractStopwords sw : new Dl4jAbstractStopwords[]{new Dl4jRainbow(), new Dl4jNull(), wff}) { tii.setStopwords(sw); final StemmingPreProcessor spp = new StemmingPreProcessor(); spp.setStemmer(new SnowballStemmer()); // Iterate TokenPreProcess for (TokenPreProcess tpp : new TokenPreProcess[]{ new CommonPreProcessor(), new EndingPreProcessor(), new LowCasePreProcessor(), spp }) { tii.setTokenPreProcess(tpp); // Iterate tokenizer faktory for (TokenizerFactory tf : new TokenizerFactory[]{ new DefaultTokenizerFactory(), new CharacterNGramTokenizerFactory(), new TweetNLPTokenizerFactory(), }) { tii.setTokenizerFactory(tf); // Create clean classifier clf = new RnnSequenceClassifier(); clf.setNumEpochs(1); clf.setLayers(out); clf.setInstanceIterator(tii); clf.settBPTTforwardLength(3); clf.settBPTTbackwardLength(3); String conf = "\n - TokenPreProcess: " + tpp.getClass().getSimpleName() + "\n - TokenizerFactory: " + tf.getClass().getSimpleName() + "\n - StopWords: " + sw.getClass().getSimpleName(); log.info(conf); try { clf.buildClassifier(data); } catch (Exception e) { failedConfigs.put(conf, e.toString()); } } } } // Check if anything failed if (!failedConfigs.isEmpty()) { final String err = failedConfigs .keySet() .stream() .map(s -> "Config failed: " + s + "\nException: " + failedConfigs.get(s)) 
.collect(Collectors.joining("\n")); Assert.fail("Some of the configs failed:\n" + err); } }
Example #7
Source File: Dl4jMlpTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
@Test public void testTextCnnClassification() throws Exception { CnnTextEmbeddingInstanceIterator cnnTextIter = new CnnTextEmbeddingInstanceIterator(); cnnTextIter.setTrainBatchSize(128); cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors()); clf.setInstanceIterator(cnnTextIter); cnnTextIter.initialize(); final WordVectors wordVectors = cnnTextIter.getWordVectors(); int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length; ConvolutionLayer conv1 = new ConvolutionLayer(); conv1.setKernelSize(new int[]{4, vectorSize}); conv1.setNOut(10); conv1.setStride(new int[]{1, vectorSize}); conv1.setConvolutionMode(ConvolutionMode.Same); conv1.setActivationFunction(new ActivationReLU()); BatchNormalization bn1 = new BatchNormalization(); ConvolutionLayer conv2 = new ConvolutionLayer(); conv2.setKernelSize(new int[]{3, vectorSize}); conv2.setNOut(10); conv2.setStride(new int[]{1, vectorSize}); conv2.setConvolutionMode(ConvolutionMode.Same); conv2.setActivationFunction(new ActivationReLU()); BatchNormalization bn2 = new BatchNormalization(); ConvolutionLayer conv3 = new ConvolutionLayer(); conv3.setKernelSize(new int[]{2, vectorSize}); conv3.setNOut(10); conv3.setStride(new int[]{1, vectorSize}); conv3.setConvolutionMode(ConvolutionMode.Same); conv3.setActivationFunction(new ActivationReLU()); BatchNormalization bn3 = new BatchNormalization(); GlobalPoolingLayer gpl = new GlobalPoolingLayer(); OutputLayer out = new OutputLayer(); // clf.setLayers(conv1, bn1, conv2, bn2, conv3, bn3, gpl, out); clf.setLayers(conv1, conv2, conv3, gpl, out); // clf.setNumEpochs(50); clf.setCacheMode(CacheMode.MEMORY); final EpochListener l = new EpochListener(); l.setN(1); clf.setIterationListener(l); clf.setEarlyStopping(new EarlyStopping(10, 15)); clf.setDebug(true); // NNC NeuralNetConfiguration nnc = new NeuralNetConfiguration(); nnc.setL2(1e-3); final Dropout dropout = new Dropout(); dropout.setP(0.2); nnc.setDropout(dropout); 
clf.setNeuralNetConfiguration(nnc); // Data final Instances data = DatasetLoader.loadImdb(); data.randomize(new Random(42)); RemovePercentage rp = new RemovePercentage(); rp.setInputFormat(data); rp.setPercentage(98); final Instances dataFiltered = Filter.useFilter(data, rp); TestUtil.holdout(clf, dataFiltered); }
Example #8
Source File: TestUtil.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
/** * Creates a relational test dataset */ public static Instances makeTestDatasetRelational( int seed, int numInstances, int numClasses, int classType, int classIndex, int numRelationalNominal, int numRelationalString, int numRelationalNumeric, int numInstancesRelational) throws Exception { TestInstances testset = new TestInstances(); testset.setSeed(seed); testset.setNumInstances(numInstances); testset.setNumClasses(numClasses); testset.setClassType(classType); testset.setClassIndex(classIndex); testset.setNumClasses(numClasses); testset.setMultiInstance(false); testset.setNumNominal(0); testset.setNumNumeric(0); testset.setNumString(0); testset.setNumDate(0); testset.setNumRelational(1); testset.setNumRelationalNominal(numRelationalNominal); testset.setNumRelationalString(numRelationalString); testset.setNumRelationalNumeric(numRelationalNumeric); testset.setNumInstancesRelational(numInstancesRelational); final Instances generated = testset.generate(); // Remove random instances Random rand = new Random(42); for (Instance datum : generated) { final Instances rel = datum.relationalValue(0); RemovePercentage rp = new RemovePercentage(); rp.setInputFormat(rel); rp.setPercentage(rand.nextDouble() * 100); final Instances rel2 = Filter.useFilter(rel, rp); final int i = generated.attribute(0).addRelation(rel2); datum.setValue(0, i); } return generated; }