org.deeplearning4j.models.embeddings.wordvectors.WordVectors Java Examples
The following examples show how to use
org.deeplearning4j.models.embeddings.wordvectors.WordVectors.
You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
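Before the examples, here is a minimal quick-start sketch of the typical WordVectors workflow; the model path and the query words are placeholders chosen for illustration, not taken from any example below.

import java.io.File;
import java.util.Collection;

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors;

public class WordVectorsQuickStart {
  public static void main(String[] args) {
    // Load a pre-trained word2vec model (path is a placeholder)
    WordVectors vec = WordVectorSerializer.readWord2VecModel(new File("/path/to/vectors.bin"));

    // Raw embedding for a single word (null if the word is not in the vocabulary)
    double[] vector = vec.getWordVector("day");

    // Cosine similarity between two words
    double sim = vec.similarity("day", "night");

    // Nearest neighbours in the embedding space
    Collection<String> nearest = vec.wordsNearest("day", 10);

    System.out.println(vector.length + " " + sim + " " + nearest);
  }
}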
Example #1
Source File: CnnSentenceDataSetIterator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Constructor that uses {@link Builder} extended with stopwords.
 *
 * @param builder Builder
 */
protected CnnSentenceDataSetIterator(CnnSentenceDataSetIterator.Builder builder) {
  super(builder);
  this.stopwords = builder.stopwords;
  setUnknownWordHandling(UnknownWordHandling.UseUnknownVector);

  // Set unknown word
  WordVectors wordVectors = getWordVectors();
  wordVectors.setUNK("UNKNOWN");

  // Initialize unknown word manually
  INDArray unknown;
  if (getUseNormalizedWordVectors()) {
    unknown = wordVectors.getWordVectorMatrixNormalized(wordVectors.getUNK());
  } else {
    unknown = wordVectors.getWordVectorMatrix(wordVectors.getUNK());
  }
  setUnknown(unknown);
}
Example #2
Source File: RnnTextEmbeddingDataSetIterator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Constructor with necessary objects to create RNN features.
 *
 * @param data Instances with documents and labels
 * @param wordVectors WordVectors object
 * @param tokenFact Tokenizer factory
 * @param tpp Token pre processor
 * @param stopWords Stop word object
 * @param sentenceProvider Labeled sentence provider
 * @param batchSize Size of each minibatch for training
 * @param truncateLength If reviews exceed this length, they are truncated
 */
public RnnTextEmbeddingDataSetIterator(
    Instances data,
    WordVectors wordVectors,
    TokenizerFactory tokenFact,
    TokenPreProcess tpp,
    AbstractStopwords stopWords,
    LabeledSentenceProvider sentenceProvider,
    int batchSize,
    int truncateLength) {
  this.batchSize = batchSize;
  this.wordVectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
  this.data = data;
  this.wordVectors = wordVectors;
  this.truncateLength = truncateLength;
  this.tokenizerFactory = tokenFact;
  this.tokenizerFactory.getBackend().setTokenPreProcessor(tpp.getBackend());
  this.stopWords = stopWords;
  this.sentenceProvider = sentenceProvider;
}
Example #3
Source File: CnnWord2VecSentenceClassificationExample.java From Java-Deep-Learning-Cookbook with MIT License | 6 votes |
private static DataSetIterator getDataSetIterator(boolean isTraining, WordVectors wordVectors, int minibatchSize,
                                                  int maxSentenceLength, Random rng) {
  String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/"));
  String positiveBaseDir = FilenameUtils.concat(path, "pos");
  String negativeBaseDir = FilenameUtils.concat(path, "neg");
  File filePositive = new File(positiveBaseDir);
  File fileNegative = new File(negativeBaseDir);

  Map<String, List<File>> reviewFilesMap = new HashMap<>();
  reviewFilesMap.put("Positive", Arrays.asList(filePositive.listFiles()));
  reviewFilesMap.put("Negative", Arrays.asList(fileNegative.listFiles()));

  LabeledSentenceProvider sentenceProvider = new FileLabeledSentenceProvider(reviewFilesMap, rng);
  return new CnnSentenceDataSetIterator.Builder(CnnSentenceDataSetIterator.Format.CNN2D)
      .sentenceProvider(sentenceProvider)
      .wordVectors(wordVectors)
      .minibatchSize(minibatchSize)
      .maxSentenceLength(maxSentenceLength)
      .useNormalizedWordVectors(false)
      .build();
}
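A hypothetical call site for the helper above might look like the following; WORD_VECTORS_PATH, the batch size, the sentence length and the seed are assumptions made purely for illustration.

WordVectors wordVectors = WordVectorSerializer.loadStaticModel(new File(WORD_VECTORS_PATH)); // WORD_VECTORS_PATH is assumed
DataSetIterator trainIterator = getDataSetIterator(true, wordVectors, 32, 256, new Random(12345));
DataSetIterator testIterator = getDataSetIterator(false, wordVectors, 32, 256, new Random(12345));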
Example #4
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
@Ignore
public void testWriteWordVectors() throws IOException {
  WordVectors vec = WordVectorSerializer.readWord2VecModel(binaryFile);
  InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
  InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
  WordVectorSerializer.writeWordVectors(lookupTable, lookupCache, pathToWriteto);

  WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
  double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
  double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
  assertTrue(wordVector1.length == 300);
  assertTrue(wordVector2.length == 300);
  assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
  assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
Example #5
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
@Ignore
public void testWriteWordVectorsFromWord2Vec() throws IOException {
  WordVectors vec = WordVectorSerializer.readWord2VecModel(binaryFile, true);
  WordVectorSerializer.writeWordVectors((Word2Vec) vec, pathToWriteto);

  WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
  INDArray wordVector1 = wordVectors.getWordVectorMatrix("Morgan_Freeman");
  INDArray wordVector2 = wordVectors.getWordVectorMatrix("JA_Montalbano");
  assertEquals(vec.getWordVectorMatrix("Morgan_Freeman"), wordVector1);
  assertEquals(vec.getWordVectorMatrix("JA_Montalbano"), wordVector2);
  assertTrue(wordVector1.length() == 300);
  assertTrue(wordVector2.length() == 300);
  assertEquals(wordVector1.getDouble(0), 0.044423, 1e-3);
  assertEquals(wordVector2.getDouble(0), 0.051964, 1e-3);
}
Example #6
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
@Ignore
public void testFromTableAndVocab() throws IOException {
  WordVectors vec = WordVectorSerializer.readWord2VecModel(textFile);
  InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
  InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();

  WordVectors wordVectors = WordVectorSerializer.fromTableAndVocab(lookupTable, lookupCache);
  double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
  double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
  assertTrue(wordVector1.length == 300);
  assertTrue(wordVector2.length == 300);
  assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
  assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
Example #7
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * This method tests binary file loading as static model
 *
 * @throws Exception
 */
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderBinary() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  WordVectors vectorsLive = WordVectorSerializer.readWord2VecModel(binaryFile);
  WordVectors vectorsStatic = WordVectorSerializer.loadStaticModel(binaryFile);

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
  INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("Morgan_Freeman");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);
}
Example #8
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * This method tests ZIP file loading as static model
 *
 * @throws Exception
 */
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderArchive() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  File w2v = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

  WordVectors vectorsLive = WordVectorSerializer.readWord2Vec(w2v);
  WordVectors vectorsStatic = WordVectorSerializer.loadStaticModel(w2v);

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("night");
  INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("night");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);
}
Example #9
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testUnifiedLoaderArchive1() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  File w2v = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

  WordVectors vectorsLive = WordVectorSerializer.readWord2Vec(w2v);
  WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(w2v, false);

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("night");
  INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("night");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);

  assertEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
  assertEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1Neg());
}
Example #10
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testUnifiedLoaderArchive2() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  File w2v = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

  WordVectors vectorsLive = WordVectorSerializer.readWord2Vec(w2v);
  WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(w2v, true);

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("night");
  INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("night");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);

  assertNotEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
}
Example #11
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * This method tests CSV file loading via unified loader
 *
 * @throws Exception
 */
@Test
public void testUnifiedLoaderText() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  WordVectors vectorsLive = WordVectorSerializer.loadTxtVectors(textFile);
  WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(textFile, true);

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
  INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("Morgan_Freeman");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);

  // we're trying EXTENDED model, but file doesn't have syn1/huffman info, so it should be silently degraded to simplified model
  assertEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
}
Example #12
Source File: Windows.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Constructs a list of window of size windowSize.
 * Note that padding for each window is created as well.
 *
 * @param words the words to tokenize and construct windows from
 * @param tokenizerFactory tokenizer factory to use
 * @param windowSize the window size to generate
 * @return the list of windows for the tokenized string
 */
public static List<Window> windows(String words, @NonNull TokenizerFactory tokenizerFactory, int windowSize,
                                   WordVectors vectors) {
  Tokenizer tokenizer = tokenizerFactory.create(words);
  List<String> list = new ArrayList<>();
  while (tokenizer.hasMoreTokens()) {
    String token = tokenizer.nextToken();

    // if we don't have UNK word defined - we have to skip this word
    if (vectors.getWordVectorMatrix(token) != null)
      list.add(token);
  }

  if (list.isEmpty())
    throw new IllegalStateException("No tokens found for windows");

  return windows(list, windowSize);
}
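As a rough usage sketch of the helper above, where the sentence, window size and tokenizer factory are assumed for illustration:

TokenizerFactory tf = new DefaultTokenizerFactory();
WordVectors vec = WordVectorSerializer.readWord2VecModel(new File("/path/to/vectors.bin")); // placeholder path
List<Window> windows = Windows.windows("the quick brown fox jumps over the lazy dog", tf, 5, vec);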
Example #13
Source File: ParagraphVectorsTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Ignore
@Test
public void testGoogleModelForInference() throws Exception {
  WordVectors googleVectors =
      WordVectorSerializer.readWord2VecModel(new File("/ext/GoogleNews-vectors-negative300.bin.gz"));

  TokenizerFactory t = new DefaultTokenizerFactory();
  t.setTokenPreProcessor(new CommonPreprocessor());

  ParagraphVectors pv =
      new ParagraphVectors.Builder().tokenizerFactory(t).iterations(10).useHierarchicSoftmax(false)
          .trainWordVectors(false).iterations(10).useExistingWordVectors(googleVectors)
          .negativeSample(10).sequenceLearningAlgorithm(new DM<VocabWord>()).build();

  INDArray vec1 = pv.inferVector("This text is pretty awesome");
  INDArray vec2 = pv.inferVector("Fantastic process of crazy things happening inside just for history purposes");

  log.info("vec1/vec2: {}", Transforms.cosineSim(vec1, vec2));
}
Example #14
Source File: WordVectorSerializer.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * This method restores previously saved w2v model. File can be in one of the following formats:
 * 1) Binary model, either compressed or not. Like well-known Google Model
 * 2) Popular CSV word2vec text format
 * 3) DL4j compressed format
 *
 * In return you get StaticWord2Vec model, which might be used as lookup table only in multi-gpu environment.
 *
 * @param inputStream InputStream should point to previously saved w2v model
 * @return
 */
public static WordVectors loadStaticModel(InputStream inputStream) throws IOException {

  File tmpFile = DL4JFileUtils.createTempFile("word2vec" + System.currentTimeMillis(), ".tmp");
  FileUtils.copyInputStreamToFile(inputStream, tmpFile);
  try {
    return loadStaticModel(tmpFile);
  } finally {
    tmpFile.delete();
  }
}
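A minimal sketch of calling the overload above from a stream; the resource path and the probe word are placeholders:

try (InputStream is = new FileInputStream("/path/to/GoogleNews-vectors-negative300.bin.gz")) {
  WordVectors staticVectors = WordVectorSerializer.loadStaticModel(is);
  INDArray vector = staticVectors.getWordVectorMatrix("day");
}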
Example #15
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * This method tests binary file loading via unified loader
 *
 * @throws Exception
 */
@Test
public void testUnifiedLoaderBinary() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  WordVectors vectorsLive = WordVectorSerializer.readWord2VecModel(binaryFile);
  WordVectors vectorsStatic = WordVectorSerializer.readWord2VecModel(binaryFile, false);

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
  INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("Morgan_Freeman");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);
}
Example #16
Source File: FlatModelUtilsTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
private static void printWords(String target, Collection<String> list, WordVectors vec) {
  System.out.println("Words close to [" + target + "]:");
  for (String word : list) {
    double sim = vec.similarity(target, word);
    System.out.print("'" + word + "': [" + sim + "]");
  }
  System.out.print("\n");
}
Example #17
Source File: Word2VecTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
@Ignore
public void testPortugeseW2V() throws Exception {
  WordVectors word2Vec = WordVectorSerializer.loadTxtVectors(new File("/ext/Temp/para.txt"));
  word2Vec.setModelUtils(new FlatModelUtils());

  Collection<String> portu = word2Vec.wordsNearest("carro", 10);
  printWords("carro", portu, word2Vec);

  portu = word2Vec.wordsNearest("davi", 10);
  printWords("davi", portu, word2Vec);
}
Example #18
Source File: Word2VecTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
private static void printWords(String target, Collection<String> list, WordVectors vec) {
  System.out.println("Words close to [" + target + "]:");
  for (String word : list) {
    double sim = vec.similarity(target, word);
    System.out.print("'" + word + "': [" + sim + "], ");
  }
  System.out.print("\n");
}
Example #19
Source File: ParagraphVectors.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * This method allows you to use pre-built WordVectors model (e.g. Word2Vec) for ParagraphVectors.
 * Existing model will be transferred into new model before training starts.
 *
 * PLEASE NOTE: Non-normalized model is recommended to use here.
 *
 * @param vec existing WordVectors model
 * @return
 */
@Override
@SuppressWarnings("unchecked")
public Builder useExistingWordVectors(@NonNull WordVectors vec) {
  if (((InMemoryLookupTable<VocabWord>) vec.lookupTable()).getSyn1() == null
      && ((InMemoryLookupTable<VocabWord>) vec.lookupTable()).getSyn1Neg() == null)
    throw new ND4JIllegalStateException("Model being passed as existing has no syn1/syn1Neg available");

  this.existingVectors = vec;
  return this;
}
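A minimal builder sketch, assuming a word2vec model file on disk, of how this method is typically wired in (compare the ParagraphVectors inference example above):

WordVectors w2v = WordVectorSerializer.readWord2VecModel(new File("/path/to/word2vec.bin")); // placeholder path

ParagraphVectors vectors = new ParagraphVectors.Builder()
    .useExistingWordVectors(w2v)   // fails fast if the model carries no syn1/syn1Neg weights
    .trainWordVectors(false)
    .tokenizerFactory(new DefaultTokenizerFactory())
    .build();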
Example #20
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * This method tests CSV file loading as static model
 *
 * @throws Exception
 */
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderText() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  WordVectors vectorsLive = WordVectorSerializer.loadTxtVectors(textFile);
  WordVectors vectorsStatic = WordVectorSerializer.loadStaticModel(textFile);

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
  INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("Morgan_Freeman");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);
}
Example #21
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderFromStream() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  WordVectors vectorsLive = WordVectorSerializer.readWord2VecModel(binaryFile);
  WordVectors vectorsStatic = WordVectorSerializer.loadStaticModel(new FileInputStream(binaryFile));

  INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
  INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("Morgan_Freeman");

  assertNotEquals(null, arrayLive);
  assertEquals(arrayLive, arrayStatic);
}
Example #22
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
public Instances makeData() throws Exception {
  final Instances data = TestUtil.makeTestDataset(42, 100, 0, 0, 1, 0, 0, 1, Attribute.NUMERIC, 1, false);

  WordVectors wordVectors = WordVectorSerializer.loadStaticModel(DatasetLoader.loadGoogleNewsVectors());
  String[] words = (String[]) wordVectors.vocab().words().toArray(new String[0]);

  Random rand = new Random(42);
  for (Instance inst : data) {
    StringBuilder sentence = new StringBuilder();
    for (int i = 0; i < 10; i++) {
      final int idx = rand.nextInt(words.length);
      sentence.append(" ").append(words[idx]);
    }
    inst.setValue(0, sentence.toString());
  }
  return data;
}
Example #23
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
@Ignore
public void testLoader() throws Exception {
  WordVectors vec = WordVectorSerializer.loadTxtVectors(new File("/home/raver119/Downloads/_vectors.txt"));

  logger.info("Rewinding: " + Arrays.toString(vec.getWordVector("rewinding")));
}
Example #24
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testIndexPersistence() throws Exception {
  File inputFile = Resources.asFile("big/raw_sentences.txt");
  SentenceIterator iter = UimaSentenceIterator.createWithPath(inputFile.getAbsolutePath());

  // Split on white spaces in the line to get words
  TokenizerFactory t = new DefaultTokenizerFactory();
  t.setTokenPreProcessor(new CommonPreprocessor());

  Word2Vec vec = new Word2Vec.Builder().minWordFrequency(5).iterations(1).epochs(1).layerSize(100)
      .stopWords(new ArrayList<String>()).useAdaGrad(false).negativeSample(5).seed(42).windowSize(5)
      .iterate(iter).tokenizerFactory(t).build();

  vec.fit();

  VocabCache orig = vec.getVocab();

  File tempFile = File.createTempFile("temp", "w2v");
  tempFile.deleteOnExit();

  WordVectorSerializer.writeWordVectors(vec, tempFile);

  WordVectors vec2 = WordVectorSerializer.loadTxtVectors(tempFile);

  VocabCache rest = vec2.vocab();

  assertEquals(orig.totalNumberOfDocs(), rest.totalNumberOfDocs());

  for (VocabWord word : vec.getVocab().vocabWords()) {
    INDArray array1 = vec.getWordVectorMatrix(word.getLabel());
    INDArray array2 = vec2.getWordVectorMatrix(word.getLabel());
    assertEquals(array1, array2);
  }
}
Example #25
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testLoaderBinary() throws IOException {
  WordVectors vec = WordVectorSerializer.readWord2VecModel(binaryFile);
  assertEquals(vec.vocab().numWords(), 30);
  assertTrue(vec.vocab().hasToken("Morgan_Freeman"));
  assertTrue(vec.vocab().hasToken("JA_Montalbano"));
  double[] wordVector1 = vec.getWordVector("Morgan_Freeman");
  double[] wordVector2 = vec.getWordVector("JA_Montalbano");
  assertTrue(wordVector1.length == 300);
  assertTrue(wordVector2.length == 300);
  assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
  assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
Example #26
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * This method here is only to test the real google model, a few gigabytes worth.
 * Keep it ignored, since it requires the full google model being present in the system, which is 1.6gb compressed
 *
 * @throws Exception
 */
@Test
@Ignore
public void testStaticLoaderGoogleModel() throws Exception {
  logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

  long time1 = System.currentTimeMillis();
  WordVectors vectors = WordVectorSerializer
      .loadStaticModel(new File("C:\\Users\\raver\\develop\\GoogleNews-vectors-negative300.bin.gz"));
  long time2 = System.currentTimeMillis();

  logger.info("Loading time: {} ms", (time2 - time1));
}
Example #27
Source File: WordVectorSerializerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testLoaderStream() throws IOException {
  WordVectors vec = WordVectorSerializer.readWord2VecModel(textFile);

  assertEquals(vec.vocab().numWords(), 30);
  assertTrue(vec.vocab().hasToken("Morgan_Freeman"));
  assertTrue(vec.vocab().hasToken("JA_Montalbano"));
}