org.deeplearning4j.text.sentenceiterator.UimaSentenceIterator Java Examples
The following examples show how to use
org.deeplearning4j.text.sentenceiterator.UimaSentenceIterator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UimaSentenceIteratorExample.java From Java-Deep-Learning-Cookbook with MIT License | 5 votes |
/**
 * Walks the sentences produced by a {@code UimaSentenceIterator} created for the
 * {@code "files/"} path twice: the first pass only counts them, the second pass
 * prints each one after {@code SentenceDataPreProcessor.setPreprocessor} has been
 * applied to the iterator.
 *
 * @param args command-line arguments (not used)
 * @throws Exception propagated from iterator creation or iteration
 */
public static void main(String[] args) throws Exception {
    SentenceIterator sentences = UimaSentenceIterator.createWithPath("files/");

    // Pass 1: drain the iterator, keeping only the number of sentences seen.
    int total = 0;
    for (; sentences.hasNext(); total++) {
        sentences.nextSentence();
    }
    System.out.println("count = " + total);

    // Pass 2: rewind, hand the iterator to the preprocessor helper, then print every sentence.
    // NOTE(review): presumably setPreprocessor installs a sentence pre-processor on the
    // iterator; its definition is not visible here, so confirm before relying on it.
    sentences.reset();
    SentenceDataPreProcessor.setPreprocessor(sentences);
    for (; sentences.hasNext(); ) {
        System.out.println(sentences.nextSentence());
    }
}
Example #2
Source File: UimaSentenceIteratorExample.java From Java-Deep-Learning-Cookbook with MIT License | 5 votes |
/**
 * Example entry point: builds a sentence iterator over {@code "files/"}, reports how
 * many sentences it yields, then resets it and prints every sentence once
 * {@code SentenceDataPreProcessor.setPreprocessor} has been called on it.
 *
 * @param args ignored
 * @throws Exception if creating or reading from the iterator fails
 */
public static void main(String[] args) throws Exception {
    final SentenceIterator source = UimaSentenceIterator.createWithPath("files/");

    // Counting pass: the sentence text itself is discarded.
    int sentenceCount = 0;
    while (source.hasNext()) {
        source.nextSentence();
        sentenceCount += 1;
    }
    System.out.println("count = " + sentenceCount);

    // The iterator is exhausted at this point, so rewind it before the printing pass.
    source.reset();
    SentenceDataPreProcessor.setPreprocessor(source);

    // Printing pass.
    while (source.hasNext()) {
        String sentence = source.nextSentence();
        System.out.println(sentence);
    }
}
Example #3
Source File: Word2VecRawTextExample.java From Java-Data-Science-Cookbook with MIT License | 5 votes |
public static void main(String[] args) throws Exception { // Gets Path to Text file String filePath = "c:/raw_sentences.txt"; log.info("Load & Vectorize Sentences...."); // Strip white space before and after for each line SentenceIterator iter = UimaSentenceIterator.createWithPath(filePath); // Split on white spaces in the line to get words TokenizerFactory t = new DefaultTokenizerFactory(); t.setTokenPreProcessor(new CommonPreprocessor()); InMemoryLookupCache cache = new InMemoryLookupCache(); WeightLookupTable table = new InMemoryLookupTable.Builder() .vectorLength(100) .useAdaGrad(false) .cache(cache) .lr(0.025f).build(); log.info("Building model...."); Word2Vec vec = new Word2Vec.Builder() .minWordFrequency(5).iterations(1) .layerSize(100).lookupTable(table) .stopWords(new ArrayList<String>()) .vocabCache(cache).seed(42) .windowSize(5).iterate(iter).tokenizerFactory(t).build(); log.info("Fitting Word2Vec model...."); vec.fit(); log.info("Writing word vectors to text file...."); // Write word WordVectorSerializer.writeWordVectors(vec, "word2vec.txt"); log.info("Closest Words:"); Collection<String> lst = vec.wordsNearest("man", 5); System.out.println(lst); double cosSim = vec.similarity("cruise", "voyage"); System.out.println(cosSim); }