Java Code Examples for org.nd4j.linalg.ops.transforms.Transforms#cosineSim()
The following examples show how to use org.nd4j.linalg.ops.transforms.Transforms#cosineSim().
Each example notes the source file and open-source project it was taken from.
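As a quick orientation before the project examples, here is a minimal, self-contained sketch of what Transforms.cosineSim() computes: the dot product of the two vectors divided by the product of their L2 norms. The manual computation alongside the library call is purely illustrative and is not itself part of the ND4J API.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;

public class CosineSimSketch {
    public static void main(String[] args) {
        INDArray a = Nd4j.create(new double[] {1, 2, 3, 4});
        INDArray b = Nd4j.create(new double[] {4, 3, 2, 1});

        // Library call: cosine similarity, in [-1, 1]
        double sim = Transforms.cosineSim(a, b);

        // Equivalent manual computation: dot(a, b) / (||a|| * ||b||)
        double manual = Nd4j.getBlasWrapper().dot(a, b)
                        / (a.norm2Number().doubleValue() * b.norm2Number().doubleValue());

        System.out.println("cosineSim: " + sim + ", manual: " + manual); // both ~0.6667
    }
}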
Example 1
Source File: ParagraphVectors.java From deeplearning4j with Apache License 2.0
/**
 * This method returns the similarity of the document to the specified label, based on the mean
 * of the inferred document vector.
 *
 * @param document the document
 * @param label the label to compare against
 * @return the cosine similarity between the document and the label
 */
public double similarityToLabel(List<VocabWord> document, String label) {
    if (document.isEmpty())
        throw new IllegalStateException("Document has no words inside");

    INDArray docMean = inferVector(document);
    INDArray otherVec = getWordVectorMatrix(label);

    double sim = Transforms.cosineSim(docMean, otherVec);
    return sim;
}
Example 2
Source File: ParagraphVectors.java From deeplearning4j with Apache License 2.0
/**
 * Predicts several labels for the document, computing similarity
 * against the mean of the representations of the words in the document.
 *
 * @param document the document
 * @param limit the maximum number of labels to return
 * @return possible labels in descending order of similarity
 */
public Collection<String> predictSeveral(List<VocabWord> document, int limit) {
    /*
        This code was transferred from the original ParagraphVectors DL4J implementation,
        and has yet to be tested
     */
    if (document.isEmpty())
        throw new IllegalStateException("Document has no words inside");

    INDArray docMean = inferVector(document);

    Counter<String> distances = new Counter<>();

    for (String s : labelsSource.getLabels()) {
        INDArray otherVec = getWordVectorMatrix(s);
        double sim = Transforms.cosineSim(docMean, otherVec);
        log.debug("Similarity inside: [" + s + "] -> " + sim);
        distances.incrementCount(s, (float) sim);
    }

    val keys = distances.keySetSorted();
    return keys.subList(0, Math.min(limit, keys.size()));
}
Example 3
Source File: ParagraphVectors.java From deeplearning4j with Apache License 2.0
/**
 * Predicts the label of the document, computing similarity
 * against the mean of the representations of the words in the document.
 *
 * @param document the document
 * @return the label most similar to the document
 */
public String predict(List<VocabWord> document) {
    /*
        This code was transferred from the original ParagraphVectors DL4J implementation,
        and has yet to be tested
     */
    if (document.isEmpty())
        throw new IllegalStateException("Document has no words inside");

    INDArray docMean = inferVector(document);

    Counter<String> distances = new Counter<>();

    for (String s : labelsSource.getLabels()) {
        INDArray otherVec = getWordVectorMatrix(s);
        double sim = Transforms.cosineSim(docMean, otherVec);
        distances.incrementCount(s, (float) sim);
    }

    return distances.argMax();
}
Example 4
Source File: FlatModelUtils.java From deeplearning4j with Apache License 2.0
/**
 * This method does a full scan of the whole vocabulary, building a descending list of similar words.
 *
 * @param words the vector to find the nearest words for
 * @param top the number of words to return
 * @return the words nearest the given vector
 */
@Override
public Collection<String> wordsNearest(INDArray words, int top) {
    Counter<String> distances = new Counter<>();

    words = adjustRank(words);
    for (String s : vocabCache.words()) {
        INDArray otherVec = lookupTable.vector(s);
        double sim = Transforms.cosineSim(Transforms.unitVec(words.dup()), Transforms.unitVec(otherVec.dup()));
        distances.incrementCount(s, (float) sim);
    }

    distances.keepTopNElements(top);
    return distances.keySetSorted();
}
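A note on Example 4: cosine similarity is scale-invariant, so unit-normalizing both vectors with Transforms.unitVec() before calling cosineSim() should not change the result; the dup() calls guard the stored vectors against possible in-place modification by unitVec(). A small sketch of that equivalence, with arbitrary illustrative vectors:

INDArray v1 = Nd4j.create(new double[] {1, 2, 3});
INDArray v2 = Nd4j.create(new double[] {3, 2, 1});

// Scaling either input leaves the cosine similarity unchanged...
double raw = Transforms.cosineSim(v1.mul(10), v2);

// ...so comparing unit vectors gives the same value
double unit = Transforms.cosineSim(Transforms.unitVec(v1.dup()), Transforms.unitVec(v2.dup()));

System.out.println(raw + " == " + unit); // both ~0.7143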
Example 5
Source File: Word2VecTests.java From deeplearning4j with Apache License 2.0
/**
 * Test for cosine similarity, to track changes in Transforms.cosineSim()
 */
@Test
public void testCosineSim() {
    double[] array1 = new double[] {1.01, 0.91, 0.81, 0.71};
    double[] array2 = new double[] {1.01, 0.91, 0.81, 0.71};
    double[] array3 = new double[] {1.0, 0.9, 0.8, 0.7};

    double sim12 = Transforms.cosineSim(Nd4j.create(array1), Nd4j.create(array2));
    double sim23 = Transforms.cosineSim(Nd4j.create(array2), Nd4j.create(array3));
    log.info("Arrays 1/2 cosineSim: " + sim12);
    log.info("Arrays 2/3 cosineSim: " + sim23);
    log.info("Arrays 1/2 dot: " + Nd4j.getBlasWrapper().dot(Nd4j.create(array1), Nd4j.create(array2)));
    log.info("Arrays 2/3 dot: " + Nd4j.getBlasWrapper().dot(Nd4j.create(array2), Nd4j.create(array3)));

    assertEquals(1.0d, sim12, 0.01d);
    assertEquals(0.99d, sim23, 0.01d);
}
Example 6
Source File: OpExecutionerTests.java From deeplearning4j with Apache License 2.0
@Test
public void testCosineSimilarity() {
    INDArray vec1 = Nd4j.create(new float[] {1, 2, 3, 4, 5});
    INDArray vec2 = Nd4j.create(new float[] {1, 2, 3, 4, 5});
    double sim = Transforms.cosineSim(vec1, vec2);
    assertEquals(getFailureMessage(), 1, sim, 1e-1);
}
Example 7
Source File: ParagraphVectorsTest.java From deeplearning4j with Apache License 2.0
@Deprecated
private double arraysSimilarity(@NonNull INDArray array1, @NonNull INDArray array2) {
    if (array1.equals(array2))
        return 1.0;

    INDArray vector = Transforms.unitVec(array1);
    INDArray vector2 = Transforms.unitVec(array2);

    if (vector == null || vector2 == null)
        return -1;

    return Transforms.cosineSim(vector, vector2);
}
Example 8
Source File: ParagraphVectors.java From deeplearning4j with Apache License 2.0
/**
 * This method returns the top N labels nearest to the specified features vector.
 *
 * @param labelVector the features vector
 * @param topN the number of labels to return
 * @return up to topN labels, in descending order of similarity
 */
public Collection<String> nearestLabels(INDArray labelVector, int topN) {
    if (labelsMatrix == null || labelsList == null || labelsList.isEmpty())
        extractLabels();

    List<BasicModelUtils.WordSimilarity> result = new ArrayList<>();

    // if the list is still empty - return an empty collection
    if (labelsMatrix == null || labelsList == null || labelsList.isEmpty()) {
        log.warn("Labels list is empty!");
        return new ArrayList<>();
    }

    if (!normalizedLabels) {
        synchronized (this) {
            if (!normalizedLabels) {
                labelsMatrix.diviColumnVector(labelsMatrix.norm1(1));
                normalizedLabels = true;
            }
        }
    }

    INDArray similarity = Transforms.unitVec(labelVector).mmul(labelsMatrix.transpose());
    List<Double> highToLowSimList = getTopN(similarity, topN + 20);

    for (int i = 0; i < highToLowSimList.size(); i++) {
        String word = labelsList.get(highToLowSimList.get(i).intValue()).getLabel();
        if (word != null && !word.equals("UNK") && !word.equals("STOP")) {
            INDArray otherVec = lookupTable.vector(word);
            double sim = Transforms.cosineSim(labelVector, otherVec);
            result.add(new BasicModelUtils.WordSimilarity(word, sim));
        }
    }

    Collections.sort(result, new BasicModelUtils.SimilarityComparator());

    return BasicModelUtils.getLabels(result, topN);
}
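Example 8 is worth a second look: instead of looping cosineSim() over every label, it normalizes the label matrix once and uses a single matrix multiply against the unit query vector to score all labels in one pass, recomputing exact cosineSim() only for the shortlisted candidates. Below is a minimal sketch of the batch step, with hypothetical dimensions; it uses L2 (norm2) row normalization, which makes the mmul output exact cosines (the code above normalizes by norm1 and therefore re-ranks with cosineSim() afterwards).

int numLabels = 50, layerSize = 100;
INDArray labels = Nd4j.rand(numLabels, layerSize);
INDArray query = Nd4j.rand(1, layerSize);

// L2-normalize every row of the label matrix, and the query vector
labels.diviColumnVector(labels.norm2(1));
INDArray q = Transforms.unitVec(query.dup());

// One mmul produces all cosine similarities at once: shape [1, numLabels]
INDArray sims = q.mmul(labels.transpose());

// Entry i matches Transforms.cosineSim(query, labels.getRow(i)) up to rounding
System.out.println(sims);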
Example 9
Source File: BasicModelUtils.java From deeplearning4j with Apache License 2.0
/**
 * Returns the similarity of two words. The result is in the range [-1, 1], where -1.0 means the
 * two word vectors point in exactly opposite directions and 1.0 means they match exactly.
 * In practice you will mostly see values in the range [0, 1], but that depends on the training corpus.
 *
 * Returns NaN if either label does not exist in the vocabulary, or if either label is null.
 *
 * @param label1 the first word
 * @param label2 the second word
 * @return a normalized similarity (cosine similarity)
 */
@Override
public double similarity(@NonNull String label1, @NonNull String label2) {
    if (label1 == null || label2 == null) {
        log.debug("LABELS: " + label1 + ": " + (label1 == null ? "null" : EXISTS) + ";" + label2 + " vec2:"
                        + (label2 == null ? "null" : EXISTS));
        return Double.NaN;
    }

    if (!vocabCache.hasToken(label1)) {
        log.debug("Unknown token 1 requested: [{}]", label1);
        return Double.NaN;
    }

    if (!vocabCache.hasToken(label2)) {
        log.debug("Unknown token 2 requested: [{}]", label2);
        return Double.NaN;
    }

    INDArray vec1 = lookupTable.vector(label1).dup();
    INDArray vec2 = lookupTable.vector(label2).dup();

    if (vec1 == null || vec2 == null) {
        log.debug(label1 + ": " + (vec1 == null ? "null" : EXISTS) + ";" + label2 + " vec2:"
                        + (vec2 == null ? "null" : EXISTS));
        return Double.NaN;
    }

    if (label1.equals(label2))
        return 1.0;

    return Transforms.cosineSim(vec1, vec2);
}
Example 10
Source File: NDArrayTestsFortran.java From deeplearning4j with Apache License 2.0
@Test
public void testCosineSim() {
    INDArray vec1 = Nd4j.create(new double[] {1, 2, 3, 4});
    INDArray vec2 = Nd4j.create(new double[] {1, 2, 3, 4});
    double sim = Transforms.cosineSim(vec1, vec2);
    assertEquals(getFailureMessage(), 1, sim, 1e-1);

    INDArray vec3 = Nd4j.create(new float[] {0.2f, 0.3f, 0.4f, 0.5f});
    INDArray vec4 = Nd4j.create(new float[] {0.6f, 0.7f, 0.8f, 0.9f});
    sim = Transforms.cosineSim(vec3, vec4);
    assertEquals(getFailureMessage(), 0.98, sim, 1e-1);
}
Example 11
Source File: NDArrayDistanceTransform.java From DataVec with Apache License 2.0
@Override
public List<Writable> map(List<Writable> writables) {
    int idxFirst = inputSchema.getIndexOfColumn(firstCol);
    int idxSecond = inputSchema.getIndexOfColumn(secondCol);

    INDArray arr1 = ((NDArrayWritable) writables.get(idxFirst)).get();
    INDArray arr2 = ((NDArrayWritable) writables.get(idxSecond)).get();

    double d;
    switch (distance) {
        case COSINE:
            d = Transforms.cosineSim(arr1, arr2);
            break;
        case EUCLIDEAN:
            d = Transforms.euclideanDistance(arr1, arr2);
            break;
        case MANHATTAN:
            d = Transforms.manhattanDistance(arr1, arr2);
            break;
        default:
            throw new UnsupportedOperationException("Unknown or not supported distance metric: " + distance);
    }

    List<Writable> out = new ArrayList<>(writables.size() + 1);
    out.addAll(writables);
    out.add(new DoubleWritable(d));
    return out;
}
Example 12
Source File: OpExecutionerTestsC.java From deeplearning4j with Apache License 2.0
@Test
public void testCosineSimilarity() {
    INDArray vec1 = Nd4j.create(new float[] {1, 2, 3, 4, 5});
    INDArray vec2 = Nd4j.create(new float[] {1, 2, 3, 4, 5});
    double sim = Transforms.cosineSim(vec1, vec2);
    assertEquals(getFailureMessage(), 1, sim, 1e-1);
}
Example 13
Source File: NDArrayDistanceTransform.java From deeplearning4j with Apache License 2.0
@Override
public List<Writable> map(List<Writable> writables) {
    int idxFirst = inputSchema.getIndexOfColumn(firstCol);
    int idxSecond = inputSchema.getIndexOfColumn(secondCol);

    INDArray arr1 = ((NDArrayWritable) writables.get(idxFirst)).get();
    INDArray arr2 = ((NDArrayWritable) writables.get(idxSecond)).get();

    double d;
    switch (distance) {
        case COSINE:
            d = Transforms.cosineSim(arr1, arr2);
            break;
        case EUCLIDEAN:
            d = Transforms.euclideanDistance(arr1, arr2);
            break;
        case MANHATTAN:
            d = Transforms.manhattanDistance(arr1, arr2);
            break;
        default:
            throw new UnsupportedOperationException("Unknown or not supported distance metric: " + distance);
    }

    List<Writable> out = new ArrayList<>(writables.size() + 1);
    out.addAll(writables);
    out.add(new DoubleWritable(d));
    return out;
}
Example 14
Source File: NDArrayTestsFortran.java From nd4j with Apache License 2.0
@Test
public void testCosineSim() {
    INDArray vec1 = Nd4j.create(new double[] {1, 2, 3, 4});
    INDArray vec2 = Nd4j.create(new double[] {1, 2, 3, 4});
    double sim = Transforms.cosineSim(vec1, vec2);
    assertEquals(getFailureMessage(), 1, sim, 1e-1);

    INDArray vec3 = Nd4j.create(new float[] {0.2f, 0.3f, 0.4f, 0.5f});
    INDArray vec4 = Nd4j.create(new float[] {0.6f, 0.7f, 0.8f, 0.9f});
    sim = Transforms.cosineSim(vec3, vec4);
    assertEquals(getFailureMessage(), 0.98, sim, 1e-1);
}
Example 15
Source File: OpExecutionerTestsC.java From nd4j with Apache License 2.0
@Test
public void testCosineSimilarity() {
    INDArray vec1 = Nd4j.create(new float[] {1, 2, 3, 4, 5});
    INDArray vec2 = Nd4j.create(new float[] {1, 2, 3, 4, 5});
    double sim = Transforms.cosineSim(vec1, vec2);
    assertEquals(getFailureMessage(), 1, sim, 1e-1);
}
Example 16
Source File: CudaReduce3Tests.java From nd4j with Apache License 2.0
/**
 * Norm2 + cuBLAS dot call
 *
 * @throws Exception
 */
@Test
public void testPinnedCosineSim() throws Exception {
    // simple way to stop test if we're not on CUDA backend here
    INDArray array1 = Nd4j.create(new float[] {2.01f, 2.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f,
                    1.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f});
    INDArray array2 = Nd4j.create(new float[] {1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f,
                    1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f});

    double similarity = Transforms.cosineSim(array1, array2);
    System.out.println("Cosine similarity: " + similarity);

    assertEquals(0.95f, similarity, 0.01f);
}
Example 17
Source File: HalfOpsTests.java From nd4j with Apache License 2.0
@Ignore
@Test
public void testReduce3_2() throws Exception {
    INDArray array1 = Nd4j.create(new float[] {2.01f, 2.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f,
                    1.01f, 1.01f, 1.01f, 1.01f, 1.01f, 1.01f});
    INDArray array2 = Nd4j.create(new float[] {1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f,
                    1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f});

    double similarity = Transforms.cosineSim(array1, array2);
    System.out.println("Cosine similarity: " + similarity);

    assertEquals(0.95f, similarity, 0.01f);
}
Example 18
Source File: ParagraphVectorsTest.java From deeplearning4j with Apache License 2.0
@Test(timeout = 300000)
public void testParagraphVectorsDBOW() throws Exception {
    skipUnlessIntegrationTests();

    File file = Resources.asFile("/big/raw_sentences.txt");
    SentenceIterator iter = new BasicLineIterator(file);

    AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();

    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    LabelsSource source = new LabelsSource("DOC_");

    ParagraphVectors vec = new ParagraphVectors.Builder().minWordFrequency(1).iterations(5).seed(119).epochs(1)
                    .layerSize(100).learningRate(0.025).labelsSource(source).windowSize(5).iterate(iter)
                    .trainWordVectors(true).vocabCache(cache).tokenizerFactory(t).negativeSample(0)
                    .allowParallelTokenization(true).useHierarchicSoftmax(true).sampling(0).workers(4)
                    .usePreciseWeightInit(true).sequenceLearningAlgorithm(new DBOW<VocabWord>()).build();

    vec.fit();

    assertFalse(((InMemoryLookupTable<VocabWord>) vec.getLookupTable()).getSyn0().isAttached());
    assertFalse(((InMemoryLookupTable<VocabWord>) vec.getLookupTable()).getSyn1().isAttached());

    int cnt1 = cache.wordFrequency("day");
    int cnt2 = cache.wordFrequency("me");

    assertNotEquals(1, cnt1);
    assertNotEquals(1, cnt2);
    assertNotEquals(cnt1, cnt2);

    double simDN = vec.similarity("day", "night");
    log.info("day/night similarity: {}", simDN);

    double similarity1 = vec.similarity("DOC_9835", "DOC_12492");
    log.info("9835/12492 similarity: " + similarity1);
    // assertTrue(similarity1 > 0.2d);

    double similarity2 = vec.similarity("DOC_3720", "DOC_16392");
    log.info("3720/16392 similarity: " + similarity2);
    // assertTrue(similarity2 > 0.2d);

    double similarity3 = vec.similarity("DOC_6347", "DOC_3720");
    log.info("6347/3720 similarity: " + similarity3);
    // assertTrue(similarity3 > 0.6d);

    double similarityX = vec.similarity("DOC_3720", "DOC_9852");
    log.info("3720/9852 similarity: " + similarityX);
    assertTrue(similarityX < 0.5d);

    // testing DM inference now
    INDArray original = vec.getWordVectorMatrix("DOC_16392").dup();
    INDArray inferredA1 = vec.inferVector("This is my work");
    INDArray inferredB1 = vec.inferVector("This is my work .");
    INDArray inferredC1 = vec.inferVector("This is my day");
    INDArray inferredD1 = vec.inferVector("This is my night");

    log.info("A: {}", Arrays.toString(inferredA1.data().asFloat()));
    log.info("C: {}", Arrays.toString(inferredC1.data().asFloat()));

    assertNotEquals(inferredA1, inferredC1);

    double cosAO1 = Transforms.cosineSim(inferredA1.dup(), original.dup());
    double cosAB1 = Transforms.cosineSim(inferredA1.dup(), inferredB1.dup());
    double cosAC1 = Transforms.cosineSim(inferredA1.dup(), inferredC1.dup());
    double cosCD1 = Transforms.cosineSim(inferredD1.dup(), inferredC1.dup());

    log.info("Cos O/A: {}", cosAO1);
    log.info("Cos A/B: {}", cosAB1);
    log.info("Cos A/C: {}", cosAC1);
    log.info("Cos C/D: {}", cosCD1);
}
Example 19
Source File: TestNDArrayWritableTransforms.java From deeplearning4j with Apache License 2.0
@Test
public void testNDArrayDistanceTransform() {
    Schema s = new Schema.Builder()
                    .addColumnDouble("col0").addColumnNDArray("col1", new long[] {1, 10})
                    .addColumnNDArray("col2", new long[] {1, 10}).build();

    TransformProcess tp = new TransformProcess.Builder(s)
                    .ndArrayDistanceTransform("dist", Distance.COSINE, "col1", "col2").build();

    List<String> expColNames = Arrays.asList("col0", "col1", "col2", "dist");
    assertEquals(expColNames, tp.getFinalSchema().getColumnNames());

    Nd4j.getRandom().setSeed(12345);
    INDArray arr1 = Nd4j.rand(1, 10);
    INDArray arr2 = Nd4j.rand(1, 10);
    double cosine = Transforms.cosineSim(arr1, arr2);

    List<Writable> in = Arrays.<Writable>asList(new DoubleWritable(0), new NDArrayWritable(arr1.dup()),
                    new NDArrayWritable(arr2.dup()));
    List<Writable> out = tp.execute(in);

    List<Writable> exp = Arrays.<Writable>asList(new DoubleWritable(0), new NDArrayWritable(arr1),
                    new NDArrayWritable(arr2), new DoubleWritable(cosine));
    assertEquals(exp, out);
}
Example 20
Source File: TestNDArrayWritableTransforms.java From DataVec with Apache License 2.0
@Test
public void testNDArrayDistanceTransform() {
    Schema s = new Schema.Builder()
                    .addColumnDouble("col0").addColumnNDArray("col1", new long[] {1, 10})
                    .addColumnNDArray("col2", new long[] {1, 10}).build();

    TransformProcess tp = new TransformProcess.Builder(s)
                    .ndArrayDistanceTransform("dist", Distance.COSINE, "col1", "col2").build();

    List<String> expColNames = Arrays.asList("col0", "col1", "col2", "dist");
    assertEquals(expColNames, tp.getFinalSchema().getColumnNames());

    Nd4j.getRandom().setSeed(12345);
    INDArray arr1 = Nd4j.rand(1, 10);
    INDArray arr2 = Nd4j.rand(1, 10);
    double cosine = Transforms.cosineSim(arr1, arr2);

    List<Writable> in = Arrays.<Writable>asList(new DoubleWritable(0), new NDArrayWritable(arr1.dup()),
                    new NDArrayWritable(arr2.dup()));
    List<Writable> out = tp.execute(in);

    List<Writable> exp = Arrays.<Writable>asList(new DoubleWritable(0), new NDArrayWritable(arr1),
                    new NDArrayWritable(arr2), new DoubleWritable(cosine));
    assertEquals(exp, out);
}