pitt.search.semanticvectors.VectorSearcher Java Examples

The following examples show how to use pitt.search.semanticvectors.VectorSearcher. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SemVectorsPeer.java    From seldon-server with Apache License 2.0 6 votes vote down vote up
/**
 * Splits {@code query} on whitespace and runs a cosine nearest-neighbour search
 * with the resulting terms.
 *
 * @param query       whitespace-separated query terms
 * @param queryStore  store passed to the searcher as the query-vector source
 * @param searchStore store passed to the searcher as the search space
 * @param numResults  number of neighbours requested from the searcher
 * @return the searcher's result list, or an empty list when the searcher
 *         signals a {@code ZeroVectorException}
 */
private LinkedList<SearchResult> search(String query,VectorStore queryStore,VectorStore searchStore,int numResults)
{
	try {
		String[] tokens = query.split("\\s+");
		VectorSearcher cosineSearcher = new VectorSearcher.VectorSearcherCosine(
				queryStore, searchStore, luceneUtils, flagConfig, tokens);
		return cosineSearcher.getNearestNeighbors(numResults);
	} catch (pitt.search.semanticvectors.vectors.ZeroVectorException e) {
		// No usable query vector: report "no matches" rather than failing.
		return new LinkedList<>();
	}
}
 
Example #2
Source File: SemVectorsPeer.java    From seldon-server with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a cosine nearest-neighbour search built from {@code terms} (term vectors
 * as the query source, document vectors as the search space) and appends one
 * {@link SemVectorResult} per hit to {@code docResult}.
 *
 * <p>If the searcher signals a {@code ZeroVectorException}, nothing is appended.
 *
 * @param <T>          id type produced by {@code docTransform}
 * @param terms        query terms handed to the searcher
 * @param lUtils       lucene utils; NOTE(review): not read by this method, the
 *                     {@code luceneUtils} field is passed to the searcher instead
 * @param numResults   max number of results to return
 * @param docResult    output list that receives one entry per hit
 * @param docTransform converts the hit's object name into the returned id type
 */
public <T extends Comparable<T>> void findSimilarUsersFromTerms(String[] terms,LuceneUtils lUtils,int numResults,ArrayList<SemVectorResult<T>> docResult,QueryTransform<T> docTransform)
{
	List<SearchResult> neighbours;
	try {
		VectorSearcher cosineSearcher = new VectorSearcher.VectorSearcherCosine(
				termVecReader, docVecReader, luceneUtils, flagConfig, terms);
		neighbours = cosineSearcher.getNearestNeighbors(numResults);
	} catch (pitt.search.semanticvectors.vectors.ZeroVectorException e) {
		// No usable query vector: treat as an empty result set.
		neighbours = new LinkedList<>();
	}

	for (SearchResult hit : neighbours) {
		String objectName = hit.getObjectVector().getObject().toString();
		docResult.add(new SemVectorResult<>(docTransform.fromSV(objectName), hit.getScore()));
	}
}
 
Example #3
Source File: SemanticVectorsStore.java    From seldon-server with Apache License 2.0 6 votes vote down vote up
/**
 * Tokenises {@code query} on runs of whitespace and asks a cosine searcher for
 * the nearest neighbours of those terms.
 *
 * @param query       whitespace-separated query terms
 * @param queryStore  store given to the searcher for building the query vector
 * @param searchStore store given to the searcher as the set to search
 * @param numResults  number of neighbours to request
 * @return the neighbours found, or an empty list if the searcher throws a
 *         {@code ZeroVectorException}
 */
private LinkedList<SearchResult> search(String query,VectorStore queryStore,VectorStore searchStore,int numResults)
{
	final String[] terms;
	final VectorSearcher searcher;
	try {
		terms = query.split("\\s+");
		searcher = new VectorSearcher.VectorSearcherCosine(
				queryStore, searchStore, luceneUtils, flagConfig, terms);
		return searcher.getNearestNeighbors(numResults);
	} catch (pitt.search.semanticvectors.vectors.ZeroVectorException e) {
		// The query produced no usable vector; answer with "nothing found".
		return new LinkedList<>();
	}
}
 
Example #4
Source File: SemanticVectorsStore.java    From seldon-server with Apache License 2.0 6 votes vote down vote up
/**
 * Searches the document vectors with a cosine searcher built from {@code terms}
 * and adds a {@link SemVectorResult} (transformed id plus score) to
 * {@code docResult} for every hit, in the order the searcher returned them.
 *
 * <p>A {@code ZeroVectorException} from the searcher is treated as "no hits".
 *
 * @param <T>          id type produced by {@code docTransform}
 * @param terms        query terms handed to the searcher
 * @param lUtils       lucene utils; NOTE(review): this argument is unused here,
 *                     the searcher receives the {@code luceneUtils} field
 * @param numResults   max number of results to return
 * @param docResult    the result list that hits are appended to
 * @param docTransform the transform from a hit's object name to id type T
 */
public <T extends Comparable<T>> void findSimilarUsersFromTerms(String[] terms,LuceneUtils lUtils,int numResults,ArrayList<SemVectorResult<T>> docResult,QueryTransform<T> docTransform)
{
	List<SearchResult> hits;
	try {
		hits = new VectorSearcher.VectorSearcherCosine(
				termVecReader, docVecReader, luceneUtils, flagConfig, terms)
				.getNearestNeighbors(numResults);
	} catch (pitt.search.semanticvectors.vectors.ZeroVectorException e) {
		// Nothing to search with; fall through with an empty hit list.
		hits = new LinkedList<>();
	}

	for (SearchResult hit : hits) {
		String docName = hit.getObjectVector().getObject().toString();
		docResult.add(new SemVectorResult<>(docTransform.fromSV(docName), hit.getScore()));
	}
}
 
Example #5
Source File: PsiUtils.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Logs a banner, opens the semantic and bound vector stores named in
 * {@code flagConfig}, asks a bound-product searcher for the single nearest
 * neighbour of a vector obtained from {@code VectorFactory.createZeroVector},
 * and prints that neighbour's object to standard output if one is returned.
 *
 * <p>A {@code ZeroVectorException} is not propagated; its stack trace is printed.
 *
 * <p>NOTE(review): the two stores opened here are never closed in this method;
 * confirm whether they hold resources that need releasing.
 *
 * @param flagConfig supplies the vector type, dimension and the two store file names
 * @throws IOException declared for failures while opening the vector stores
 */
public static void printNearestPredicate(FlagConfig flagConfig) throws IOException {
  VerbatimLogger.info("Printing predicate results.");
  Vector queryVector = VectorFactory.createZeroVector(flagConfig.vectortype(), flagConfig.dimension());
  try {
    VectorSearcher.VectorSearcherBoundProduct predicateFinder =
        new VectorSearcher.VectorSearcherBoundProduct(
            VectorStoreReader.openVectorStore(flagConfig.semanticvectorfile(), flagConfig),
            VectorStoreReader.openVectorStore(flagConfig.boundvectorfile(), flagConfig),
            null, flagConfig, queryVector);
    List<SearchResult> nearest = predicateFinder.getNearestNeighbors(1);
    if (nearest.isEmpty()) {
      return;
    }
    String predicateName = nearest.get(0).getObjectVector().getObject().toString();
    System.out.println(predicateName);
  } catch (ZeroVectorException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
 
Example #6
Source File: AnalogyTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
	 * Processes a single proportional analogy read from {@code inLine}
	 * ("a b c d": a is to b as c is to d) and updates the shared counters.
	 *
	 * <p>The cue vector is {@code b - a + c}, normalized. The rank of the expected
	 * term {@code d} among the nearest neighbours of the cue (ignoring a, b and c)
	 * gives the reciprocal rank; ranks beyond 1000 are scored as 0.
	 *
	 * <p>The line is skipped, without touching any counter, when it has fewer than
	 * four terms, when any of the four terms has no vector, or when the search
	 * itself fails with a {@code ZeroVectorException}.
	 */
	public void processAnalogy()
	{
		String[] queryTerms = inLine.toLowerCase().split(" ");

		//not a proportional analogy
		if (queryTerms.length < 4)
		{
			System.out.println(threadno+": "+inLine+": Skipping line");
			return;
		}

		// Collect every one of the four terms that has no vector, in order.
		StringBuilder missingTerms = new StringBuilder();
		for (int i = 0; i < 4; i++)
		{
			if (!termVectors.containsVector(queryTerms[i]))
			{
				missingTerms.append(queryTerms[i]).append("; ");
			}
		}

		if (missingTerms.length() > 0)
		{
			System.out.println(threadno+": "+"Missing terms "+missingTerms);
			return;
		}

		Vector aTermVector = termVectors.getVector(queryTerms[0]);
		Vector bTermVector = termVectors.getVector(queryTerms[1]);
		Vector cTermVector = termVectors.getVector(queryTerms[2]);

		// cue = b - a + c, built on a copy so the stored vector for b is untouched
		Vector cueVector = bTermVector.copy();
		cueVector.superpose(aTermVector, -1, null);
		cueVector.superpose(cTermVector, +1, null);
		cueVector.normalize();

		int rank = 1;
		String object = "";

		try
		{
			VectorSearcher.VectorSearcherCosine analogySearcher
				= new VectorSearcher.VectorSearcherCosine(termVectors, termVectors, null, flagConfig, cueVector);

			//get top 1000 results plus the three query terms
			LinkedList<SearchResult> results = analogySearcher.getNearestNeighbors(1003);

			for (SearchResult sr : results)
			{
				object = sr.getObjectVector().getObject().toString();

				//result found
				if (object.equals(queryTerms[3]))
				{
					break;
				}

				//ignore query terms
				boolean isQueryTerm = object.equals(queryTerms[0])
						|| object.equals(queryTerms[1])
						|| object.equals(queryTerms[2]);
				if (!isQueryTerm)
				{
					rank++;
				}
			}
		}
		catch (ZeroVectorException e)
		{
			e.printStackTrace();
			System.out.println(threadno+": "+"Error on example "+ inLine);
			// Skip the example. Falling through would leave rank == 1 and score the
			// failed search as a correct top-ranked answer.
			return;
		}

		//calculate reciprocal rank as a more granular metric than accuracy
		double reciprank = 1 / (double) rank;
		if (reciprank < 0.001) reciprank = 0;

		examplessubset.incrementAndGet();
		exampletot.incrementAndGet();
		recipsubset.add(reciprank);
		reciptot.add(reciprank);

		//correct result (top ranked other than query terms)
		if (rank == 1)
		{
			accsubset.incrementAndGet();
			acctot.incrementAndGet();
		}

		System.out.println(threadno+": "+inLine +" --> "+object+" "+rank+" "+reciprank);
	}
 
Example #7
Source File: BeagleTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Runs a BEAGLE search for {@code query} and prints up to 20 results as
 * {@code score:object} lines on standard output; progress and error messages go
 * to standard error.
 *
 * <p>Both vector stores are kept open until the search has finished and are then
 * closed in a {@code finally} block, so they are released even if opening the
 * second store, building the searcher or the search itself fails.
 *
 * @param flagConfig configuration passed to the store reader and the searcher
 * @param searchfile name of the vector store to search in
 * @param indexfile  name of the vector store used to build the query vector
 * @param query      query terms separated by single spaces
 */
public void testQuery(FlagConfig flagConfig, String searchfile, String indexfile, String query )
{
	LuceneUtils lUtils = null;
	CloseableVectorStore queryVecReader = null;
	CloseableVectorStore searchVecReader = null;
	LinkedList<SearchResult> results;
	int numResults = 20;

	BeagleUtils utils = BeagleUtils.getInstance();
	utils.setFFTCacheSize(100);

	try
	{
		try
		{
			queryVecReader = VectorStoreReader.openVectorStore(indexfile, flagConfig);
			searchVecReader = VectorStoreReader.openVectorStore(searchfile, flagConfig);

			String[] queryTerms = query.split(" ");

			// Create VectorSearcher and search for nearest neighbors.
			VectorSearcher vs = new BeagleVectorSearcher( queryVecReader, searchVecReader, lUtils, flagConfig, queryTerms);
			System.err.print("Searching term vectors, searchtype BEAGLE ... ");

			// Search while the stores are still open; they are closed below.
			results = vs.getNearestNeighbors(numResults);
		}
		finally
		{
			// Nested so that a failure closing the first store cannot leak the second.
			try
			{
				if (queryVecReader != null)
				{
					queryVecReader.close();
				}
			}
			finally
			{
				if (searchVecReader != null)
				{
					searchVecReader.close();
				}
			}
		}
	}
	catch (Exception e)
	{
		System.err.println(e.getMessage());
		results = new LinkedList<SearchResult>();
	}

	// Print out results.
	if (results.size() > 0) {
		System.err.println("Search output follows ...\n");
		for (SearchResult result: results) {
			System.out.println(result.getScore() + ":" +
					((ObjectVector)result.getObjectVector()).getObject().toString());
		}
	} else {
		System.err.println("No search output.");
	}
}
 
Example #8
Source File: SemanticVectorSearcher.java    From uncc2014watsonsim with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Queries the semantic-vector stores with the question's tokens using a cosine
 * searcher and a min-similarity searcher, and turns each of their top-10 hits
 * into a {@code Passage} tagged with engine {@code "semvec"}.
 *
 * <p>Each passage carries its zero-based rank within its searcher's results
 * ({@code SEMVEC_RANK}), the hit score ({@code SEMVEC_SCORE}) and a presence
 * marker ({@code SEMVEC_PRESENT}). If a {@code ZeroVectorException} is thrown,
 * its stack trace is printed and whatever passages were collected so far are used.
 *
 * @param question the question whose tokens form the query
 * @return the result of {@code fillFromSources} applied to the collected passages
 */
public List<Passage> query(Question question) {
	List<Passage> passages = new ArrayList<>();
	try {
		// Only the cosine and min-similarity searchers are active; the Lucene,
		// MaxSim and SubspaceSim variants were left disabled by the original author.
		VectorSearcher cosineSearcher = new VectorSearcher.VectorSearcherCosine(
				queryVecReader, resultsVecReader, luceneUtils,
				fconfig, question.getTokens().toArray(new String[]{}));
		VectorSearcher minSimSearcher = new VectorSearcher.VectorSearcherMinSim(
				queryVecReader, resultsVecReader, luceneUtils,
				fconfig, question.getTokens().toArray(new String[]{}));
		VectorSearcher[] searchers = { cosineSearcher, minSimSearcher };

		System.out.println("sv_searchers = " + searchers);
		for (VectorSearcher searcher : searchers) {
			if (searcher == null) {
				continue;
			}
			List<SearchResult> hits = searcher.getNearestNeighbors(10);
			System.out.println("result = " + hits);

			int rank = 0;
			for (SearchResult hit : hits) {
				String reference = hit.getObjectVector().getObject().toString();
				passages.add(new Passage(
						"semvec",	// Engine
						"",			// Title
						"",			// Text
						reference)	// Reference
						.score("SEMVEC_RANK", (double) rank++)
						.score("SEMVEC_SCORE", (double) hit.getScore())
						.score("SEMVEC_PRESENT", 1.0));
			}
		}
	} catch (ZeroVectorException e) {
		// TODO: Under what circumstances does this happen?
		e.printStackTrace();
	}
	return fillFromSources(passages);
}