org.apache.lucene.search.Hits Java Examples

Source File: NGramQueryParserTest.java From uyuni with GNU General Public License v2.0

6 votes

/**
 * Parses {@code query} with an {@code NGramQueryParser} whose default field is
 * "name", runs it against {@code dir} and logs the outcome.
 *
 * @param dir index directory to search
 * @param query raw query text
 * @param useMust flag handed straight to the {@code NGramQueryParser} constructor
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, String query, boolean useMust)
    throws Exception {

    NGramAnalyzer analyzer = new NGramAnalyzer(min_ngram, max_ngram);
    NGramQueryParser ngramParser = new NGramQueryParser("name", analyzer, useMust);
    // The searcher is not closed here; the Hits it produced is returned to the caller.
    IndexSearcher indexSearcher = new IndexSearcher(dir);

    Query parsed = ngramParser.parse(query);
    Hits results = indexSearcher.search(parsed);

    log.info("Original Query = " + query);
    log.info("Parsed Query = " + parsed.toString());
    log.info("Hits.length() = " + results.length());

    // Dump every matching document at debug level.
    int pos = 0;
    while (pos < results.length()) {
        log.debug("Document<" + results.id(pos) + "> = " + results.doc(pos));
        pos++;
    }
    return results;
}

Source File: TestMixedDirectory.java From hadoop-gpu with Apache License 2.0

6 votes

/**
 * Checks that searching {@code dir} for content:apache yields exactly
 * {@code expectedHits} documents and that every "id" value in
 * {@code [0, expectedHits)} occurs exactly once among them.
 *
 * @param dir index directory to verify
 * @param expectedHits number of matching documents expected
 * @throws IOException if the index cannot be read
 */
private void verify(Directory dir, int expectedHits) throws IOException {
  IndexSearcher searcher = new IndexSearcher(dir);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    int numHits = hits.length();

    assertEquals(expectedHits, numHits);

    // Tally how many times each document id shows up in the result.
    int[] docs = new int[numHits];
    for (int i = 0; i < numHits; i++) {
      Document hit = hits.doc(i);
      docs[Integer.parseInt(hit.get("id"))]++;
    }
    for (int i = 0; i < numHits; i++) {
      assertEquals(1, docs[i]);
    }
  } finally {
    // Release the searcher even when an assertion or lookup above fails.
    searcher.close();
  }
}

Source File: NGramTestSetup.java From uyuni with GNU General Public License v2.0

6 votes

/**
 * Counts the leading hits whose score is at least {@code score_threshold},
 * stopping at the first hit that falls below it.
 *
 * @param hits search results to inspect
 * @return number of consecutive hits, from index 0, meeting the threshold
 * @throws IOException if a score cannot be read
 */
protected int thresholdHits(Hits hits) throws IOException {
    // Idea for later: threshold relative to the top score instead of
    // comparing against an absolute value.
    int accepted = 0;
    while (accepted < hits.length() && hits.score(accepted) >= score_threshold) {
        accepted++;
    }
    return accepted;
}

Source File: TestMixedDirectory.java From RDFS with Apache License 2.0

6 votes

/**
 * Checks that searching {@code dir} for content:apache yields exactly
 * {@code expectedHits} documents and that every "id" value in
 * {@code [0, expectedHits)} occurs exactly once among them.
 *
 * @param dir index directory to verify
 * @param expectedHits number of matching documents expected
 * @throws IOException if the index cannot be read
 */
private void verify(Directory dir, int expectedHits) throws IOException {
  IndexSearcher searcher = new IndexSearcher(dir);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    int numHits = hits.length();

    assertEquals(expectedHits, numHits);

    // Tally how many times each document id shows up in the result.
    int[] docs = new int[numHits];
    for (int i = 0; i < numHits; i++) {
      Document hit = hits.doc(i);
      docs[Integer.parseInt(hit.get("id"))]++;
    }
    for (int i = 0; i < numHits; i++) {
      assertEquals(1, docs[i]);
    }
  } finally {
    // Release the searcher even when an assertion or lookup above fails.
    searcher.close();
  }
}

Source File: IndexManager.java From spacewalk with GNU General Public License v2.0

6 votes

/**
 * Logs, at debug level, the parsed query, the index name and details for at
 * most the first ten hits: the document, its score, the searcher's
 * explanation and the field reported by {@code MatchingField}.
 *
 * @param indexName name of the index that was searched
 * @param hits results to describe
 * @param searcher searcher used to produce the explanations
 * @param q the parsed query
 * @param queryTerms terms passed on to {@code MatchingField}
 * @throws IOException if a hit cannot be read
 */
private void debugExplainResults(String indexName, Hits hits, IndexSearcher searcher,
        Query q, Set<Term> queryTerms)
    throws IOException {
    log.debug("Parsed Query is " + q.toString());
    log.debug("Looking at index:  " + indexName);
    // Only the top ten hits are described.
    for (int rank = 0; rank < 10 && rank < hits.length(); rank++) {
        Document doc = hits.doc(rank);
        float score = hits.score(rank);
        Explanation ex = searcher.explain(q, hits.id(rank));
        log.debug("Looking at hit<" + rank + ", " + hits.id(rank) + ", " + score +
                ">: " + doc);
        log.debug("Explanation: " + ex);
        MatchingField match = new MatchingField(q.toString(), doc, queryTerms);
        log.debug("Guessing that matched fieldName is " + match.getFieldName() + " = " +
                match.getFieldValue());
    }
}

Source File: CrawlerTask.java From JPPF with Apache License 2.0

6 votes

/**
 * Runs the user-specified query against the content of the current page and,
 * when at least one hit is found, records the page with its summed relevance.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
  final QueryParser queryParser = new QueryParser("contents", new StandardAnalyzer());
  final Query parsed = queryParser.parse(query);

  // Index the page content in memory under the "contents" field.
  final MemoryIndex memoryIndex = new MemoryIndex();
  final Link link = new Link(url);
  final PageData page = new SimpleHttpClientParser().load(link);
  memoryIndex.addField("contents", page.getData().toString(), new StandardAnalyzer());

  final Hits hits = memoryIndex.createSearcher().search(parsed);
  @SuppressWarnings("rawtypes")
  final Iterator hitIterator = hits.iterator();
  if (!hitIterator.hasNext()) {
    // No match on this page: nothing to record.
    return;
  }

  float relevance = 0f;
  while (hitIterator.hasNext()) {
    final Hit hit = (Hit) hitIterator.next();
    // Score expressed as a percentage with one decimal, accumulated over all hits.
    relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
  }
  matchedLinks.add(new LinkMatch(url, relevance));
}

Source File: CrawlerTest.java From JPPF with Apache License 2.0

6 votes

/**
 * Test searching with Lucene: parses {@code search} against the "contents"
 * field, prints the total number of hits and then up to {@code max} results
 * with their relevance percentage and "url" field.
 * @param search the Lucene query text.
 * @param max the maximum number of results to show.
 * @throws Exception if an error is thrown while executing.
 */
public static void luceneSearch(final String search, final int max) throws Exception {
  print("Searching for: " + search);
  print("  max results: " + max);

  final IndexSearcher is = new IndexSearcher(index);
  try {
    final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());

    final Query query = parser.parse(search);
    final Hits hits = is.search(query);

    print("    results: " + hits.length());

    for (int i = 0; i < Math.min(hits.length(), max); i++) {
      // Score expressed as a percentage with one decimal.
      final float relevance = ((float) Math.round(hits.score(i) * 1000)) / 10;
      final String url = hits.doc(i).getField("url").stringValue();
      print("No " + (i + 1) + " with relevance " + relevance + "% : " + url);
    }
  } finally {
    // Close the searcher even if parsing or searching throws.
    is.close();
  }
}

Source File: NGramTestSetup.java From spacewalk with GNU General Public License v2.0

6 votes

/**
 * Counts the leading hits whose score is at least {@code score_threshold},
 * stopping at the first hit that falls below it.
 *
 * @param hits search results to inspect
 * @return number of consecutive hits, from index 0, meeting the threshold
 * @throws IOException if a score cannot be read
 */
protected int thresholdHits(Hits hits) throws IOException {
    // Idea for later: threshold relative to the top score instead of
    // comparing against an absolute value.
    int accepted = 0;
    while (accepted < hits.length() && hits.score(accepted) >= score_threshold) {
        accepted++;
    }
    return accepted;
}

Source File: NGramQueryParserTest.java From spacewalk with GNU General Public License v2.0

6 votes

/**
 * Parses {@code query} with an {@code NGramQueryParser} whose default field is
 * "name", runs it against {@code dir} and logs the outcome.
 *
 * @param dir index directory to search
 * @param query raw query text
 * @param useMust flag handed straight to the {@code NGramQueryParser} constructor
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, String query, boolean useMust)
    throws Exception {

    NGramAnalyzer analyzer = new NGramAnalyzer(min_ngram, max_ngram);
    NGramQueryParser ngramParser = new NGramQueryParser("name", analyzer, useMust);
    // The searcher is not closed here; the Hits it produced is returned to the caller.
    IndexSearcher indexSearcher = new IndexSearcher(dir);

    Query parsed = ngramParser.parse(query);
    Hits results = indexSearcher.search(parsed);

    log.info("Original Query = " + query);
    log.info("Parsed Query = " + parsed.toString());
    log.info("Hits.length() = " + results.length());

    // Dump every matching document at debug level.
    int pos = 0;
    while (pos < results.length()) {
        log.debug("Document<" + results.id(pos) + "> = " + results.doc(pos));
        pos++;
    }
    return results;
}

Source File: NGramQueryParserTest.java From uyuni with GNU General Public License v2.0

5 votes

/**
 * Runs a series of fielded free-form queries (with useMust enabled) and
 * checks the number of hits each one returns.
 */
public void testFreeFormSearch() throws Exception {
    final boolean useMust = true;
    final String[] queries = {
        // name matches "spell" and description must NOT contain "another"
        "name:spell -description:another",
        // name matches "virt" and description MUST contain "factory"
        "name:virt +description:factory",
        // no explicit operator; expected to return the same count as the OR form below
        "name:virt description:factory",
        "name:virt OR description:factory",
        "name:virt AND description:factory",
        // name matches "virt" and description must NOT contain "factory"
        "name:virt -description:factory",
    };
    final int[] expectedCounts = {2, 2, 4, 4, 1, 2};

    for (int idx = 0; idx < queries.length; idx++) {
        Hits hits = performSearch(this.ngramDir, queries[idx], useMust);
        displayHits(hits);
        assertTrue(hits.length() == expectedCounts[idx]);
    }
}

Source File: TestDistributionPolicy.java From hadoop-gpu with Apache License 2.0

5 votes

/**
 * Opens every shard's index, searches them as one combined reader and checks
 * that no document matches content:apache, that {@code numDocsPerRun / 2}
 * documents match content:hadoop, and that those matches are exactly the
 * documents with odd "id" values, each appearing once.
 *
 * @param shards the shards whose indexes are verified
 * @throws IOException if an index cannot be opened or read
 */
private void verify(Shard[] shards) throws IOException {
  // verify the index
  IndexReader[] readers = new IndexReader[shards.length];
  for (int i = 0; i < shards.length; i++) {
    Directory dir =
        new FileSystemDirectory(fs, new Path(shards[i].getDirectory()),
            false, conf);
    readers[i] = IndexReader.open(dir);
  }

  IndexReader reader = new MultiReader(readers);
  IndexSearcher searcher = new IndexSearcher(reader);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    assertEquals(0, hits.length());

    hits = searcher.search(new TermQuery(new Term("content", "hadoop")));
    assertEquals(numDocsPerRun / 2, hits.length());

    // Tally how many times each document id shows up in the result.
    int[] counts = new int[numDocsPerRun];
    for (int i = 0; i < hits.length(); i++) {
      Document doc = hits.doc(i);
      counts[Integer.parseInt(doc.get("id"))]++;
    }

    for (int i = 0; i < numDocsPerRun; i++) {
      if (i % 2 == 0) {
        assertEquals(0, counts[i]);
      } else {
        assertEquals(1, counts[i]);
      }
    }
  } finally {
    // Release both resources even when an assertion above fails.
    try {
      searcher.close();
    } finally {
      reader.close();
    }
  }
}

Source File: TestDistributionPolicy.java From RDFS with Apache License 2.0

5 votes

/**
 * Opens every shard's index, searches them as one combined reader and checks
 * that no document matches content:apache, that {@code numDocsPerRun / 2}
 * documents match content:hadoop, and that those matches are exactly the
 * documents with odd "id" values, each appearing once.
 *
 * @param shards the shards whose indexes are verified
 * @throws IOException if an index cannot be opened or read
 */
private void verify(Shard[] shards) throws IOException {
  // verify the index
  IndexReader[] readers = new IndexReader[shards.length];
  for (int i = 0; i < shards.length; i++) {
    Directory dir =
        new FileSystemDirectory(fs, new Path(shards[i].getDirectory()),
            false, conf);
    readers[i] = IndexReader.open(dir);
  }

  IndexReader reader = new MultiReader(readers);
  IndexSearcher searcher = new IndexSearcher(reader);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    assertEquals(0, hits.length());

    hits = searcher.search(new TermQuery(new Term("content", "hadoop")));
    assertEquals(numDocsPerRun / 2, hits.length());

    // Tally how many times each document id shows up in the result.
    int[] counts = new int[numDocsPerRun];
    for (int i = 0; i < hits.length(); i++) {
      Document doc = hits.doc(i);
      counts[Integer.parseInt(doc.get("id"))]++;
    }

    for (int i = 0; i < numDocsPerRun; i++) {
      if (i % 2 == 0) {
        assertEquals(0, counts[i]);
      } else {
        assertEquals(1, counts[i]);
      }
    }
  } finally {
    // Release both resources even when an assertion above fails.
    try {
      searcher.close();
    } finally {
      reader.close();
    }
  }
}

Source File: NGramTestSetup.java From spacewalk with GNU General Public License v2.0

5 votes

/**
 * Logs one info line per hit containing its index, score and the stored
 * "name" and "description" fields.
 *
 * @param hits results to print
 * @throws IOException if a hit cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int pos = 0;
    while (pos < hits.length()) {
        Document current = hits.doc(pos);
        String hitName = current.get("name");
        String hitDescription = current.get("description");
        String line = "Hit<" + pos + "> Score< " + hits.score(pos) + ">  name = <" +
                hitName + "> description = <" + hitDescription + ">";
        log.info(line);
        pos++;
    }
}

Source File: NGramTestSetup.java From spacewalk with GNU General Public License v2.0

5 votes

/**
 * Parses {@code query} with a {@code QueryParser} whose default field is
 * "name" and searches {@code dir} with the result.
 *
 * @param dir index directory to search
 * @param alyz analyzer used by the query parser
 * @param query raw query text
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, Analyzer alyz, String query) throws Exception {
    QueryParser nameParser = new QueryParser("name", alyz);
    // The searcher is not closed here; the Hits it produced is returned to the caller.
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    return indexSearcher.search(nameParser.parse(query));
}

Source File: NGramQueryParserTest.java From spacewalk with GNU General Public License v2.0

5 votes

/**
 * Runs a series of fielded free-form queries (with useMust enabled) and
 * checks the number of hits each one returns.
 */
public void testFreeFormSearch() throws Exception {
    final boolean useMust = true;
    final String[] queries = {
        // name matches "spell" and description must NOT contain "another"
        "name:spell -description:another",
        // name matches "virt" and description MUST contain "factory"
        "name:virt +description:factory",
        // no explicit operator; expected to return the same count as the OR form below
        "name:virt description:factory",
        "name:virt OR description:factory",
        "name:virt AND description:factory",
        // name matches "virt" and description must NOT contain "factory"
        "name:virt -description:factory",
    };
    final int[] expectedCounts = {2, 2, 4, 4, 1, 2};

    for (int idx = 0; idx < queries.length; idx++) {
        Hits hits = performSearch(this.ngramDir, queries[idx], useMust);
        displayHits(hits);
        assertTrue(hits.length() == expectedCounts[idx]);
    }
}

Source File: LuceneResultSet.java From alfresco-repository with GNU Lesser General Public License v3.0

5 votes

/**
 * Wraps a Lucene search result with node support. All arguments are stored
 * as-is, and the prefetch bit set is created with an initial size equal to
 * the number of hits.
 *
 * @param hits the Lucene hits being wrapped
 * @param searcher the searcher associated with the hits
 * @param nodeService the node service
 * @param tenantService the tenant service
 * @param searchParameters the search parameters
 * @param config the Lucene configuration
 */
public LuceneResultSet(Hits hits, Searcher searcher, NodeService nodeService, TenantService tenantService, SearchParameters searchParameters,
        LuceneConfig config)
{
    // The superclass no-arg constructor is invoked implicitly.
    this.hits = hits;
    this.searcher = searcher;
    this.nodeService = nodeService;
    this.tenantService = tenantService;
    this.searchParameters = searchParameters;
    this.config = config;
    this.prefetch = new BitSet(hits.length());
}

Source File: IndexManager.java From uyuni with GNU General Public License v2.0

5 votes

/**
 * Logs, at debug level, the parsed query, the index name and details for at
 * most the first ten hits: the document, its score, the searcher's
 * explanation and the field reported by {@code MatchingField}.
 *
 * @param indexName name of the index that was searched
 * @param hits results to describe
 * @param searcher searcher used to produce the explanations
 * @param q the parsed query
 * @param queryTerms terms passed on to {@code MatchingField}
 * @throws IOException if a hit cannot be read
 */
private void debugExplainResults(String indexName, Hits hits, IndexSearcher searcher,
        Query q, Set<Term> queryTerms)
    throws IOException {
    log.debug("Parsed Query is " + q.toString());
    log.debug("Looking at index:  " + indexName);
    // Only the top ten hits are described.
    for (int rank = 0; rank < 10 && rank < hits.length(); rank++) {
        Document doc = hits.doc(rank);
        float score = hits.score(rank);
        Explanation ex = searcher.explain(q, hits.id(rank));
        log.debug("Looking at hit<" + rank + ", " + hits.id(rank) + ", " + score +
                ">: " + doc);
        log.debug("Explanation: " + ex);
        MatchingField match = new MatchingField(q.toString(), doc, queryTerms);
        log.debug("Guessing that matched fieldName is " + match.getFieldName() + " = " +
                match.getFieldValue());
    }
}

Source File: NGramTestSetup.java From uyuni with GNU General Public License v2.0

5 votes

/**
 * Logs one info line per hit containing its index, score and the stored
 * "name" and "description" fields.
 *
 * @param hits results to print
 * @throws IOException if a hit cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int pos = 0;
    while (pos < hits.length()) {
        Document current = hits.doc(pos);
        String hitName = current.get("name");
        String hitDescription = current.get("description");
        String line = "Hit<" + pos + "> Score< " + hits.score(pos) + ">  name = <" +
                hitName + "> description = <" + hitDescription + ">";
        log.info(line);
        pos++;
    }
}

Source File: NGramTestSetup.java From uyuni with GNU General Public License v2.0

5 votes

/**
 * Parses {@code query} with a {@code QueryParser} whose default field is
 * "name" and searches {@code dir} with the result.
 *
 * @param dir index directory to search
 * @param alyz analyzer used by the query parser
 * @param query raw query text
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, Analyzer alyz, String query) throws Exception {
    QueryParser nameParser = new QueryParser("name", alyz);
    // The searcher is not closed here; the Hits it produced is returned to the caller.
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    return indexSearcher.search(nameParser.parse(query));
}

Source File: NGramQueryParserTest.java From spacewalk with GNU General Public License v2.0

4 votes

/**
 * Runs several unfielded queries (with useMust disabled) and checks, for each,
 * the number of hits passing the score threshold and the total number of hits.
 */
public void testBasicSearch() throws Exception {
    final boolean useMust = false;
    final String[] queries = {"spell", "aspelll", "aspell", "pel", "gtk"};
    final int[] expectedAboveThreshold = {5, 4, 4, 8, 7};
    final int[] expectedTotal = {16, 17, 17, 16, 17};

    for (int idx = 0; idx < queries.length; idx++) {
        Hits found = performSearch(this.ngramDir, queries[idx], useMust);
        displayHits(found);
        assertTrue(thresholdHits(found) == expectedAboveThreshold[idx]);
        assertTrue(found.length() == expectedTotal[idx]);
    }

    // We want a search for kernel-hugemem to return kernel-hugemem as top hit
    //   but currently, kernel-hugemem-devel is matching instead.  This test
    //   is a placeholder as we explore ways to fix this.
    String boosted = "((name:kernel-hugemem)^2 (description:kernel-hugemem) " +
        "(filename:kernel-hugemem))";
    Hits kernelHits = performSearch(this.ngramDir, boosted, useMust);
    displayHits(kernelHits);
    assertTrue(thresholdHits(kernelHits) == 3);
    assertTrue(kernelHits.length() == 20);
    String topName = kernelHits.doc(0).get("name");
    assertTrue(topName.compareToIgnoreCase("kernel-hugemem-devel") == 0);
}

Source File: SearchInTypeShortName.java From gAnswer with BSD 3-Clause "New" or "Revised" License

4 votes

/**
 * Searches the type-fragment index for types whose "SplittedTypeShortName"
 * field matches {@code s} and collects their "TypeShortName" values.
 * <p>
 * The first {@code k} hits are kept when their score is at least
 * {@code thres1}; a low-scoring hit among them is skipped without stopping
 * the scan. From hit {@code k} onwards a hit is kept when its score is at
 * least {@code thres2}, and the scan stops at the first one below it.
 *
 * @param s the query text
 * @param thres1 minimum score for the first {@code k} hits
 * @param thres2 minimum score for the remaining hits
 * @param k number of leading hits judged against {@code thres1}
 * @return the matching type short names; empty if the query cannot be parsed
 * @throws Exception if the index cannot be opened or searched
 */
public  ArrayList<String> searchType(String s, double thres1, double thres2, int k) throws Exception
{
	ArrayList<String> typeNames = new ArrayList<String>();

	Query query;
	try {
		QueryParser qp = new QueryParser("SplittedTypeShortName", new StandardAnalyzer());
		query = qp.parse(s);
	} catch (ParseException e) {
		// An unparsable query has no results; previously the null query was
		// still handed to the searcher.
		e.printStackTrace();
		return typeNames;
	}

	IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/type_fragment_index");
	try {
		Hits hits = searcher.search(query);

		System.out.println("find " + hits.length() + " answars!");
		for (int i=0; i<hits.length(); i++) {
			float score = hits.score(i);
			if (i < k) {
				String typeShortName = hits.doc(i).get("TypeShortName");
				System.out.println("<<<<---" + typeShortName + " : " + score);
				if (score >= thres1) {
					System.out.println("Score>=thres1("+thres1+") ---" + typeShortName + " : " + score);
					typeNames.add(typeShortName);
				}
			}
			else if (score >= thres2) {
				String typeShortName = hits.doc(i).get("TypeShortName");
				System.out.println("<<<<---" + typeShortName + " : " + score);
				typeNames.add(typeShortName);
			}
			else {
				break;
			}
		}
	} finally {
		// The results are copied into typeNames, so the searcher can be released.
		searcher.close();
	}
	return typeNames;
}

Source File: IndexManager.java From spacewalk with GNU General Public License v2.0

4 votes

/**
 * Converts hits into {@code Result} objects, in hit order, until a hit fails
 * {@code isScoreAcceptable} or, when {@code maxHits} is positive, until
 * {@code maxHits} results have been collected.
 *
 * @param indexName name of the searched index; selects the Result subtype
 * @param hits the search hits to convert
 * @param queryTerms terms passed on to {@code MatchingField}
 * @param query the original query text
 * @param lang language passed on to {@code lookupDocSummary}
 * @return the accepted results
 * @throws IOException if a hit cannot be read
 */
private List<Result> processHits(String indexName, Hits hits, Set<Term> queryTerms,
        String query, String lang)
    throws IOException {
    List<Result> retval = new ArrayList<Result>();
    for (int x = 0; x < hits.length(); x++) {
        if (!isScoreAcceptable(indexName, hits, x, query)) {
            break;
        }
        Document doc = hits.doc(x);
        Result pr;
        if (indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) {
            DocResult docResult = new DocResult(x, hits.score(x), doc);
            String summary = lookupDocSummary(doc, query, lang);
            if (summary != null) {
                docResult.setSummary(summary);
            }
            pr = docResult;
        }
        else if (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0) {
            pr = new HardwareDeviceResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) {
            pr = new SnapshotTagResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) {
            pr = new ServerCustomInfoResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.XCCDF_IDENT_TYPE) == 0) {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("identifier").stringValue(),
                    hits.score(x));
        }
        else {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("name").stringValue(),
                    hits.score(x));
        }
        if (log.isDebugEnabled()) {
            log.debug("Hit[" + x + "] Score = " + hits.score(x) + ", Result = " + pr);
        }
        /*
         * matchingField will help the webUI to understand what field was responsible
         * for this match.  Later implementation should use "Explanation" to determine
         * field, for now we will simply grab one term and return its field.
         */
        try {
            MatchingField match = new MatchingField(query, doc, queryTerms);
            pr.setMatchingField(match.getFieldName());
            pr.setMatchingFieldValue(match.getFieldValue());
            log.info("hit[" + x + "] matchingField is being set to: <" +
                pr.getMatchingField() + "> based on passed in query field.  " +
                "matchingFieldValue = " + pr.getMatchingFieldValue());
        }
        catch (Exception e) {
            // Best effort: the result is still returned without matching-field info.
            log.error("Caught exception: ", e);
        }
        retval.add(pr);
        // Cap on the number of results collected; the former "x == maxHits"
        // test ran after the add and therefore let maxHits + 1 results through.
        if (maxHits > 0 && retval.size() >= maxHits) {
            break;
        }
    }
    return retval;
}

Source File: IndexManager.java From spacewalk with GNU General Public License v2.0

4 votes

/**
 * Decides whether the hit at position {@code x} scores high enough to be
 * kept. Which threshold applies depends on {@code indexName}.
 *
 * @param indexName name of the index that was searched
 * @param hits the search hits
 * @param x position of the hit being judged
 * @param queryIn the query text; its first field name selects the errata threshold
 * @return  true - score is acceptable
 *          false - score is NOT acceptable
 * @throws IOException declared for the hit score lookups
 */
private boolean isScoreAcceptable(String indexName, Hits hits, int x, String queryIn)
    throws IOException {
    // Only consulted in the errata branch below.
    String guessMainQueryTerm = MatchingField.getFirstFieldName(queryIn);

    // Documentation results are never filtered unless filterDocResults is set.
    if ((indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) &&
            (!filterDocResults)) {
        return true;
    }
    /**
     * Dropping matches which are a poor fit.
     * system searches are filtered based on "system_score_threshold"
     * other searches will return 10 best matches, then filter anything below
     * "score_threshold"
     */
    if ((indexName.compareTo(BuilderFactory.SERVER_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) ||
            (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0)) {
        if (hits.score(x) < system_score_threshold) {
            if (log.isDebugEnabled()) {
                log.debug("hits.score(" + x + ") is " + hits.score(x));
                log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "system_score_threshold = " + system_score_threshold);
            }
            return false;
        }
    }
    else if (indexName.compareTo(BuilderFactory.ERRATA_TYPE) == 0) {
        // Errata queries whose first field is "name" use the advisory threshold;
        // all other errata queries use errata_score_threshold.
        if (guessMainQueryTerm.compareTo("name") == 0) {
            if (hits.score(x) < errata_advisory_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + hits.score(x));
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_advisory_score_threshold = " +
                        errata_advisory_score_threshold);
                }
                return false;
            }
        }
        else {
            if (hits.score(x) < errata_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + hits.score(x));
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_score_threshold = " +
                        errata_score_threshold);
                }
                return false;
            }
        }
    }
    // NOTE(review): "x > 10" exempts positions 0..10 (eleven hits) from
    // score_threshold, while the comment above says 10 - confirm intent.
    else if (((hits.score(x) < score_threshold) && (x > 10)) ||
            (hits.score(x) < 0.001)) {
        /**
         * Dropping matches which are a poor fit.
         * First term is configurable, it allows matches like spelling errors or
         * suggestions to be possible.
         * Second term is intended to get rid of pure and utter crap hits
         */
        if (log.isDebugEnabled()) {
            log.debug("hits.score(" + x + ") is " + hits.score(x));
            log.debug("Filtering out search results from " + x + " to " +
                    hits.length() + ", due to their score being below " +
                    "score_threshold = " + score_threshold);
        }
        return false;
    }
    return true;
}

Source File: IndexManager.java From uyuni with GNU General Public License v2.0

4 votes

/**
 * Decides whether the hit at position {@code x} scores high enough to be
 * kept. Which threshold applies depends on {@code indexName}.
 *
 * @param indexName name of the index that was searched
 * @param hits the search hits
 * @param x position of the hit being judged
 * @param queryIn the query text; its first field name selects the errata threshold
 * @return  true - score is acceptable
 *          false - score is NOT acceptable
 * @throws IOException declared for the hit score lookups
 */
private boolean isScoreAcceptable(String indexName, Hits hits, int x, String queryIn)
    throws IOException {
    // Only consulted in the errata branch below.
    String guessMainQueryTerm = MatchingField.getFirstFieldName(queryIn);

    // Documentation results are never filtered unless filterDocResults is set.
    if ((indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) &&
            (!filterDocResults)) {
        return true;
    }
    /**
     * Dropping matches which are a poor fit.
     * system searches are filtered based on "system_score_threshold"
     * other searches will return 10 best matches, then filter anything below
     * "score_threshold"
     */
    if ((indexName.compareTo(BuilderFactory.SERVER_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) ||
            (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0)) {
        if (hits.score(x) < system_score_threshold) {
            if (log.isDebugEnabled()) {
                log.debug("hits.score(" + x + ") is " + hits.score(x));
                log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "system_score_threshold = " + system_score_threshold);
            }
            return false;
        }
    }
    else if (indexName.compareTo(BuilderFactory.ERRATA_TYPE) == 0) {
        // Errata queries whose first field is "name" use the advisory threshold;
        // all other errata queries use errata_score_threshold.
        if (guessMainQueryTerm.compareTo("name") == 0) {
            if (hits.score(x) < errata_advisory_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + hits.score(x));
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_advisory_score_threshold = " +
                        errata_advisory_score_threshold);
                }
                return false;
            }
        }
        else {
            if (hits.score(x) < errata_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + hits.score(x));
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_score_threshold = " +
                        errata_score_threshold);
                }
                return false;
            }
        }
    }
    // NOTE(review): "x > 10" exempts positions 0..10 (eleven hits) from
    // score_threshold, while the comment above says 10 - confirm intent.
    else if (((hits.score(x) < score_threshold) && (x > 10)) ||
            (hits.score(x) < 0.001)) {
        /**
         * Dropping matches which are a poor fit.
         * First term is configurable, it allows matches like spelling errors or
         * suggestions to be possible.
         * Second term is intended to get rid of pure and utter crap hits
         */
        if (log.isDebugEnabled()) {
            log.debug("hits.score(" + x + ") is " + hits.score(x));
            log.debug("Filtering out search results from " + x + " to " +
                    hits.length() + ", due to their score being below " +
                    "score_threshold = " + score_threshold);
        }
        return false;
    }
    return true;
}

Source File: IndexManager.java From uyuni with GNU General Public License v2.0

4 votes

/**
 * Converts hits into {@code Result} objects, in hit order, until a hit fails
 * {@code isScoreAcceptable} or, when {@code maxHits} is positive, until
 * {@code maxHits} results have been collected.
 *
 * @param indexName name of the searched index; selects the Result subtype
 * @param hits the search hits to convert
 * @param queryTerms terms passed on to {@code MatchingField}
 * @param query the original query text
 * @param lang language passed on to {@code lookupDocSummary}
 * @return the accepted results
 * @throws IOException if a hit cannot be read
 */
private List<Result> processHits(String indexName, Hits hits, Set<Term> queryTerms,
        String query, String lang)
    throws IOException {
    List<Result> retval = new ArrayList<Result>();
    for (int x = 0; x < hits.length(); x++) {
        if (!isScoreAcceptable(indexName, hits, x, query)) {
            break;
        }
        Document doc = hits.doc(x);
        Result pr;
        if (indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) {
            DocResult docResult = new DocResult(x, hits.score(x), doc);
            String summary = lookupDocSummary(doc, query, lang);
            if (summary != null) {
                docResult.setSummary(summary);
            }
            pr = docResult;
        }
        else if (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0) {
            pr = new HardwareDeviceResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) {
            pr = new SnapshotTagResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) {
            pr = new ServerCustomInfoResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.XCCDF_IDENT_TYPE) == 0) {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("identifier").stringValue(),
                    hits.score(x));
        }
        else {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("name").stringValue(),
                    hits.score(x));
        }
        if (log.isDebugEnabled()) {
            log.debug("Hit[" + x + "] Score = " + hits.score(x) + ", Result = " + pr);
        }
        /*
         * matchingField will help the webUI to understand what field was responsible
         * for this match.  Later implementation should use "Explanation" to determine
         * field, for now we will simply grab one term and return its field.
         */
        try {
            MatchingField match = new MatchingField(query, doc, queryTerms);
            pr.setMatchingField(match.getFieldName());
            pr.setMatchingFieldValue(match.getFieldValue());
            log.info("hit[" + x + "] matchingField is being set to: <" +
                pr.getMatchingField() + "> based on passed in query field.  " +
                "matchingFieldValue = " + pr.getMatchingFieldValue());
        }
        catch (Exception e) {
            // Best effort: the result is still returned without matching-field info.
            log.error("Caught exception: ", e);
        }
        retval.add(pr);
        // Cap on the number of results collected; the former "x == maxHits"
        // test ran after the add and therefore let maxHits + 1 results through.
        if (maxHits > 0 && retval.size() >= maxHits) {
            break;
        }
    }
    return retval;
}

Source File: NGramQueryParserTest.java From uyuni with GNU General Public License v2.0

4 votes

/**
 * Runs several unfielded queries (with useMust disabled) and checks, for each,
 * the number of hits passing the score threshold and the total number of hits.
 */
public void testBasicSearch() throws Exception {
    final boolean useMust = false;
    final String[] queries = {"spell", "aspelll", "aspell", "pel", "gtk"};
    final int[] expectedAboveThreshold = {5, 4, 4, 8, 7};
    final int[] expectedTotal = {16, 17, 17, 16, 17};

    for (int idx = 0; idx < queries.length; idx++) {
        Hits found = performSearch(this.ngramDir, queries[idx], useMust);
        displayHits(found);
        assertTrue(thresholdHits(found) == expectedAboveThreshold[idx]);
        assertTrue(found.length() == expectedTotal[idx]);
    }

    // We want a search for kernel-hugemem to return kernel-hugemem as top hit
    //   but currently, kernel-hugemem-devel is matching instead.  This test
    //   is a placeholder as we explore ways to fix this.
    String boosted = "((name:kernel-hugemem)^2 (description:kernel-hugemem) " +
        "(filename:kernel-hugemem))";
    Hits kernelHits = performSearch(this.ngramDir, boosted, useMust);
    displayHits(kernelHits);
    assertTrue(thresholdHits(kernelHits) == 3);
    assertTrue(kernelHits.length() == 20);
    String topName = kernelHits.doc(0).get("name");
    assertTrue(topName.compareToIgnoreCase("kernel-hugemem-devel") == 0);
}

Source File: 387581_IndexTaskTest_0_s.java From coming with MIT License

3 votes

/**
 * Searches the "contents" field for "test" and expects exactly two
 * matching documents.
 */
public void testSearch() throws Exception {
    // Use an instance parser instead of the legacy static QueryParser.parse(...).
    Query query = new QueryParser("contents", analyzer).parse("test");

    Hits hits = searcher.search(query);

    assertEquals("Find document(s)", 2, hits.length());
}

Source File: 387581_IndexTaskTest_0_t.java From coming with MIT License

3 votes

/**
 * Searches the "contents" field for "test" and expects exactly two
 * matching documents.
 */
public void testSearch() throws Exception {
    final QueryParser contentsParser = new QueryParser("contents", analyzer);
    final Query parsed = contentsParser.parse("test");
    final Hits found = searcher.search(parsed);
    assertEquals("Find document(s)", 2, found.length());
}

org.apache.lucene.search.Hits Java Examples