org.apache.lucene.search.highlight.InvalidTokenOffsetsException Java Exaples

Source File: HighlightCustomQueryTest.java From lucene-solr with Apache License 2.0

6 votes

/**
 * This method intended for use with
 * <code>testHighlightingWithDefaultField()</code>
 */
private String highlightField(Query query, String fieldName,
    String text) throws IOException, InvalidTokenOffsetsException {
  try (MockAnalyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE,true,
      MockTokenFilter.ENGLISH_STOPSET); TokenStream tokenStream = mockAnalyzer.tokenStream(fieldName, text)) {
    // Assuming "<B>", "</B>" used to highlight
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    String rv = highlighter.getBestFragments(tokenStream, text, 1,
        "(FIELD TEXT TRUNCATED)");
    return rv.length() == 0 ? text : rv;
  }
}

Source File: HighlightCustomQueryTest.java From lucene-solr with Apache License 2.0

5 votes

public void testHighlightCustomQuery() throws IOException,
    InvalidTokenOffsetsException {
  String s1 = "I call our world Flatland, not because we call it so,";

  // Verify that a query against the default field results in text being
  // highlighted
  // regardless of the field name.

  CustomQuery q = new CustomQuery(new Term(FIELD_NAME, "world"));

  String expected = "I call our <B>world</B> Flatland, not because we call it so,";
  String observed = highlightField(q, "SOME_FIELD_NAME", s1);
  if (VERBOSE)
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
        + observed);
  assertEquals(
      "Query in the default field results in text for *ANY* field being highlighted",
      expected, observed);

  // Verify that a query against a named field does not result in any
  // highlighting
  // when the query field name differs from the name of the field being
  // highlighted,
  // which in this example happens to be the default field name.
  q = new CustomQuery(new Term("text", "world"));

  expected = s1;
  observed = highlightField(q, FIELD_NAME, s1);
  if (VERBOSE)
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
        + observed);
  assertEquals(
      "Query in a named field does not result in highlighting when that field isn't in the query",
      s1, highlightField(q, FIELD_NAME, s1));

}

Source File: SearchResultsImpl.java From olat with Apache License 2.0

5 votes

/**
 * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
 * 
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get 3 best fragments of content and seperate with a "..."
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}

Source File: SearchResultsImpl.java From olat with Apache License 2.0

5 votes

/**
 * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
 * 
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get 3 best fragments of content and seperate with a "..."
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}

Source File: HighlightHelper.java From incubator-retired-blur with Apache License 2.0

4 votes

/**
 * NOTE: This method will not preserve the correct field types.
 * 
 * @param preTag
 * @param postTag
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
    IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {

  String fieldLessFieldName = fieldManager.getFieldLessFieldName();

  Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);

  Analyzer analyzer = fieldManager.getAnalyzerForQuery();

  SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
  Document result = new Document();
  for (IndexableField f : document) {
    String name = f.name();
    if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
      result.add(f);
      continue;
    }
    String text = f.stringValue();
    Number numericValue = f.numericValue();

    Query fieldFixedQuery;
    if (fieldManager.isFieldLessIndexed(name)) {
      fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
    } else {
      fieldFixedQuery = fixedQuery;
    }

    if (numericValue != null) {
      if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
        String numberHighlight = preTag + text + postTag;
        result.add(new StringField(name, numberHighlight, Store.YES));
      }
    } else {
      Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
      TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
      for (int j = 0; j < frag.length; j++) {
        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
          result.add(new StringField(name, frag[j].toString(), Store.YES));
        }
      }
    }
  }
  return result;
}

org.apache.lucene.search.highlight.InvalidTokenOffsetsException Java Examples