org.apache.lucene.analysis.payloads.PayloadHelper Java Examples

The following examples show how to use org.apache.lucene.analysis.payloads.PayloadHelper. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestNGramFilters.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test NGramFilterFactory on tokens with payloads.
 *
 * <p>The input token {@code test|0.1} is given a float payload by the
 * DelimitedPayload filter; every token emitted after the NGram filter is
 * asserted to carry a payload that decodes to {@code 0.1f}.
 *
 * @throws Exception if building or consuming the token stream fails
 */
public void testNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("NGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    // A BytesRef is a slice of its backing array: decode starting at
    // payData.offset rather than assuming the payload begins at index 0.
    float payFloat = PayloadHelper.decodeFloat(payData.bytes, payData.offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example #2
Source File: TestNGramFilters.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test EdgeNGramFilterFactory on tokens with payloads.
 *
 * <p>The input token {@code test|0.1} is given a float payload by the
 * DelimitedPayload filter; every token emitted after the EdgeNGram filter is
 * asserted to carry a payload that decodes to {@code 0.1f}.
 *
 * @throws Exception if building or consuming the token stream fails
 */
public void testEdgeNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    // A BytesRef is a slice of its backing array: decode starting at
    // payData.offset rather than assuming the payload begins at index 0.
    float payFloat = PayloadHelper.decodeFloat(payData.bytes, payData.offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example #3
Source File: MtasBasicParser.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Compute maximum filtered payload.
 *
 * <p>Combines a textual float value with an existing float payload:
 * <ul>
 * <li>if {@code value} is {@code null}, {@code payload} is returned as is
 * (possibly {@code null});</li>
 * <li>if only {@code value} is present, it is parsed and encoded as a new
 * payload;</li>
 * <li>if both are present, the larger of the two floats is encoded as a new
 * payload.</li>
 * </ul>
 *
 * @param value the value, parsed with {@link Float#parseFloat(String)}
 * @param payload the payload, decoded as a float starting at its offset
 * @param filter the filter (currently not used by this method)
 * @return the bytes ref
 * @throws NumberFormatException if {@code value} is not a parsable float
 */
private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload,
    String filter) {
  // do magic with filter
  if (value == null) {
    return payload;
  }
  if (payload == null) {
    return new BytesRef(PayloadHelper.encodeFloat(Float.parseFloat(value)));
  }
  // Primitive floats: no need to box values that are immediately unboxed
  // again by Math.max and encodeFloat.
  float payloadFloat = PayloadHelper.decodeFloat(payload.bytes,
      payload.offset);
  float valueFloat = Float.parseFloat(value);
  return new BytesRef(
      PayloadHelper.encodeFloat(Math.max(payloadFloat, valueFloat)));
}
 
Example #4
Source File: TermPosition.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes this position's payload as a float.
 *
 * @param defaultMissing value to return when the payload is {@code null} or
 *        has length zero
 * @return the float decoded from the payload bytes starting at the payload's
 *         offset, or {@code defaultMissing} when there is no payload
 */
public float payloadAsFloat(float defaultMissing) {
    // Guard: nothing to decode without a non-empty payload.
    if (payload == null || payload.length == 0) {
        return defaultMissing;
    }
    return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
}
 
Example #5
Source File: TermPosition.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes this position's payload as an int.
 *
 * @param defaultMissing value to return when the payload is {@code null} or
 *        has length zero
 * @return the int decoded from the payload bytes starting at the payload's
 *         offset, or {@code defaultMissing} when there is no payload
 */
public int payloadAsInt(int defaultMissing) {
    final boolean hasPayload = payload != null && payload.length != 0;
    if (!hasPayload) {
        return defaultMissing;
    }
    return PayloadHelper.decodeInt(payload.bytes, payload.offset);
}
 
Example #6
Source File: MtasToken.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Renders this token as a single fixed-layout line: id, real offset, offset,
 * position(s), parent id, payload (decoded as a float), prefix and postfix.
 * Missing values are shown as dashes; each of the real offset, offset and
 * parent sections is followed by {@code "* "} when the corresponding
 * {@code provide...} flag is false and by two spaces otherwise.
 *
 * @return the formatted description of this token
 */
@Override
public String toString() {
  StringBuilder line = new StringBuilder();

  // id
  line.append("[" + String.format("%05d", getId()) + "] ");

  // real offset range
  if (getRealOffsetStart() == null) {
    line.append("[-------,-------]");
  } else {
    line.append("[" + String.format("%07d", getRealOffsetStart()) + "-"
        + String.format("%07d", getRealOffsetEnd()) + "]");
  }
  line.append(provideRealOffset ? "  " : "* ");

  // offset range
  if (getOffsetStart() == null) {
    line.append("[-------,-------]");
  } else {
    line.append("[" + String.format("%07d", getOffsetStart()) + "-"
        + String.format("%07d", getOffsetEnd()) + "]");
  }
  line.append(provideOffset ? "  " : "* ");

  // position: empty, single, contiguous range, or explicit list
  String positionText;
  if (getPositionLength() == null) {
    positionText = "";
  } else if (getPositionStart().equals(getPositionEnd())) {
    positionText = "[" + getPositionStart() + "]";
  } else if ((getPositions() == null) || (getPositions().length == (1
      + getPositionEnd() - getPositionStart()))) {
    positionText = "[" + getPositionStart() + "-" + getPositionEnd() + "]";
  } else {
    positionText = Arrays.toString(getPositions());
  }
  line.append(String.format("%11s", positionText));

  // parent id
  if (getParentId() == null) {
    line.append("[-----]");
  } else {
    line.append("[" + String.format("%05d", getParentId()) + "]");
  }
  line.append(provideParentId ? "  " : "* ");

  // payload, decoded from a copy of exactly the referenced byte range
  BytesRef payload = getPayload();
  if (payload == null) {
    line.append("[------] ");
  } else {
    byte[] payloadBytes = Arrays.copyOfRange(payload.bytes, payload.offset,
        (payload.offset + payload.length));
    line.append("["
        + String.format("%.4f", PayloadHelper.decodeFloat(payloadBytes))
        + "] ");
  }

  // prefix and postfix
  line.append(String.format("%25s", "[" + getPrefix() + "]") + " ");
  if (getPostfix() == null) {
    line.append("---");
  } else {
    line.append("[" + getPostfix() + "]");
  }
  line.append(" ");

  return line.toString();
}
 
Example #7
Source File: VectorScoreQuery.java    From solr-vector-scoring with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the score provider that scores a document against the query vector.
 *
 * <p>For each term in the document's term vector for {@code field}, the float
 * payload of the term is multiplied by the query vector entry whose index is
 * the term text parsed as an integer, and the products are summed. When
 * {@code cosine} is set, the sum is divided by the product of the square
 * roots of the document and query vector norms; if either norm is zero the
 * score is {@code 0}.
 *
 * @param context the leaf reader context the provider reads term vectors from
 * @return the custom score provider for {@code context}
 * @throws IOException declared by the overridden method
 */
@Override
protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) throws IOException {
	return new CustomScoreProvider(context) {
		@Override
		public float customScore(int docID, float subQueryScore, float valSrcScore) throws IOException {
			float score = 0;
			double docVectorNorm = 0;
			LeafReader reader = context.reader();
			Terms terms = reader.getTermVector(docID, field);
			// getTermVector may return null when the document has no term vector
			// for the field; fail with a clear message instead of an NPE.
			if (terms == null) {
				throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
						"no term vector found for field '" + field + "' in document " + docID);
			}
			if (vector.size() != terms.size()) {
				throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "indexed and input vector array must have same length");
			}
			TermsEnum iter = terms.iterator();
			BytesRef text;
			while ((text = iter.next()) != null) {
				String term = text.utf8ToString();
				float payloadValue = 0f;
				PostingsEnum postings = iter.postings(null, PostingsEnum.ALL);
				while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
					// Advance through all positions so the payload read below
					// belongs to the last position of the term.
					int freq = postings.freq();
					while (freq-- > 0) {
						postings.nextPosition();
					}

					BytesRef payload = postings.getPayload();
					// getPayload may return null when the position carries no payload.
					if (payload == null) {
						throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
								"term '" + term + "' of field '" + field + "' has no payload in document " + docID);
					}
					payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset);

					if (cosine) {
						docVectorNorm += Math.pow(payloadValue, 2.0);
					}
				}

				// The term text is the index into the query vector.
				score = (float) (score + payloadValue * (vector.get(Integer.parseInt(term))));
			}

			if (cosine) {
				if ((docVectorNorm == 0) || (queryVectorNorm == 0)) {
					return 0f;
				}
				return (float) (score / (Math.sqrt(docVectorNorm) * Math.sqrt(queryVectorNorm)));
			}

			return score;
		}
	};
}
 
Example #8
Source File: MtasTokenCollection.java    From mtas with Apache License 2.0 4 votes vote down vote up
/**
 * Gets the list.
 *
 * <p>Returns a table with one header row followed by one row per token. Each
 * row has 15 columns; a column is left {@code null} when the token has no
 * value for it. The payload column holds the payload decoded as a float.
 *
 * @return the list
 * @throws MtasParserException the mtas parser exception
 */
public String[][] getList() throws MtasParserException {
  String[][] table = new String[(tokenCollection.size() + 1)][];
  table[0] = new String[] { "id", "start real offset", "end real offset",
      "provide real offset", "start offset", "end offset", "provide offset",
      "start position", "end position", "multiple positions", "parent",
      "provide parent", "payload", "prefix", "postfix" };
  int rowIndex = 1;
  for (Iterator<MtasToken> tokens = this.iterator(); tokens.hasNext(); rowIndex++) {
    MtasToken token = tokens.next();
    // Cells that are never assigned stay null (array default).
    String[] cells = new String[15];
    cells[0] = token.getId().toString();

    if (token.getRealOffsetStart() != null) {
      cells[1] = token.getRealOffsetStart().toString();
      cells[2] = token.getRealOffsetEnd().toString();
      cells[3] = token.getProvideRealOffset() ? "1" : null;
    }

    if (token.getOffsetStart() != null) {
      cells[4] = token.getOffsetStart().toString();
      cells[5] = token.getOffsetEnd().toString();
      cells[6] = token.getProvideOffset() ? "1" : null;
    }

    if (token.getPositionLength() != null) {
      // A single position or a contiguous range is reported as start/end;
      // anything else is reported as the explicit list of positions.
      if (token.getPositionStart().equals(token.getPositionEnd())
          || (token.getPositions() == null)
          || (token.getPositions().length == (1 + token.getPositionEnd()
              - token.getPositionStart()))) {
        cells[7] = token.getPositionStart().toString();
        cells[8] = token.getPositionEnd().toString();
      } else {
        cells[9] = Arrays.toString(token.getPositions());
      }
    }

    if (token.getParentId() != null) {
      cells[10] = token.getParentId().toString();
      cells[11] = token.getProvideParentId() ? "1" : null;
    }

    BytesRef payload = token.getPayload();
    if (payload != null) {
      byte[] payloadBytes = Arrays.copyOfRange(payload.bytes, payload.offset,
          (payload.offset + payload.length));
      cells[12] = Float.toString(PayloadHelper.decodeFloat(payloadBytes));
    }

    cells[13] = token.getPrefix();
    cells[14] = token.getPostfix();
    table[rowIndex] = cells;
  }
  return table;
}