org.apache.lucene.search.similarities.DefaultSimilarity Java Examples
The following examples show how to use
org.apache.lucene.search.similarities.DefaultSimilarity.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MoreLikeThisQuery.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Rewrites this query by configuring an {@link XMoreLikeThis} helper from the
 * settings held by this query and delegating to {@code createQuery}.
 *
 * @param reader the index reader passed to the {@link XMoreLikeThis} helper
 * @return the query built by {@code createQuery} from the configured helper
 * @throws IOException propagated from the calls made while building the query
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Fall back to a DefaultSimilarity when no similarity was configured.
    final XMoreLikeThis mlt;
    if (similarity != null) {
        mlt = new XMoreLikeThis(reader, similarity);
    } else {
        mlt = new XMoreLikeThis(reader, new DefaultSimilarity());
    }

    // Fields and analysis.
    mlt.setFieldNames(moreLikeFields);
    mlt.setAnalyzer(analyzer);

    // Term selection thresholds.
    mlt.setMinTermFreq(minTermFrequency);
    mlt.setMinDocFreq(minDocFreq);
    mlt.setMaxDocFreq(maxDocFreq);
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setMinWordLen(minWordLen);
    mlt.setMaxWordLen(maxWordLen);
    mlt.setStopWords(stopWords);

    // Boosting.
    mlt.setBoost(boostTerms);
    mlt.setBoostFactor(boostTermsFactor);

    // "Unlike" handling only applies when at least one unlike source is set.
    final boolean nothingToUnlike = this.unlikeText == null && this.unlikeFields == null;
    if (!nothingToUnlike) {
        handleUnlike(mlt, this.unlikeText, this.unlikeFields);
    }

    return createQuery(mlt);
}
Example #2
Source File: ContextAnalyzerIndex.java From modernmt with Apache License 2.0 | 6 votes |
public ContextAnalyzerIndex(Directory directory, Rescorer rescorer) throws IOException { this.indexDirectory = directory; this.analyzer = new CorpusAnalyzer(); this.rescorer = rescorer; // Index writer setup IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_4_10_4, this.analyzer); indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexConfig.setSimilarity(new DefaultSimilarity() { @Override public float lengthNorm(FieldInvertState state) { return 1.f; } }); this.indexWriter = new IndexWriter(this.indexDirectory, indexConfig); // Ensure index exists if (!DirectoryReader.indexExists(directory)) this.indexWriter.commit(); }
Example #3
Source File: TermVectorsFilter.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Creates a filter over the given term vectors.
 *
 * @param termVectorsByField the term vector fields stored as {@code fields}
 * @param topLevelFields     the top-level fields stored on this instance
 * @param selectedFields     the selected field names stored on this instance
 * @param dfs                aggregated document frequencies; may be {@code null}
 */
public TermVectorsFilter(Fields termVectorsByField,
                         Fields topLevelFields,
                         Set<String> selectedFields,
                         @Nullable AggregatedDfs dfs) {
    // State created here: empty score/size bookkeeping and a default similarity.
    this.similarity = new DefaultSimilarity();
    this.scoreTerms = new HashMap<>();
    this.sizes = AtomicLongMap.create();

    // State supplied by the caller.
    this.fields = termVectorsByField;
    this.topLevelFields = topLevelFields;
    this.selectedFields = selectedFields;
    this.dfs = dfs;
}
Example #4
Source File: DefaultSimilarityProvider.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * {@inheritDoc}
 *
 * @return the {@link DefaultSimilarity} referenced by {@code similarity}
 */
@Override
public DefaultSimilarity get() {
    return similarity;
}
Example #5
Source File: XMoreLikeThis.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * Creates an instance for the given reader, delegating to the two-argument
 * constructor with a freshly created {@link DefaultSimilarity}.
 *
 * @param ir the index reader to use
 */
public XMoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
Example #6
Source File: FullTextIndexTupleSerializer.java From database with GNU General Public License v2.0 | 4 votes |
protected ITermDocKey<V> deserialize(final ITuple tuple, final boolean keyOnly) { // key is {term,docId,fieldId} // final byte[] key = tuple.getKey(); // // // decode the document identifier. // final long docId = KeyBuilder.decodeLong(key, key.length // - Bytes.SIZEOF_LONG /*docId*/ - Bytes.SIZEOF_INT/*fieldId*/); final ByteArrayBuffer kbuf = tuple.getKeyBuffer(); /* * The byte offset of the docId in the key. * * Note: This is also the byte length of the match on the unicode sort * key, which appears at the head of the key. */ final int docIdOffset = kbuf.limit() - Bytes.SIZEOF_LONG /* docId */ - (fieldsEnabled ? Bytes.SIZEOF_INT/* fieldId */: 0); final V docId = (V) (Object)Long.valueOf(KeyBuilder.decodeLong(kbuf.array(), docIdOffset)); // Decode field when present final int fieldId; if (fieldsEnabled) { fieldId = KeyBuilder.decodeShort(kbuf.array(), kbuf.limit() - Bytes.SIZEOF_INT); } else { fieldId = -1; } final int termWeightOffset = docIdOffset - Bytes.SIZEOF_BYTE; final byte termWeightCompact = kbuf.getByte(termWeightOffset); /* * See: http://lucene.apache.org/core/5_1_0/core/org/apache/lucene/search/similarities/DefaultSimilarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final double termWeight = similarity.decodeNormValue(termWeightCompact); if (keyOnly) { return new ReadOnlyTermDocKey(docId, fieldId, termWeight); } // final int termFreq; // final double termWeight; // try { // // final DataInputBuffer dis = tuple.getValueStream(); // // termFreq = dis.readShort(); // // if(doublePrecision) // termWeight = dis.readDouble(); // else // termWeight = dis.readFloat(); // // } catch (IOException ex) { // // throw new RuntimeException(ex); // // } // return new ReadOnlyTermDocRecord<V>(null/* token */, docId, fieldId, /* termFreq, */ termWeight); }
Example #7
Source File: RDFFullTextIndexTupleSerializer.java From database with GNU General Public License v2.0 | 4 votes |
protected ITermDocKey deserialize(final ITuple tuple, final boolean keyOnly) { final ByteArrayBuffer kbuf = tuple.getKeyBuffer(); // The byte length of the docId IV. final int byteLength; try { // byteLength = LongPacker.unpackInt((DataInput) tuple // .getValueStream()); byteLength = ShortPacker.unpackShort((DataInput) tuple .getValueStream()); } catch (IOException ex) { throw new RuntimeException(ex); } final int docIdOffset = kbuf.limit() - byteLength; // Decode the IV. final IV docId = (IV) IVUtility.decodeFromOffset(kbuf.array(), docIdOffset); final int termWeightOffset = docIdOffset - Bytes.SIZEOF_BYTE; final byte termWeightCompact = kbuf.getByte(termWeightOffset); /* * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final double termWeight = similarity.decodeNormValue(termWeightCompact); if (keyOnly) { return new ReadOnlyTermDocKey(docId, NO_FIELD, termWeight); } // final int termFreq; // final double termWeight; // try { // // final DataInputBuffer dis = tuple.getValueStream(); // // // skip the byte length of the IV. // LongPacker.unpackInt((DataInput) dis); // // termFreq = dis.readShort(); // termFreq = LongPacker.unpackInt((DataInput) dis); // if (doublePrecision) // termWeight = dis.readDouble(); // else // termWeight = dis.readFloat(); // // } catch (IOException ex) { // // throw new RuntimeException(ex); // // } return new ReadOnlyTermDocRecord(null/* token */, docId, NO_FIELD, /* termFreq, */ termWeight); }
Example #8
Source File: FullTextIndexTupleSerializer.java From database with GNU General Public License v2.0 | 2 votes |
@Override public byte[] serializeKey(final Object obj) { @SuppressWarnings("unchecked") final ITermDocKey<V> entry = (ITermDocKey<V>) obj; final String termText = entry.getToken(); final double termWeight = entry.getLocalTermWeight(); /* * See: http://lucene.apache.org/core/5_1_0/core/org/apache/lucene/search/similarities/DefaultSimilarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight); final V docId = entry.getDocId(); final IKeyBuilder keyBuilder = getKeyBuilder(); keyBuilder.reset(); // the token text (or its successor as desired). keyBuilder .appendText(termText, true/* unicode */, false/* successor */); keyBuilder.append(termWeightCompact); keyBuilder.append((V) docId); if (fieldsEnabled) keyBuilder.append(entry.getFieldId()); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + (fieldsEnabled ? "," + entry.getFieldId() : "") + "}, key=" + BytesUtil.toString(key)); } return key; }
Example #9
Source File: RDFFullTextIndexTupleSerializer.java From database with GNU General Public License v2.0 | 2 votes |
@Override public byte[] serializeKey(final Object obj) { final ITermDocKey entry = (ITermDocKey) obj; final String termText = entry.getToken(); final double termWeight = entry.getLocalTermWeight(); /* * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight); final IV docId = (IV)entry.getDocId(); final IKeyBuilder keyBuilder = getKeyBuilder(); keyBuilder.reset(); // the token text (or its successor as desired). keyBuilder .appendText(termText, true/* unicode */, false/* successor */); keyBuilder.append(termWeightCompact); IVUtility.encode(keyBuilder, docId); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + "}, key=" + BytesUtil.toString(key)); } return key; }