Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setSimilarity()
The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setSimilarity().
You can go to the original project or source file by following the links above each example.
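Before looking at the project examples, it may help to see the shared pattern in isolation: setSimilarity() is called on the IndexWriterConfig before the IndexWriter is constructed, and the same Similarity is usually set on the IndexSearcher as well so that query-time scoring matches the norms written at index time (several examples below do exactly this). The following is a minimal sketch, not taken from any of the projects listed here; the directory type, field name, and BM25 parameters are illustrative assumptions.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SetSimilarityExample {
    public static void main(String[] args) throws IOException {
        // Illustrative Similarity and parameters; any Similarity implementation can be used here.
        Similarity similarity = new BM25Similarity(1.2f, 0.75f);
        Directory dir = new ByteBuffersDirectory();

        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setSimilarity(similarity); // must be configured before the IndexWriter is opened

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello similarity", Field.Store.NO)); // "body" is a made-up field name
            writer.addDocument(doc);
        }

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(similarity); // keep query-time scoring consistent with index-time norms
            // ... run queries against "body" here
        }
    }
}

Note that an IndexWriterConfig instance is intended for a single IndexWriter, which is why the examples below build a fresh config (and call setSimilarity() on it) each time a writer is created.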
Example 1
Source File: TestTaxonomyFacetCounts.java From lucene-solr with Apache License 2.0
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      // Only the regular text "field" should ever request a Similarity;
      // the facet drill-down fields are indexed without norms.
      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
Example 2
Source File: test.java From vscode-extension with MIT License
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // we by default don't commit on close
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    MergePolicy mergePolicy = config().getMergePolicy();
    // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
    iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
            new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
    }
    iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    if (config().getIndexSort() != null) {
        iwc.setIndexSort(config().getIndexSort());
    }
    return iwc;
}
Example 3
Source File: SolrIndexConfig.java From lucene-solr with Apache License 2.0
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema schema = core.getLatestSchema();
  IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
  if (maxBufferedDocs != -1)
    iwc.setMaxBufferedDocs(maxBufferedDocs);

  if (ramBufferSizeMB != -1)
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

  if (ramPerThreadHardLimitMB != -1) {
    iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }

  iwc.setSimilarity(schema.getSimilarity());
  MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
  iwc.setMergePolicy(mergePolicy);
  MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
  iwc.setMergeScheduler(mergeScheduler);
  iwc.setInfoStream(infoStream);

  if (mergePolicy instanceof SortingMergePolicy) {
    Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
    iwc.setIndexSort(indexSort);
  }

  iwc.setUseCompoundFile(useCompoundFile);

  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                                                                    IndexReaderWarmer.class,
                                                                    null,
                                                                    new Class[] { InfoStream.class },
                                                                    new Object[] { iwc.getInfoStream() });
    iwc.setMergedSegmentWarmer(warmer);
  }

  return iwc;
}
Example 4
Source File: Lucene.java From uncc2014watsonsim with GNU General Public License v2.0
public Lucene(Path path) throws IOException {
    /* Setup Lucene */
    Directory dir = FSDirectory.open(path);
    // here we are using a standard analyzer, there are a lot of analyzers available to our use.
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    // this mode by default overwrites the previous index, not a very good option in real usage
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    iwc.setSimilarity(new BM25Similarity());
    index = new IndexWriter(dir, iwc);
}
Example 5
Source File: LuceneTermQueryBuilderTest.java From querqy with Apache License 2.0
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    final TermQuery termQuery = new LuceneTermQueryBuilder()
            .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
    final Term term = termQuery.getTerm();
    assertEquals("f1", term.field());
    assertEquals("v1", term.text());

    TopDocs topDocs = indexSearcher.search(termQuery, 10);
    final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();
    assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field
}
Example 6
Source File: LuceneIndex.java From rdf4j with BSD 3-Clause "New" or "Revised" License
/**
 * Method produces {@link IndexWriterConfig} using settings.
 *
 * @return
 */
private IndexWriterConfig getIndexWriterConfig() {
    IndexWriterConfig cnf = new IndexWriterConfig(analyzer);
    cnf.setSimilarity(similarity);
    return cnf;
}
Example 7
Source File: LuceneTranslationMemory.java From modernmt with Apache License 2.0
public LuceneTranslationMemory(Directory directory, DocumentBuilder documentBuilder, QueryBuilder queryBuilder,
                               Rescorer rescorer, AnalyzerFactory analyzerFactory, int minQuerySize) throws IOException {
    this.indexDirectory = directory;
    this.queryBuilder = queryBuilder;
    this.rescorer = rescorer;
    this.documentBuilder = documentBuilder;
    this.analyzerFactory = analyzerFactory;
    this.shortQueryAnalyzer = analyzerFactory.createShortQueryAnalyzer();
    this.longQueryAnalyzer = analyzerFactory.createLongQueryAnalyzer();
    this.minQuerySize = minQuerySize;

    // Index writer setup
    IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_4_10_4,
            new DelegatingAnalyzerWrapper(PER_FIELD_REUSE_STRATEGY) {
                @Override
                protected Analyzer getWrappedAnalyzer(String fieldName) {
                    if (documentBuilder.isHashField(fieldName))
                        return analyzerFactory.createHashAnalyzer();
                    else
                        return analyzerFactory.createContentAnalyzer();
                }
            });
    indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexConfig.setSimilarity(analyzerFactory.createSimilarity());

    this.indexWriter = new IndexWriter(this.indexDirectory, indexConfig);

    // Ensure index exists
    if (!DirectoryReader.indexExists(directory))
        this.indexWriter.commit();

    // Read channels status
    IndexSearcher searcher = this.getIndexSearcher();
    Query query = this.queryBuilder.getChannels(this.documentBuilder);
    TopDocs docs = searcher.search(query, 1);

    if (docs.scoreDocs.length > 0) {
        Document channelsDocument = searcher.doc(docs.scoreDocs[0].doc);
        this.channels = this.documentBuilder.asChannels(channelsDocument);
    } else {
        this.channels = new HashMap<>();
    }
}
Example 8
Source File: DependentTermQueryBuilderTest.java From querqy with Apache License 2.0
@Test
public void testCreateWeight() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f2", "v1 v1", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Term qTerm1 = new Term("f1", "v1");
    Term qTerm2 = new Term("f2", "v1");
    dfc.newClause();
    dfc.prepareTerm(qTerm1);
    dfc.prepareTerm(qTerm2);
    dfc.finishedUserQuery();

    DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm2, fieldBoost2);

    TopDocs topDocs = indexSearcher.search(query2, 10);
    final Weight weight2 = query2.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight2.explain(indexReader.leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();
    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * df of f1:v1
    assertTrue(explainText.contains("2.0 = freq")); // don't use tf

    indexReader.close();
    directory.close();
    analyzer.close();
}
Example 9
Source File: SimilarityTermQueryBuilderTest.java From querqy with Apache License 2.0
@Test
public void testCreateWeight() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    Term term = new Term("f1", "v1");
    SimilarityTermQuery query = new SimilarityTermQueryBuilder().createTermQuery(term, fieldBoost2);

    TopDocs topDocs = indexSearcher.search(query, 10);
    final Weight weight = query.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();
    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field

    indexReader.close();
    directory.close();
    analyzer.close();
}
Example 10
Source File: DependentTermQueryBuilderTest.java From querqy with Apache License 2.0
@Test
public void testPostingsVsMaxScore() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 1);
    TestUtil.addNumDocsWithTextField("f2", "v1 v2", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Term qTerm1 = new Term("f2", "v1");
    Term qTerm2 = new Term("f2", "v2");
    dfc.newClause();
    dfc.prepareTerm(qTerm1);
    dfc.newClause();
    dfc.prepareTerm(qTerm2);
    dfc.finishedUserQuery();

    DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm2, fieldBoost2);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(query1, BooleanClause.Occur.SHOULD);
    builder.add(query2, BooleanClause.Occur.SHOULD);
    builder.setMinimumNumberShouldMatch(2);
    BooleanQuery bq = builder.build();

    // Query execution will call org.apache.lucene.search.Scorer.getMaxScore which might consume
    // the postingsEnum so that we don't get any hit
    TopDocs topDocs = indexSearcher.search(bq, 10);
    assertEquals(1, topDocs.scoreDocs.length);
}