org.apache.lucene.index.memory.MemoryIndex Java Examples
The following examples show how to use
org.apache.lucene.index.memory.MemoryIndex.
Each snippet comes from an open-source project; the source file and its license are noted above each example.
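Before diving into the project code, here is a minimal, self-contained sketch of the pattern every example below builds on: analyze one document into a MemoryIndex, then either score a query against it directly or open a searcher over it. The field name and text are illustrative only, not taken from any of the projects below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexSketch {
    public static void main(String[] args) {
        MemoryIndex index = new MemoryIndex();
        // Index a single, ephemeral document entirely on the heap.
        index.addField("contents", "the quick brown fox", new StandardAnalyzer());
        // search(Query) scores the one in-memory document; 0.0f means no match.
        float score = index.search(new TermQuery(new Term("contents", "fox")));
        System.out.println(score > 0.0f ? "match, score=" + score : "no match");
        // reset() clears the index so the instance can be reused for the next document.
        index.reset();
    }
}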
Example #1
Source File: CrawlerTask.java From JPPF with Apache License 2.0
/**
 * Search for the user-specified query expression in the current page.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
    final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
    final Query q = parser.parse(query);
    final MemoryIndex index = new MemoryIndex();
    final Link link = new Link(url);
    final PageData pageData = new SimpleHttpClientParser().load(link);
    index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
    final IndexSearcher searcher = index.createSearcher();
    final Hits hits = searcher.search(q);
    @SuppressWarnings("rawtypes")
    final Iterator it = hits.iterator();
    float relevance = 0f;
    if (it.hasNext()) {
        while (it.hasNext()) {
            final Hit hit = (Hit) it.next();
            relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
        }
        matchedLinks.add(new LinkMatch(url, relevance));
    }
}
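Note that Hits, Hit, and an IndexSearcher.search(Query) overload returning Hits belong to a pre-3.0 Lucene API that has since been removed. On a current Lucene, the same check can be written against MemoryIndex directly, since MemoryIndex.search(Query) returns the score of the single in-memory document. A rough sketch, reusing Link, PageData, and the scoring convention from the example above:

private void search() throws Exception {
    final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
    final Query q = parser.parse(query);
    final MemoryIndex index = new MemoryIndex();
    final PageData pageData = new SimpleHttpClientParser().load(new Link(url));
    index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
    final float score = index.search(q); // 0.0f if the page does not match
    if (score > 0.0f) {
        matchedLinks.add(new LinkMatch(url, ((float) Math.round(score * 1000)) / 10));
    }
}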
Example #2
Source File: MultiDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
    for (IndexableField field : d.getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}
Example #3
Source File: SingleDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
    MemoryIndex memoryIndex = cache.get();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}
Example #4
Source File: ShardTermVectorsService.java From Elasticsearch with Apache License 2.0
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets,
                                   @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException {
    /* store document in memory index */
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String field = getField.getName();
        if (fields.contains(field) == false) {
            // some fields are returned even when not asked for, eg. _timestamp
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
        for (Object text : getField.getValues()) {
            index.addField(field, text.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
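The withOffsets flag maps onto MemoryIndex's storeOffsets constructor argument, so character offsets are only recorded when the term-vector request actually asked for them; MultiFields.getFields then exposes the single in-memory document as a Fields instance from which the vectors are read back.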
Example #5
Source File: PercolatorService.java From Elasticsearch with Apache License 2.0
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
                         PageCacheRecycler pageCacheRecycler, BigArrays bigArrays, HighlightPhase highlightPhase,
                         ClusterService clusterService, AggregationPhase aggregationPhase, ScriptService scriptService,
                         MappingUpdatedAction mappingUpdatedAction) {
    super(settings);
    this.indexNameExpressionResolver = indexNameExpressionResolver;
    this.parseFieldMatcher = new ParseFieldMatcher(settings);
    this.indicesService = indicesService;
    this.pageCacheRecycler = pageCacheRecycler;
    this.bigArrays = bigArrays;
    this.clusterService = clusterService;
    this.highlightPhase = highlightPhase;
    this.aggregationPhase = aggregationPhase;
    this.scriptService = scriptService;
    this.mappingUpdatedAction = mappingUpdatedAction;
    this.sortParseElement = new SortParseElement();
    final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread",
            new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
    cache = new CloseableThreadLocal<MemoryIndex>() {
        @Override
        protected MemoryIndex initialValue() {
            // TODO: should we expose payloads as an option? should offsets be turned on always?
            return new ExtendedMemoryIndex(true, false, maxReuseBytes);
        }
    };
    single = new SingleDocumentPercolatorIndex(cache);
    multi = new MultiDocumentPercolatorIndex(cache);
    percolatorTypes = new IntObjectHashMap<>(6);
    percolatorTypes.put(countPercolator.id(), countPercolator);
    percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
    percolatorTypes.put(matchPercolator.id(), matchPercolator);
    percolatorTypes.put(queryPercolator.id(), queryPercolator);
    percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
    percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
Example #6
Source File: MemoryIndexOffsetStrategy.java From lucene-solr with Apache License 2.0
public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer) {
    super(components, analyzer);
    boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
    memoryIndex = new MemoryIndex(true, storePayloads); // true == store offsets
    memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
    // preFilter for MemoryIndex
    preMemIndexFilterAutomaton = buildCombinedAutomaton(components);
}
Example #7
Source File: DocumentBatch.java From lucene-solr with Apache License 2.0
private SingletonDocumentBatch(Analyzer analyzer, Document doc) {
    MemoryIndex memoryindex = new MemoryIndex(true, true);
    for (IndexableField field : doc) {
        memoryindex.addField(field, analyzer);
    }
    memoryindex.freeze();
    reader = (LeafReader) memoryindex.createSearcher().getIndexReader();
}
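The freeze() call is worth noting: it performs the index's deferred sorting work up front and makes the MemoryIndex effectively read-only, so no further fields can be added. That suits this batch, which is built once and afterwards only searched through the returned LeafReader.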
Example #8
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0
public void buildTermVector(int docid) throws IOException {
    Set<String> fieldList = new HashSet<>();
    fieldList.add("content");
    Document doc = reader.document(docid, fieldList);
    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();
    Terms t = mr.leaves().get(0).reader().terms("content");
    if ((t != null) && (t.size() > 0)) {
        TermsEnum te = t.iterator();
        BytesRef term = null;
        System.out.println(t.size());
        while ((term = te.next()) != null) {
            System.out.println("BytesRef: " + term.utf8ToString());
            System.out.println("docFreq: " + te.docFreq());
            System.out.println("totalTermFreq: " + te.totalTermFreq());
        }
    }
}
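MemoryIndex.fromDocument(doc, analyzer) is a convenience factory that analyzes and indexes every field of the supplied document in one call. Because doc was loaded via reader.document(docid, fieldList) with only "content" requested, only that stored field is available to re-index, which is exactly the term vector this method rebuilds.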
Example #9
Source File: TestTaggedQuery.java From solr-redis with Apache License 2.0
@Test
public void testRewrite() throws IOException {
    MemoryIndex memoryIndex = new MemoryIndex();
    TaggedQuery taggedQuery = new TaggedQuery(new TermQuery(new Term("field", "value")), "tag");
    Query rewrittenQuery = taggedQuery.rewrite(memoryIndex.createSearcher().getTopReaderContext().reader());
    assertTrue(rewrittenQuery instanceof TermQuery);
    assertEquals("field", ((TermQuery) rewrittenQuery).getTerm().field());
    assertEquals("value", ((TermQuery) rewrittenQuery).getTerm().text());
}
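Even a fresh, empty MemoryIndex exposes a complete single-segment IndexReader, which makes it a lightweight, dependency-free way to obtain the reader that Query.rewrite requires in a unit test like this one.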
Example #10
Source File: MultiDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
MultiDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
Example #11
Source File: MultiDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
private DocSearcher(IndexSearcher searcher, MemoryIndex rootDocMemoryIndex) {
    super("percolate", searcher);
    this.rootDocMemoryIndex = rootDocMemoryIndex;
}
Example #12
Source File: SingleDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
SingleDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
Example #13
Source File: SingleDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
public DocEngineSearcher(MemoryIndex memoryIndex) {
    super("percolate", memoryIndex.createSearcher());
    this.memoryIndex = memoryIndex;
}
Example #14
Source File: TestMultipassPresearcher.java From lucene-solr with Apache License 2.0
public void testQueryBuilder() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Presearcher presearcher = createPresearcher();
    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration() {
        @Override
        public IndexWriter buildIndexWriter() {
            return writer;
        }
    };
    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {
        monitor.register(new MonitorQuery("1", parse("f:test")));
        try (IndexReader reader = DirectoryReader.open(writer, false, false)) {
            MemoryIndex mindex = new MemoryIndex();
            mindex.addField("f", "this is a test document", WHITESPACE);
            LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();
            QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);
            BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
            BooleanQuery expected = new BooleanQuery.Builder()
                .add(should(new BooleanQuery.Builder()
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_0", new BytesRef("test")))).build()))
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_1", new BytesRef("test")))).build()))
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_2", new BytesRef("test")))).build()))
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_3", new BytesRef("test")))).build()))
                    .build()))
                .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
                .build();
            assertEquals(expected, q);
        }
    }
}
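Here the MemoryIndex stands in for the document under test: the document is analyzed into it, its reader is cast to a LeafReader (a MemoryIndex reader always consists of a single leaf), and the presearcher builds its filter query from that reader's terms. Example #15 below follows the same pattern.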
Example #15
Source File: TestTermPresearcher.java From lucene-solr with Apache License 2.0
public void testQueryBuilder() throws IOException {
    Presearcher presearcher = createPresearcher();
    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration() {
        @Override
        public IndexWriter buildIndexWriter() {
            return writer;
        }
    };
    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {
        monitor.register(new MonitorQuery("1", parse("f:test")));
        try (IndexReader reader = DirectoryReader.open(writer, false, false)) {
            MemoryIndex mindex = new MemoryIndex();
            mindex.addField("f", "this is a test document", WHITESPACE);
            mindex.addField("g", "#######", ANALYZER); // analyzes away to empty field
            LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();
            QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);
            BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
            BooleanQuery expected = new BooleanQuery.Builder()
                .add(should(new BooleanQuery.Builder()
                    .add(should(new TermInSetQuery("f", new BytesRef("test")))).build()))
                .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
                .build();
            assertEquals(expected, q);
        }
    }
}
Example #16
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0
public Map<String, Map<String, List<Integer>>> buildTermVectorWithPosition(int docid, Set<String> fields) throws IOException {
    Map<String, Map<String, List<Integer>>> fieldToTermVector = new HashMap<>();
    Document doc = reader.document(docid, fields);
    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();
    for (LeafReaderContext leafContext : mr.leaves()) {
        LeafReader leaf = leafContext.reader();
        for (String field : fields) {
            Map<String, List<Integer>> termToPositions = new HashMap<>();
            Terms t = leaf.terms(field);
            if (t != null) {
                fieldToTermVector.put(field, termToPositions);
                TermsEnum tenum = t.iterator();
                BytesRef termBytes = null;
                PostingsEnum postings = null;
                while ((termBytes = tenum.next()) != null) {
                    List<Integer> positions = new ArrayList<>();
                    termToPositions.put(termBytes.utf8ToString(), positions);
                    postings = tenum.postings(postings);
                    postings.advance(0);
                    for (int i = 0; i < postings.freq(); i++) {
                        positions.add(postings.nextPosition());
                    }
                }
            }
        }
    }
    return fieldToTermVector;
}
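Because a MemoryIndex reader always contains exactly one document with id 0, postings.advance(0) is sufficient to position the postings on it, after which freq() and nextPosition() enumerate that term's positions within the field.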