org.apache.lucene.index.memory.MemoryIndex Java Examples
The following examples show how to use
org.apache.lucene.index.memory.MemoryIndex.
Each snippet comes from an open-source project; the source file and its license are noted above each example.
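Before diving into the project code, here is a minimal, self-contained sketch of the pattern every example below builds on: analyze one document into a MemoryIndex, then either score a query against it directly or open a searcher over it. The field name and text are illustrative only, not taken from any of the projects below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexSketch {
    public static void main(String[] args) {
        MemoryIndex index = new MemoryIndex();
        // Index a single, ephemeral document entirely on the heap.
        index.addField("contents", "the quick brown fox", new StandardAnalyzer());
        // search(Query) scores the one in-memory document; 0.0f means no match.
        float score = index.search(new TermQuery(new Term("contents", "fox")));
        System.out.println(score > 0.0f ? "match, score=" + score : "no match");
        // reset() clears the index so the instance can be reused for the next document.
        index.reset();
    }
}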
Example #1
Source File: CrawlerTask.java From JPPF with Apache License 2.0
/**
 * Search for the user-specified query expression in the current page.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
    final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
    final Query q = parser.parse(query);
    final MemoryIndex index = new MemoryIndex();
    final Link link = new Link(url);
    final PageData pageData = new SimpleHttpClientParser().load(link);
    index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
    final IndexSearcher searcher = index.createSearcher();
    final Hits hits = searcher.search(q);
    @SuppressWarnings("rawtypes")
    final Iterator it = hits.iterator();
    float relevance = 0f;
    if (it.hasNext()) {
        while (it.hasNext()) {
            final Hit hit = (Hit) it.next();
            relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
        }
        matchedLinks.add(new LinkMatch(url, relevance));
    }
}
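Note that Hits, Hit, and an IndexSearcher.search(Query) overload returning Hits belong to a pre-3.0 Lucene API that has since been removed. On a current Lucene, the same check can be written against MemoryIndex directly, since MemoryIndex.search(Query) returns the score of the single in-memory document. A rough sketch, reusing Link, PageData, and the scoring convention from the example above:

private void search() throws Exception {
    final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
    final Query q = parser.parse(query);
    final MemoryIndex index = new MemoryIndex();
    final PageData pageData = new SimpleHttpClientParser().load(new Link(url));
    index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
    final float score = index.search(q); // 0.0f if the page does not match
    if (score > 0.0f) {
        matchedLinks.add(new LinkMatch(url, ((float) Math.round(score * 1000)) / 10));
    }
}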
Example #2
Source File: MultiDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
    for (IndexableField field : d.getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}
Example #3
Source File: SingleDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
    MemoryIndex memoryIndex = cache.get();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}
Example #4
Source File: ShardTermVectorsService.java From Elasticsearch with Apache License 2.0
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets,
                                   @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields) throws IOException {
    /* store document in memory index */
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String field = getField.getName();
        if (fields.contains(field) == false) {
            // some fields are returned even when not asked for, eg. _timestamp
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
        for (Object text : getField.getValues()) {
            index.addField(field, text.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
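The withOffsets flag maps onto MemoryIndex's storeOffsets constructor argument, so character offsets are only recorded when the term-vector request actually asked for them; MultiFields.getFields then exposes the single in-memory document as a Fields instance from which the vectors are read back.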
Example #5
Source File: PercolatorService.java From Elasticsearch with Apache License 2.0
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
                         PageCacheRecycler pageCacheRecycler, BigArrays bigArrays, HighlightPhase highlightPhase,
                         ClusterService clusterService, AggregationPhase aggregationPhase, ScriptService scriptService,
                         MappingUpdatedAction mappingUpdatedAction) {
    super(settings);
    this.indexNameExpressionResolver = indexNameExpressionResolver;
    this.parseFieldMatcher = new ParseFieldMatcher(settings);
    this.indicesService = indicesService;
    this.pageCacheRecycler = pageCacheRecycler;
    this.bigArrays = bigArrays;
    this.clusterService = clusterService;
    this.highlightPhase = highlightPhase;
    this.aggregationPhase = aggregationPhase;
    this.scriptService = scriptService;
    this.mappingUpdatedAction = mappingUpdatedAction;
    this.sortParseElement = new SortParseElement();
    final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread",
            new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
    cache = new CloseableThreadLocal<MemoryIndex>() {
        @Override
        protected MemoryIndex initialValue() {
            // TODO: should we expose payloads as an option? should offsets be turned on always?
            return new ExtendedMemoryIndex(true, false, maxReuseBytes);
        }
    };
    single = new SingleDocumentPercolatorIndex(cache);
    multi = new MultiDocumentPercolatorIndex(cache);
    percolatorTypes = new IntObjectHashMap<>(6);
    percolatorTypes.put(countPercolator.id(), countPercolator);
    percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
    percolatorTypes.put(matchPercolator.id(), matchPercolator);
    percolatorTypes.put(queryPercolator.id(), queryPercolator);
    percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
    percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
Example #6
Source File: MemoryIndexOffsetStrategy.java From lucene-solr with Apache License 2.0
public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer) {
    super(components, analyzer);
    boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
    memoryIndex = new MemoryIndex(true, storePayloads); // true == store offsets
    memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
    // preFilter for MemoryIndex
    preMemIndexFilterAutomaton = buildCombinedAutomaton(components);
}
Example #7
Source File: DocumentBatch.java From lucene-solr with Apache License 2.0
private SingletonDocumentBatch(Analyzer analyzer, Document doc) {
    MemoryIndex memoryindex = new MemoryIndex(true, true);
    for (IndexableField field : doc) {
        memoryindex.addField(field, analyzer);
    }
    memoryindex.freeze();
    reader = (LeafReader) memoryindex.createSearcher().getIndexReader();
}
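The freeze() call is worth noting: it performs the index's deferred sorting work up front and makes the MemoryIndex effectively read-only, so no further fields can be added. That suits this batch, which is built once and afterwards only searched through the returned LeafReader.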
Example #8
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0
public void buildTermVector(int docid) throws IOException {
    Set<String> fieldList = new HashSet<>();
    fieldList.add("content");
    Document doc = reader.document(docid, fieldList);
    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();
    Terms t = mr.leaves().get(0).reader().terms("content");
    if ((t != null) && (t.size() > 0)) {
        TermsEnum te = t.iterator();
        BytesRef term = null;
        System.out.println(t.size());
        while ((term = te.next()) != null) {
            System.out.println("BytesRef: " + term.utf8ToString());
            System.out.println("docFreq: " + te.docFreq());
            System.out.println("totalTermFreq: " + te.totalTermFreq());
        }
    }
}
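MemoryIndex.fromDocument(doc, analyzer) is a convenience factory that analyzes and indexes every field of the supplied document in one call. Because doc was loaded via reader.document(docid, fieldList) with only "content" requested, only that stored field is available to re-index, which is exactly the term vector this method rebuilds.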
Example #9
Source File: TestTaggedQuery.java From solr-redis with Apache License 2.0
@Test
public void testRewrite() throws IOException {
    MemoryIndex memoryIndex = new MemoryIndex();
    TaggedQuery taggedQuery = new TaggedQuery(new TermQuery(new Term("field", "value")), "tag");
    Query rewrittenQuery = taggedQuery.rewrite(memoryIndex.createSearcher().getTopReaderContext().reader());
    assertTrue(rewrittenQuery instanceof TermQuery);
    assertEquals("field", ((TermQuery) rewrittenQuery).getTerm().field());
    assertEquals("value", ((TermQuery) rewrittenQuery).getTerm().text());
}
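Even a fresh, empty MemoryIndex exposes a complete single-segment IndexReader, which makes it a lightweight, dependency-free way to obtain the reader that Query.rewrite requires in a unit test like this one.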
Example #10
Source File: MultiDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
MultiDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
Example #11
Source File: MultiDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
private DocSearcher(IndexSearcher searcher, MemoryIndex rootDocMemoryIndex) {
    super("percolate", searcher);
    this.rootDocMemoryIndex = rootDocMemoryIndex;
}
Example #12
Source File: SingleDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
SingleDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
Example #13
Source File: SingleDocumentPercolatorIndex.java From Elasticsearch with Apache License 2.0
public DocEngineSearcher(MemoryIndex memoryIndex) {
    super("percolate", memoryIndex.createSearcher());
    this.memoryIndex = memoryIndex;
}
Example #14
Source File: TestMultipassPresearcher.java From lucene-solr with Apache License 2.0
public void testQueryBuilder() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Presearcher presearcher = createPresearcher();
    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration() {
        @Override
        public IndexWriter buildIndexWriter() {
            return writer;
        }
    };
    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {
        monitor.register(new MonitorQuery("1", parse("f:test")));
        try (IndexReader reader = DirectoryReader.open(writer, false, false)) {
            MemoryIndex mindex = new MemoryIndex();
            mindex.addField("f", "this is a test document", WHITESPACE);
            LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();
            QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);
            BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
            BooleanQuery expected = new BooleanQuery.Builder()
                .add(should(new BooleanQuery.Builder()
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_0", new BytesRef("test")))).build()))
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_1", new BytesRef("test")))).build()))
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_2", new BytesRef("test")))).build()))
                    .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_3", new BytesRef("test")))).build()))
                    .build()))
                .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
                .build();
            assertEquals(expected, q);
        }
    }
}
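Here the MemoryIndex stands in for the document under test: the document is analyzed into it, its reader is cast to a LeafReader (a MemoryIndex reader always consists of a single leaf), and the presearcher builds its filter query from that reader's terms. Example #15 below follows the same pattern.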
Example #15
Source File: TestTermPresearcher.java From lucene-solr with Apache License 2.0
public void testQueryBuilder() throws IOException {
    Presearcher presearcher = createPresearcher();
    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration() {
        @Override
        public IndexWriter buildIndexWriter() {
            return writer;
        }
    };
    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {
        monitor.register(new MonitorQuery("1", parse("f:test")));
        try (IndexReader reader = DirectoryReader.open(writer, false, false)) {
            MemoryIndex mindex = new MemoryIndex();
            mindex.addField("f", "this is a test document", WHITESPACE);
            mindex.addField("g", "#######", ANALYZER); // analyzes away to empty field
            LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();
            QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);
            BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
            BooleanQuery expected = new BooleanQuery.Builder()
                .add(should(new BooleanQuery.Builder()
                    .add(should(new TermInSetQuery("f", new BytesRef("test")))).build()))
                .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
                .build();
            assertEquals(expected, q);
        }
    }
}
Example #16
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0
public Map<String, Map<String, List<Integer>>> buildTermVectorWithPosition(int docid, Set<String> fields) throws IOException {
    Map<String, Map<String, List<Integer>>> fieldToTermVector = new HashMap<>();
    Document doc = reader.document(docid, fields);
    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();
    for (LeafReaderContext leafContext : mr.leaves()) {
        LeafReader leaf = leafContext.reader();
        for (String field : fields) {
            Map<String, List<Integer>> termToPositions = new HashMap<>();
            Terms t = leaf.terms(field);
            if (t != null) {
                fieldToTermVector.put(field, termToPositions);
                TermsEnum tenum = t.iterator();
                BytesRef termBytes = null;
                PostingsEnum postings = null;
                while ((termBytes = tenum.next()) != null) {
                    List<Integer> positions = new ArrayList<>();
                    termToPositions.put(termBytes.utf8ToString(), positions);
                    postings = tenum.postings(postings);
                    postings.advance(0);
                    for (int i = 0; i < postings.freq(); i++) {
                        positions.add(postings.nextPosition());
                    }
                }
            }
        }
    }
    return fieldToTermVector;
}
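Because a MemoryIndex reader always contains exactly one document with id 0, postings.advance(0) is sufficient to position the postings on it, after which freq() and nextPosition() enumerate that term's positions within the field.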