org.apache.lucene.search.spans.Spans Java Examples
The following examples show how to use
org.apache.lucene.search.spans.Spans.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Creates the span scorer for a single index segment.
 *
 * @param context the segment to score
 * @return a scorer over the payload-checked spans, or {@code null} when no field is set
 *         or {@code getSpans} yields nothing for this segment
 * @throws IOException if reading the segment fails
 * @throws IllegalStateException if the field exists but carries no position data
 */
@Override
public SpanScorer scorer(LeafReaderContext context) throws IOException {
  if (field == null) {
    return null;
  }

  final Terms fieldTerms = context.reader().terms(field);
  final boolean lacksPositions = fieldTerms != null && !fieldTerms.hasPositions();
  if (lacksPositions) {
    throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
  }

  final Spans payloadSpans = getSpans(context, Postings.PAYLOADS);
  if (payloadSpans != null) {
    final Similarity.SimScorer simScorer = getSimScorer(context);
    return new SpanScorer(this, payloadSpans, simScorer);
  }
  return null;
}
Example #2
Source File: MtasIgnoreItem.java From mtas with Apache License 2.0 | 6 votes |
/**
 * Moves the ignore spans to the requested document.
 *
 * @param docId the document to position on
 * @return {@code true} if the ignore spans are on {@code docId} after the call;
 *         {@code false} if there are no ignore spans, they are exhausted, or they
 *         ended up on a different document
 * @throws IOException if advancing the underlying spans fails
 */
public boolean advanceToDoc(int docId) throws IOException {
  // Nothing to advance: no spans at all, or already past the last document.
  if (ignoreSpans == null || currentDocId == Spans.NO_MORE_DOCS) {
    return false;
  }
  if (currentDocId == docId) {
    return true;
  }

  // Switching documents: drop the per-document position bookkeeping cleared here.
  baseEndPositionList.clear();
  fullEndPositionList.clear();
  maxBaseEndPosition.clear();
  minFullStartPosition.clear();

  // Only move forward; a target behind the current document is left untouched.
  if (currentDocId < docId) {
    currentDocId = ignoreSpans.advance(docId);
    currentPosition = -1;
    minimumPosition = -1;
  }
  return currentDocId == docId;
}
Example #3
Source File: MtasIgnoreItem.java From mtas with Apache License 2.0 | 6 votes |
/**
 * Creates an ignore item over the given spans with empty position bookkeeping.
 *
 * @param ignoreSpans the spans to ignore (stored as given)
 * @param maximumIgnoreLength the maximum ignore length; when {@code null},
 *        {@code DEFAULT_MAXIMUM_IGNORE_LENGTH} is used instead
 */
public MtasIgnoreItem(Spans ignoreSpans, Integer maximumIgnoreLength) {
  this.ignoreSpans = ignoreSpans;

  // Not positioned on any document or position yet.
  currentDocId = -1;
  currentPosition = -1;
  minimumPosition = -1;

  // Position lookups, all starting out empty.
  baseStartPositionList = new HashMap<>();
  baseEndPositionList = new HashMap<>();
  fullEndPositionList = new HashMap<>();
  minBaseStartPosition = new HashMap<>();
  maxBaseEndPosition = new HashMap<>();
  minFullStartPosition = new HashMap<>();
  maxFullEndPosition = new HashMap<>();

  if (maximumIgnoreLength != null) {
    this.maximumIgnoreLength = maximumIgnoreLength;
  } else {
    this.maximumIgnoreLength = DEFAULT_MAXIMUM_IGNORE_LENGTH;
  }
}
Example #4
Source File: CodecCollector.java From mtas with Apache License 2.0 | 6 votes |
/**
 * Builds, for every group hit that yields a query, the spans of that query in one segment.
 *
 * <p>Hits for which no query can be created, or whose query produces no spans in the
 * segment, are left out of the result.
 *
 * @param occurences the group hits to resolve
 * @param prefixes the prefixes passed on to the query construction
 * @param field the field passed on to the query construction
 * @param searcher the searcher used for rewriting and weight creation
 * @param lrc the segment for which spans are requested
 * @return a map from group hit to its spans
 * @throws IOException if rewriting the query or obtaining spans fails
 */
private static Map<GroupHit, Spans> collectSpansForOccurences(
    Set<GroupHit> occurences, Set<String> prefixes, String field,
    IndexSearcher searcher, LeafReaderContext lrc) throws IOException {
  final Map<GroupHit, Spans> spansByHit = new HashMap<>();
  final IndexReader indexReader = searcher.getIndexReader();
  final float boost = 0;

  for (GroupHit groupHit : occurences) {
    MtasSpanQuery hitQuery = createQueryFromGroupHit(prefixes, field, groupHit);
    if (hitQuery == null) {
      continue;
    }
    MtasSpanQuery rewritten = hitQuery.rewrite(indexReader);
    SpanWeight hitWeight = rewritten.createWeight(searcher, false, boost);
    Spans hitSpans = hitWeight.getSpans(lrc, SpanWeight.Postings.POSITIONS);
    if (hitSpans == null) {
      continue;
    }
    spansByHit.put(groupHit, hitSpans);
  }
  return spansByHit;
}
Example #5
Source File: PhraseCountQuery.java From pyramid with Apache License 2.0 | 6 votes |
@Override public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { Terms terms = context.reader().terms(field); if (terms == null) { return null; // field does not exist } ArrayList<Spans> subSpans = new ArrayList<>(clauses.size()); for (CustomSpanWeight w : subWeights) { Spans subSpan = w.getSpans(context, requiredPostings); if (subSpan != null) { subSpans.add(subSpan); } else { return null; // all required } } // all NearSpans require at least two subSpans return (!inOrder) ? new CustomNearSpansUnordered(slop, subSpans) : new NearSpansOrdered(slop, subSpans); }
Example #6
Source File: MtasSpanRecurrenceQuery.java From mtas with Apache License 2.0 | 6 votes |
@Override public MtasSpans getSpans(LeafReaderContext context, Postings requiredPostings) throws IOException { if (field == null) { return null; } else { Terms terms = context.reader().terms(field); if (terms == null) { return null; // field does not exist } Spans subSpans = subWeight.getSpans(context, requiredPostings); if (subSpans == null) { return null; } else { Spans ignoreSpans = null; if (ignoreWeight != null) { ignoreSpans = ignoreWeight.getSpans(context, requiredPostings); } return new MtasSpanRecurrenceSpans(MtasSpanRecurrenceQuery.this, subSpans, minimumRecurrence, maximumRecurrence, ignoreSpans, maximumIgnoreLength); } } }
Example #7
Source File: MtasSpanSequenceSpans.java From mtas with Apache License 2.0 | 6 votes |
/**
 * Creates sequence spans over the given per-item spans.
 *
 * <p>Each entry of {@code setSequenceSpans} is wrapped in a {@code QueueItem}; afterwards
 * the queue is reset and the costs are computed.
 *
 * @param query the query these spans belong to
 * @param setSequenceSpans the spans of the individual sequence items, in order
 * @param ignoreSpans the spans handed to the ignore item
 * @param maximumIgnoreLength the maximum ignore length handed to the ignore item
 */
public MtasSpanSequenceSpans(MtasSpanSequenceQuery query,
    List<MtasSpanSequenceQuerySpans> setSequenceSpans, Spans ignoreSpans,
    Integer maximumIgnoreLength) {
  super();
  docId = -1;
  this.query = query;

  queueSpans = new ArrayList<>();
  queueMatches = new ArrayList<>();
  for (MtasSpanSequenceQuerySpans itemSpans : setSequenceSpans) {
    QueueItem queueItem = new QueueItem(itemSpans);
    queueSpans.add(queueItem);
  }

  ignoreItem = new MtasIgnoreItem(ignoreSpans, maximumIgnoreLength);
  resetQueue();
  computeCosts();
}
Example #8
Source File: DisiWrapper.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Wraps a {@code Spans} instance; the wrapper has no scorer in this case.
 *
 * <p>When the spans expose a two-phase view, its approximation and match cost are used;
 * otherwise the spans themselves serve as the approximation with a match cost of zero.
 *
 * @param spans the spans to wrap
 */
public DisiWrapper(Spans spans) {
  this.scorer = null;
  this.spans = spans;
  this.iterator = spans;
  this.cost = iterator.cost();
  this.doc = -1;

  this.twoPhaseView = spans.asTwoPhaseIterator();
  if (twoPhaseView == null) {
    // No two-phase support: the iterator is its own approximation.
    approximation = iterator;
    matchCost = 0f;
  } else {
    approximation = twoPhaseView.approximation();
    matchCost = twoPhaseView.matchCost();
  }

  this.lastApproxNonMatchDoc = -2;
  this.lastApproxMatchDoc = -2;
}
Example #9
Source File: TestPayloadTermQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
public void test() throws IOException { SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term("field", "seventy")), new MaxPayloadFunction(), PayloadDecoder.FLOAT_DECODER); TopDocs hits = searcher.search(query, 100); assertTrue("hits is null and it shouldn't be", hits != null); assertTrue("hits Size: " + hits.totalHits.value + " is not: " + 100, hits.totalHits.value == 100); //they should all have the exact same score, because they all contain seventy once, and we set //all the other similarity factors to be 1 for (int i = 0; i < hits.scoreDocs.length; i++) { ScoreDoc doc = hits.scoreDocs[i]; assertTrue(doc.score + " does not equal: " + 1, doc.score == 1); } CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true); Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); assertTrue("spans is null and it shouldn't be", spans != null); /*float score = hits.score(0); for (int i =1; i < hits.length(); i++) { assertTrue("scores are not equal and they should be", score == hits.score(i)); }*/ }
Example #10
Source File: TestPayloadSpans.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Walks every span of every document and checks the payload count of each one.
 *
 * @param spans the spans to iterate
 * @param numSpans the expected total number of spans
 * @param numPayloads the expected payload count per span, indexed in iteration order
 * @throws IOException if iterating the spans fails
 */
private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
  final VerifyingCollector payloadCollector = new VerifyingCollector();
  int spanIndex = 0;

  for (int doc = spans.nextDoc(); doc != Spans.NO_MORE_DOCS; doc = spans.nextDoc()) {
    for (int start = spans.nextStartPosition();
        start != Spans.NO_MORE_POSITIONS;
        start = spans.nextStartPosition()) {
      if (VERBOSE) {
        System.out.println("\nSpans Dump --");
      }
      payloadCollector.reset();
      spans.collect(payloadCollector);
      assertEquals("payload size", numPayloads[spanIndex], payloadCollector.payloads.size());
      spanIndex++;
    }
  }

  assertEquals("expected numSpans", numSpans, spanIndex);
}
Example #11
Source File: TestPayloadSpans.java From lucene-solr with Apache License 2.0 | 6 votes |
private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads, int expectedPayloadLength, int expectedFirstByte) throws IOException { assertTrue("spans is null and it shouldn't be", spans != null); //each position match should have a span associated with it, since there is just one underlying term query, there should //only be one entry in the span VerifyingCollector collector = new VerifyingCollector(); int seen = 0; while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { collector.reset(); spans.collect(collector); collector.verify(expectedPayloadLength, expectedFirstByte); assertEquals("expectedNumPayloads", expectedNumPayloads, collector.payloads.size()); seen++; } } assertEquals("expectedNumSpans", expectedNumSpans, seen); }
Example #12
Source File: CustomSpanTermQuery.java From pyramid with Apache License 2.0 | 6 votes |
@Override public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); final TermState state = termContext.get(context.ord); if (state == null) { // term is not present in that reader assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term; return null; } final Terms terms = context.reader().terms(term.field()); if (terms == null) return null; if (terms.hasPositions() == false) throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run CustomSpanTermQuery (term=" + term.text() + ")"); final TermsEnum termsEnum = terms.iterator(); termsEnum.seekExact(term.bytes(), state); final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings()); float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST; return new TermSpans(getSimScorer(context), postings, term, positionsCost); }
Example #13
Source File: SpanPayloadCheckQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates the span scorer for a single index segment.
 *
 * @param context the segment to score
 * @return a scorer over the payload-checked spans, or {@code null} when no field is set
 *         or {@code getSpans} yields nothing for this segment
 * @throws IOException if reading the segment fails
 * @throws IllegalStateException if the field exists but carries no position data
 */
@Override
public SpanScorer scorer(LeafReaderContext context) throws IOException {
  if (field == null) {
    return null;
  }

  final Terms fieldTerms = context.reader().terms(field);
  final boolean lacksPositions = fieldTerms != null && !fieldTerms.hasPositions();
  if (lacksPositions) {
    throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
  }

  final Spans payloadSpans = getSpans(context, Postings.PAYLOADS);
  if (payloadSpans != null) {
    final LeafSimScorer simScorer = getSimScorer(context);
    return new SpanScorer(this, payloadSpans, simScorer);
  }
  return null;
}
Example #14
Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Returns the matching spans of the wrapped weight, filtered by a payload check.
 *
 * @param context the segment to read
 * @param requiredPostings the postings level required by the caller; raised to at least
 *        {@code Postings.PAYLOADS} for the wrapped weight
 * @return the filtered spans, or {@code null} when the wrapped weight has no spans here
 * @throws IOException if reading the segment fails
 */
@Override
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
  final PayloadChecker payloadChecker = new PayloadChecker();
  final Spans candidates = matchWeight.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS));
  if (candidates == null) {
    return null;
  }

  return new FilterSpans(candidates) {
    /** Collects the candidate's payloads into the checker and reports its verdict. */
    @Override
    protected AcceptStatus accept(Spans candidate) throws IOException {
      payloadChecker.reset();
      candidate.collect(payloadChecker);
      return payloadChecker.match();
    }
  };
}
Example #15
Source File: MtasSpanUniquePositionQuery.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Wraps the sub-weight's spans for one segment in unique-position spans.
 *
 * @param context the segment to read
 * @param requiredPostings the postings level handed to the sub-weight
 * @return the wrapping spans, or {@code null} when the sub-weight has no spans here
 * @throws IOException if reading the segment fails
 */
@Override
public MtasSpans getSpans(LeafReaderContext context, Postings requiredPostings) throws IOException {
  final Spans wrapped = subWeight.getSpans(context, requiredPostings);
  if (wrapped != null) {
    return new MtasSpanUniquePositionSpans(MtasSpanUniquePositionQuery.this, wrapped);
  }
  return null;
}
Example #16
Source File: MtasMaximumExpandSpans.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Creates maximum-expand spans over the given sub-spans and resets the internal state.
 *
 * @param query the query these spans belong to
 * @param mtasCodecInfo the codec info stored for later use
 * @param field the field stored for later use
 * @param subSpans the spans being expanded
 */
public MtasMaximumExpandSpans(MtasMaximumExpandSpanQuery query,
    CodecInfo mtasCodecInfo, String field, Spans subSpans) {
  super();
  this.query = query;
  this.mtasCodecInfo = mtasCodecInfo;
  this.field = field;
  this.subSpans = subSpans;
  // Not positioned on any document yet.
  docId = -1;
  reset();
}
Example #17
Source File: SpanPayloadCheckQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns the matching spans of the wrapped weight, filtered by a payload check.
 *
 * @param context the segment to read
 * @param requiredPostings the postings level required by the caller; raised to at least
 *        {@code Postings.PAYLOADS} for the wrapped weight
 * @return the filtered spans, or {@code null} when the wrapped weight has no spans here
 * @throws IOException if reading the segment fails
 */
@Override
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
  final PayloadChecker payloadChecker = new PayloadChecker();
  final Spans candidates = matchWeight.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS));
  if (candidates == null) {
    return null;
  }

  return new FilterSpans(candidates) {
    /** Collects the candidate's payloads into the checker and reports its verdict. */
    @Override
    protected AcceptStatus accept(Spans candidate) throws IOException {
      payloadChecker.reset();
      candidate.collect(payloadChecker);
      return payloadChecker.match();
    }
  };
}
Example #18
Source File: MtasSpanRecurrenceSpans.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Creates recurrence spans over the given spans and resets the internal queue.
 *
 * @param query the query these spans belong to
 * @param spans the spans whose recurrence is matched
 * @param minimumRecurrence the minimum recurrence; asserted to be at least 1 and not
 *        greater than {@code maximumRecurrence}
 * @param maximumRecurrence the maximum recurrence
 * @param ignoreSpans the spans handed to the ignore item
 * @param maximumIgnoreLength the maximum ignore length handed to the ignore item
 */
public MtasSpanRecurrenceSpans(MtasSpanRecurrenceQuery query, Spans spans,
    int minimumRecurrence, int maximumRecurrence, Spans ignoreSpans,
    Integer maximumIgnoreLength) {
  assert minimumRecurrence <= maximumRecurrence : "minimumRecurrence > maximumRecurrence";
  assert minimumRecurrence > 0 : "minimumRecurrence < 1 not supported";

  this.query = query;
  this.spans = spans;
  this.minimumRecurrence = minimumRecurrence;
  this.maximumRecurrence = maximumRecurrence;

  this.queueSpans = new ArrayList<>();
  this.queueMatches = new ArrayList<>();
  this.ignoreItem = new MtasIgnoreItem(ignoreSpans, maximumIgnoreLength);

  resetQueue();
}
Example #19
Source File: CustomSpanWeight.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Creates the phrase-count scorer for a single index segment.
 *
 * @param context the segment to score
 * @return the scorer, or {@code null} when there are no spans in this segment
 * @throws IOException if reading the segment fails
 */
@Override
public PhraseCountScorer scorer(LeafReaderContext context) throws IOException {
  final Spans positionSpans = getSpans(context, Postings.POSITIONS);
  if (positionSpans != null) {
    final SimScorer simScorer = getSimScorer(context);
    return new PhraseCountScorer(this, positionSpans, simScorer, weightedCount);
  }
  return null;
}
Example #20
Source File: MtasIgnoreItem.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Reads further start positions from the ignore spans for as long as
 * {@code position >= currentPosition}, recording the start/end of each span read in the
 * base position lookups.
 *
 * <p>An {@link IOException} raised while reading is logged at debug level and ends the
 * loop with {@code currentPosition} set to {@code Spans.NO_MORE_POSITIONS}; it is not
 * propagated to the caller.
 *
 * @param position the position up to which ignore spans should be read
 */
private void moveTo(int position) {
  while (position >= currentPosition) {
    try {
      currentPosition = ignoreSpans.nextStartPosition();
      // Only real positions at or beyond minimumPosition are recorded.
      if (currentPosition != Spans.NO_MORE_POSITIONS
          && currentPosition >= minimumPosition) {
        // Index by start position: the set of end positions plus a running maximum.
        if (!baseEndPositionList.containsKey(currentPosition)) {
          baseEndPositionList.put(currentPosition, new HashSet<Integer>());
          // NOTE(review): the first entry stores the start position itself, not
          // ignoreSpans.endPosition() — confirm this initial value is intended.
          maxBaseEndPosition.put(currentPosition, currentPosition);
        } else {
          maxBaseEndPosition.put(currentPosition,
              Math.max(maxBaseEndPosition.get(currentPosition),
                  ignoreSpans.endPosition()));
        }
        // Index by end position: the set of start positions plus a running minimum.
        if (!baseStartPositionList.containsKey(ignoreSpans.endPosition())) {
          baseStartPositionList.put(ignoreSpans.endPosition(),
              new HashSet<Integer>());
          // NOTE(review): the first entry stores the end position itself, not
          // currentPosition — confirm this initial value is intended.
          minBaseStartPosition.put(ignoreSpans.endPosition(),
              ignoreSpans.endPosition());
        } else {
          minBaseStartPosition.put(ignoreSpans.endPosition(),
              Math.min(minBaseStartPosition.get(ignoreSpans.endPosition()),
                  currentPosition));
        }
        // Register the span in both directions.
        baseStartPositionList.get(ignoreSpans.endPosition())
            .add(currentPosition);
        baseEndPositionList.get(currentPosition)
            .add(ignoreSpans.endPosition());
      }
    } catch (IOException e) {
      // Treat a read failure like running out of positions.
      log.debug(e);
      currentPosition = Spans.NO_MORE_POSITIONS;
      break;
    }
  }
}
Example #21
Source File: CustomConjunctionSpans.java From pyramid with Apache License 2.0 | 5 votes |
CustomConjunctionSpans(List<Spans> subSpans) { if (subSpans.size() < 2) { throw new IllegalArgumentException("Less than 2 subSpans.size():" + subSpans.size()); } this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); this.conjunction = ConjunctionDISI.intersectSpans(subSpans); this.atFirstInCurrentDoc = true; // ensure for doc -1 that start/end positions are -1 }
Example #22
Source File: MtasSpanUniquePositionSpans.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Creates unique-position spans over the given spans and resets the internal queue.
 *
 * @param query the query these spans belong to
 * @param spans the wrapped spans
 */
public MtasSpanUniquePositionSpans(MtasSpanUniquePositionQuery query, Spans spans) {
  super();
  this.spans = spans;
  this.query = query;
  this.queueSpans = new ArrayList<>();
  this.queueMatches = new ArrayList<>();
  resetQueue();
}
Example #23
Source File: MtasExpandSpans.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Creates expand spans over the given sub-spans and resets the internal state.
 *
 * @param query the query these spans belong to
 * @param mtasCodecInfo the codec info stored for later use
 * @param field the field stored for later use
 * @param subSpans the spans being expanded
 */
public MtasExpandSpans(MtasExpandSpanQuery query, CodecInfo mtasCodecInfo,
    String field, Spans subSpans) {
  super();
  this.query = query;
  this.mtasCodecInfo = mtasCodecInfo;
  this.field = field;
  this.subSpans = subSpans;
  // Not positioned on any document yet; no positions collected so far.
  docId = -1;
  collectedPositions = new TreeMap<>();
  reset();
}
Example #24
Source File: CustomNearSpansUnordered.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Creates unordered near spans over the given sub-spans.
 *
 * @param allowedSlop the slop value stored on this instance
 * @param subSpans the sub-spans, passed on to the superclass constructor
 * @throws IOException declared by the constructor; which step may raise it is not
 *         visible here
 */
public CustomNearSpansUnordered(int allowedSlop, List<Spans> subSpans)
    throws IOException {
  super(subSpans);
  this.allowedSlop = allowedSlop;
  // Created after the superclass has been initialised with the sub-spans.
  this.spanWindow = new SpanTotalLengthEndPositionWindow();
}
Example #25
Source File: MtasSpanSequenceQuery.java From mtas with Apache License 2.0 | 5 votes |
@Override public MtasSpans getSpans(LeafReaderContext context, Postings requiredPostings) throws IOException { if (field == null) { return null; } else { Terms terms = context.reader().terms(field); if (terms == null) { return null; // field does not exist } List<MtasSpanSequenceQuerySpans> setSequenceSpans = new ArrayList<>( items.size()); Spans ignoreSpans = null; boolean allSpansEmpty = true; for (MtasSpanSequenceQueryWeight w : subWeights) { Spans sequenceSpans = w.spanWeight.getSpans(context, requiredPostings); if (sequenceSpans != null) { setSequenceSpans.add(new MtasSpanSequenceQuerySpans( MtasSpanSequenceQuery.this, sequenceSpans, w.optional)); allSpansEmpty = false; } else { if (w.optional) { setSequenceSpans.add(new MtasSpanSequenceQuerySpans( MtasSpanSequenceQuery.this, null, w.optional)); } else { return null; } } } if (allSpansEmpty) { return null; // at least one required } else if (ignoreWeight != null) { ignoreSpans = ignoreWeight.getSpans(context, requiredPostings); } return new MtasSpanSequenceSpans(MtasSpanSequenceQuery.this, setSequenceSpans, ignoreSpans, maximumIgnoreLength); } }
Example #26
Source File: CustomNearSpansUnordered.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Checks whether two spans in the same document are ordered, allowing overlap.
 *
 * @param spans1 the first spans, positioned on the same document as {@code spans2}
 * @param spans2 the second spans
 * @return {@code true} iff {@code spans1} starts before {@code spans2}, or both start at
 *         the same position and {@code spans1} ends before {@code spans2}
 */
static boolean positionsOrdered(Spans spans1, Spans spans2) {
  assert spans1.docID() == spans2.docID() : "doc1 " + spans1.docID() + " != doc2 " + spans2.docID();
  final int firstStart = spans1.startPosition();
  final int secondStart = spans2.startPosition();
  if (firstStart != secondStart) {
    return firstStart < secondStart;
  }
  // Same start: the end positions break the tie.
  return spans1.endPosition() < spans2.endPosition();
}
Example #27
Source File: PayloadScoreQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates the payload-aware span scorer for a single index segment.
 *
 * @param context the segment to score
 * @return the scorer, or {@code null} when there are no spans in this segment
 * @throws IOException if reading the segment fails
 */
@Override
public SpanScorer scorer(LeafReaderContext context) throws IOException {
  final Spans rawSpans = getSpans(context, Postings.PAYLOADS);
  if (rawSpans == null) {
    return null;
  }
  final LeafSimScorer simScorer = innerWeight.getSimScorer(context);
  final PayloadSpans decodedSpans = new PayloadSpans(rawSpans, decoder);
  return new PayloadSpanScorer(this, decodedSpans, simScorer);
}
Example #28
Source File: TestPayloadSpans.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Indexes one document, runs an ordered near query for "a" followed by "k" with slop 1,
 * and checks that exactly the payloads "a:Noise:10" and "k:Noise:11" are collected.
 *
 * @throws IOException if indexing or searching fails
 */
public void testShrinkToAfterShortestMatch() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new TestPayloadAnalyzer()));

  Document document = new Document();
  document.add(new TextField("content", new StringReader("a b c d e f g h i j a k")));
  indexWriter.addDocument(document);

  IndexReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(getOnlyLeafReader(indexReader), false);
  indexWriter.close();

  SpanTermQuery termA = new SpanTermQuery(new Term("content", "a"));
  SpanTermQuery termK = new SpanTermQuery(new Term("content", "k"));
  SpanQuery[] nearClauses = { termA, termK };
  SpanNearQuery nearQuery = new SpanNearQuery(nearClauses, 1, true);

  VerifyingCollector payloadCollector = new VerifyingCollector();
  Spans nearSpans = nearQuery.createWeight(indexSearcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(indexSearcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);

  TopDocs topDocs = indexSearcher.search(nearQuery, 1);
  Set<String> seenPayloads = new HashSet<>();
  for (int hit = 0; hit < topDocs.scoreDocs.length; hit++) {
    while (nearSpans.nextDoc() != Spans.NO_MORE_DOCS) {
      while (nearSpans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
        payloadCollector.reset();
        nearSpans.collect(payloadCollector);
        for (final BytesRef payloadBytes : payloadCollector.payloads) {
          seenPayloads.add(Term.toString(payloadBytes));
        }
      }
    }
  }

  assertEquals(2, seenPayloads.size());
  assertTrue(seenPayloads.contains("a:Noise:10"));
  assertTrue(seenPayloads.contains("k:Noise:11"));

  indexReader.close();
  dir.close();
}
Example #29
Source File: PhraseCountScorer.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Sole constructor.
 *
 * @param weight the weight passed on to the superclass constructor
 * @param spans the spans to score; must not be {@code null}
 * @param docScorer the similarity scorer stored on this instance
 * @param weightedCount the weighted-count flag stored on this instance
 * @throws NullPointerException if {@code spans} is {@code null}
 */
public PhraseCountScorer(CustomSpanWeight weight, Spans spans,
    Similarity.SimScorer docScorer, boolean weightedCount) {
  super(weight);
  Objects.requireNonNull(spans);
  this.spans = spans;
  this.docScorer = docScorer;
  this.weightedCount = weightedCount;
}
Example #30
Source File: ConjunctionDISI.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Adds the Spans. */ private static void addSpans(Spans spans, List<DocIdSetIterator> allIterators, List<TwoPhaseIterator> twoPhaseIterators) { TwoPhaseIterator twoPhaseIter = spans.asTwoPhaseIterator(); if (twoPhaseIter != null) { addTwoPhaseIterator(twoPhaseIter, allIterators, twoPhaseIterators); } else { // no approximation support, use the iterator as-is addIterator(spans, allIterators, twoPhaseIterators); } }