org.apache.lucene.util.automaton.Automaton Java Examples
The following examples show how to use
org.apache.lucene.util.automaton.Automaton.
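Before the examples, here is a minimal sketch (not taken from any project below) of the Automaton lifecycle most of them rely on: states are created explicitly, transitions carry integer labels (Unicode code points or bytes), finishState() freezes the automaton, and a run-automaton wrapper answers membership queries. All classes are from org.apache.lucene.util.automaton.

  Automaton a = new Automaton();
  int start = a.createState();
  int end = a.createState();
  a.setAccept(end, true);
  a.addTransition(start, end, 'a', 'z'); // one transition covering the label range a..z
  a.finishState();                       // freeze the automaton before use
  CharacterRunAutomaton run = new CharacterRunAutomaton(a);
  // run.run("q") == true; run.run("qq") == false (accepts exactly one lowercase letter)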
Example #1
Source File: TermAutomatonQuery.java from lucene-solr with Apache License 2.0
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher,
                           Map<Integer,TermStates> termStates, float boost) throws IOException {
  super(TermAutomatonQuery.this);
  this.automaton = automaton;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();
  List<TermStatistics> allTermStats = new ArrayList<>();
  for (Map.Entry<Integer,BytesRef> ent : idToTerm.entrySet()) {
    Integer termID = ent.getKey();
    if (ent.getValue() != null) {
      TermStates ts = termStates.get(termID);
      if (ts.docFreq() > 0) {
        allTermStats.add(searcher.termStatistics(new Term(field, ent.getValue()), ts.docFreq(), ts.totalTermFreq()));
      }
    }
  }

  if (allTermStats.isEmpty()) {
    stats = null; // no terms matched at all, will not use sim
  } else {
    stats = similarity.scorer(boost, searcher.collectionStatistics(field),
                              allTermStats.toArray(new TermStatistics[allTermStats.size()]));
  }
}
Example #2
Source File: TestDuelingAnalyzers.java from lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  Automaton single = new Automaton();
  int initial = single.createState();
  int accept = single.createState();
  single.setAccept(accept, true);

  // build an automaton matching this jvm's letter definition
  for (int i = 0; i <= 0x10FFFF; i++) {
    if (Character.isLetter(i)) {
      single.addTransition(initial, accept, i);
    }
  }
  Automaton repeat = Operations.repeat(single);
  jvmLetter = new CharacterRunAutomaton(repeat);
}
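Once wrapped in a CharacterRunAutomaton, the repeated letter automaton answers membership per string. A hedged sketch of how jvmLetter might then be exercised (these assertions are illustrative, not part of the test):

  // illustrative use of the jvmLetter run automaton built above
  assert jvmLetter.run("abc");           // letters only: accepted
  assert jvmLetter.run("ab1") == false;  // contains a digit: rejected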
Example #3
Source File: AnalyzingSuggester.java from lucene-solr with Apache License 2.0
final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
  // Analyze surface form:
  Automaton automaton;
  try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
    // Create corresponding automaton: labels are bytes
    // from each analyzed token, with byte 0 used as
    // separator between tokens:
    automaton = ts2a.toAutomaton(ts);
  }

  automaton = replaceSep(automaton);
  automaton = convertAutomaton(automaton);

  // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
  // assert SpecialOperations.isFinite(automaton);

  // Get all paths from the automaton (there can be
  // more than one path, eg if the analyzer created a
  // graph using SynFilter or WDF):
  return automaton;
}
Example #4
Source File: MtasToken.java from mtas with Apache License 2.0
/**
 * Creates the automaton map.
 *
 * @param prefix the prefix
 * @param valueList the value list
 * @param filter the filter
 * @return the map
 */
public static Map<String, Automaton> createAutomatonMap(String prefix, List<String> valueList, Boolean filter) {
  HashMap<String, Automaton> automatonMap = new HashMap<>();
  if (valueList != null) {
    for (String item : valueList) {
      if (filter) {
        item = item.replaceAll("([\\\"\\)\\(\\<\\>\\.\\@\\#\\]\\[\\{\\}])", "\\\\$1");
      }
      automatonMap.put(item,
          new RegExp(prefix + MtasToken.DELIMITER + item + "\u0000*").toAutomaton());
    }
  }
  return automatonMap;
}
Example #5
Source File: TestSynonymGraphFilter.java from lucene-solr with Apache License 2.0
/** Just creates a side path from startState to endState with the provided tokens. */
private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens, List<Integer> flatStates) {
  int lastState = startState;
  for (int i = 0; i < tokens.length; i++) {
    int nextState;
    if (i == tokens.length - 1) {
      nextState = endState;
    } else if (flatStates == null || i >= flatStates.size()) {
      nextState = a.createState();
      if (flatStates != null) {
        assert i == flatStates.size();
        flatStates.add(nextState);
      }
    } else {
      nextState = flatStates.get(i);
    }
    a.addTransition(lastState, nextState, tokens[i]);
    lastState = nextState;
  }
}
Example #6
Source File: FuzzySuggester.java from lucene-solr with Apache License 2.0
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
    Automaton lookupAutomaton,
    FST<Pair<Long,BytesRef>> fst) throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
Example #7
Source File: PrefixQuery.java from lucene-solr with Apache License 2.0
/** Build an automaton accepting all terms with the specified prefix. */
public static Automaton toAutomaton(BytesRef prefix) {
  final int numStatesAndTransitions = prefix.length + 1;
  final Automaton automaton = new Automaton(numStatesAndTransitions, numStatesAndTransitions);
  int lastState = automaton.createState();
  for (int i = 0; i < prefix.length; i++) {
    int state = automaton.createState();
    automaton.addTransition(lastState, state, prefix.bytes[prefix.offset + i] & 0xff);
    lastState = state;
  }
  automaton.setAccept(lastState, true);
  automaton.addTransition(lastState, lastState, 0, 255);
  automaton.finishState();
  assert automaton.isDeterministic();
  return automaton;
}
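Since the transition labels here are term bytes (note the & 0xff), the result pairs naturally with a ByteRunAutomaton for membership checks. A small sketch under that assumption:

  // hypothetical check against the automaton built by the method above
  Automaton a = PrefixQuery.toAutomaton(new BytesRef("do"));
  ByteRunAutomaton run = new ByteRunAutomaton(a);
  BytesRef term = new BytesRef("document");
  boolean matches = run.run(term.bytes, term.offset, term.length); // true: "document" starts with "do"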
Example #8
Source File: GraphTokenStreamFiniteStrings.java from lucene-solr with Apache License 2.0
/**
 * Returns the articulation points (or cut vertices) of the graph:
 * https://en.wikipedia.org/wiki/Biconnected_component
 */
public int[] articulationPoints() {
  if (det.getNumStates() == 0) {
    return new int[0];
  }
  Automaton.Builder undirect = new Automaton.Builder();
  undirect.copy(det);
  for (int i = 0; i < det.getNumStates(); i++) {
    int numT = det.initTransition(i, transition);
    for (int j = 0; j < numT; j++) {
      det.getNextTransition(transition);
      undirect.addTransition(transition.dest, i, transition.min);
    }
  }
  int numStates = det.getNumStates();
  BitSet visited = new BitSet(numStates);
  int[] depth = new int[det.getNumStates()];
  int[] low = new int[det.getNumStates()];
  int[] parent = new int[det.getNumStates()];
  Arrays.fill(parent, -1);
  List<Integer> points = new ArrayList<>();
  articulationPointsRecurse(undirect.finish(), 0, 0, depth, low, parent, visited, points);
  Collections.reverse(points);
  return points.stream().mapToInt(p -> p).toArray();
}
Example #9
Source File: ContextMapping.java from Elasticsearch with Apache License 2.0
/**
 * Create an automaton for a given context query; this automaton will be used
 * to find the matching paths with the fst
 *
 * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 *
 * @return Automaton matching the given Query
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
  Automaton a = Automata.makeEmptyString();

  Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR);
  if (preserveSep) {
    // if separators are preserved the fst contains a SEP_LABEL
    // behind each gap. To have a matching automaton, we need to
    // include the SEP_LABEL in the query as well
    gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL));
  }

  for (ContextQuery query : queries) {
    a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
  }

  // TODO: should we limit this? Do any of our ContextQuery impls really create
  // exponential regexps? GeoQuery looks safe (union of strings).
  return Operations.determinize(a, Integer.MAX_VALUE);
}
Example #10
Source File: IncludeExclude.java from Elasticsearch with Apache License 2.0
private Automaton toAutomaton() {
  Automaton a = null;
  if (include != null) {
    a = include.toAutomaton();
  } else if (includeValues != null) {
    a = Automata.makeStringUnion(includeValues);
  } else {
    a = Automata.makeAnyString();
  }
  if (exclude != null) {
    a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  } else if (excludeValues != null) {
    a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  }
  return a;
}
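The include-minus-exclude idea above can be reproduced standalone. A minimal sketch with made-up values (Automata.makeStringUnion expects its input in sorted order, which a TreeSet guarantees):

  SortedSet<BytesRef> includeValues = new TreeSet<>(Arrays.asList(new BytesRef("alpha"), new BytesRef("beta")));
  SortedSet<BytesRef> excludeValues = new TreeSet<>(Arrays.asList(new BytesRef("beta")));
  Automaton a = Operations.minus(
      Automata.makeStringUnion(includeValues),
      Automata.makeStringUnion(excludeValues),
      Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  // the result accepts "alpha" but no longer "beta"
  CharacterRunAutomaton run = new CharacterRunAutomaton(a);
  assert run.run("alpha") && run.run("beta") == false;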
Example #11
Source File: TestRegexpQuery.java from lucene-solr with Apache License 2.0
public void testCustomProvider() throws IOException {
  AutomatonProvider myProvider = new AutomatonProvider() {
    // automaton that matches quick, brown, or bob
    private Automaton quickBrownAutomaton = Operations.union(Arrays
        .asList(Automata.makeString("quick"), Automata.makeString("brown"),
            Automata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      if (name.equals("quickBrown")) return quickBrownAutomaton;
      else return null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
      myProvider, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(1, searcher.search(query, 5).totalHits.value);
}
Example #12
Source File: XAnalyzingSuggester.java from Elasticsearch with Apache License 2.0
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  Automaton automaton = null;
  try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
    automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  }

  automaton = replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert

  // This automaton should not blow up during determinize:
  automaton = Operations.determinize(automaton, Integer.MAX_VALUE);
  return automaton;
}
Example #13
Source File: FuzzyCompletionQuery.java from lucene-solr with Apache License 2.0
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final Automaton originalAutomata;
  try (CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text())) {
    originalAutomata = stream.toAutomaton(unicodeAware);
  }
  Set<IntsRef> refs = new HashSet<>();
  Automaton automaton = toLevenshteinAutomata(originalAutomata, refs);
  if (unicodeAware) {
    Automaton utf8automaton = new UTF32ToUTF8().convert(automaton);
    utf8automaton = Operations.determinize(utf8automaton, maxDeterminizedStates);
    automaton = utf8automaton;
  }
  // TODO Accumulating all refs is bad, because the resulting set may be very big.
  // TODO Better iterate over automaton again inside FuzzyCompletionWeight?
  return new FuzzyCompletionWeight(this, automaton, refs);
}
Example #14
Source File: ContextQuery.java from lucene-solr with Apache License 2.0
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
  final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  if (matchAllContexts || contexts.size() == 0) {
    return Operations.concatenate(matchAllAutomaton, sep);
  } else {
    Automaton contextsAutomaton = null;
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
      final ContextMetaData contextMetaData = entry.getValue();
      final IntsRef ref = entry.getKey();
      Automaton contextAutomaton = Automata.makeString(ref.ints, ref.offset, ref.length);
      if (contextMetaData.exact == false) {
        contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
      }
      contextAutomaton = Operations.concatenate(contextAutomaton, sep);
      if (contextsAutomaton == null) {
        contextsAutomaton = contextAutomaton;
      } else {
        contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
      }
    }
    return contextsAutomaton;
  }
}
Example #15
Source File: TestGraphTokenizers.java from lucene-solr with Apache License 2.0
public void testSynOverMultipleHoles() throws Exception {
  final TokenStream ts = new CannedTokenStream(
    new Token[] {
      token("a", 1, 1),
      token("x", 0, 3),
      token("b", 3, 1),
    });
  final Automaton a1 = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b"));
  final Automaton a2 = join(s2a("x"), SEP_A, s2a("b"));
  assertSameLanguage(Operations.union(a1, a2), ts);
}
Example #16
Source File: TestAutomatonQuery.java from lucene-solr with Apache License 2.0
private void assertAutomatonHits(int expected, Automaton automaton) throws IOException {
  AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);

  query.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
  assertEquals(expected, automatonQueryNrHits(query));

  query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
  assertEquals(expected, automatonQueryNrHits(query));

  query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
  assertEquals(expected, automatonQueryNrHits(query));
}
Example #17
Source File: TestAutomatonQueryUnicode.java from lucene-solr with Apache License 2.0
private void assertAutomatonHits(int expected, Automaton automaton) throws IOException {
  AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);

  query.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
  assertEquals(expected, automatonQueryNrHits(query));

  query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
  assertEquals(expected, automatonQueryNrHits(query));

  query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
  assertEquals(expected, automatonQueryNrHits(query));
}
Example #18
Source File: TestAutomatonQuery.java from lucene-solr with Apache License 2.0
/**
 * Test that rewriting to a prefix query works as expected and preserves
 * MultiTermQuery semantics.
 */
public void testRewritePrefix() throws IOException {
  Automaton pfx = Automata.makeString("do");
  Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
  assertEquals(3, automatonQueryNrHits(aq));
}
Example #19
Source File: XContentMapValues.java from crate with Apache License 2.0
/**
 * Returns a function that filters a document map based on the given include and exclude rules.
 * @see #filter(Map, String[], String[]) for details
 */
public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) {
  CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());

  CharacterRunAutomaton include;
  if (includes == null || includes.length == 0) {
    include = matchAllAutomaton;
  } else {
    Automaton includeA = Regex.simpleMatchToAutomaton(includes);
    includeA = makeMatchDotsInFieldNames(includeA);
    include = new CharacterRunAutomaton(includeA);
  }

  Automaton excludeA;
  if (excludes == null || excludes.length == 0) {
    excludeA = Automata.makeEmpty();
  } else {
    excludeA = Regex.simpleMatchToAutomaton(excludes);
    excludeA = makeMatchDotsInFieldNames(excludeA);
  }
  CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA);

  // NOTE: We cannot use Operations.minus because of the special case that
  // we want all sub properties to match as soon as an object matches
  return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton);
}
Example #20
Source File: TestIndexWriter.java from lucene-solr with Apache License 2.0
public void testStopwordsPosIncHole2() throws Exception {
  // use two stopfilters for testing here
  Directory dir = newDirectory();
  final Automaton secondSet = Automata.makeString("foobar");
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
      TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
      stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  doc.add(new TextField("body", "just a foobar", Field.Store.NO));
  doc.add(new TextField("body", "test of gaps", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("body", "just"), 0);
  builder.add(new Term("body", "test"), 3);
  PhraseQuery pq = builder.build();
  // body:"just ? ? test"
  assertEquals(1, is.search(pq, 5).totalHits.value);
  ir.close();
  dir.close();
}
Example #21
Source File: TestGraphTokenizers.java from lucene-solr with Apache License 2.0
public void testOverlappedTokensLattice() throws Exception {
  final TokenStream ts = new CannedTokenStream(
    new Token[] {
      token("abc", 1, 1),
      token("xyz", 0, 2),
      token("def", 1, 1),
    });
  final Automaton a1 = s2a("xyz");
  final Automaton a2 = join("abc", "def");
  assertSameLanguage(Operations.union(a1, a2), ts);
}
Example #22
Source File: XAnalyzingSuggester.java from Elasticsearch with Apache License 2.0
/**
 * Creates a new suggester.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *   analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *   analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *   surface forms to keep for a single analyzed form.
 *   When there are too many surface forms we discard the
 *   lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *   to expand from the analyzed form. Set this to -1 for
 *   no limit.
 */
public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
                           int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                           boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst,
                           boolean hasPayloads, int maxAnalyzedPathsForOneInput,
                           int sepLabel, int payloadSep, int endByte, int holeCharacter) {
  // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
  this.indexAnalyzer = indexAnalyzer;
  this.queryAnalyzer = queryAnalyzer;
  this.fst = fst;
  this.hasPayloads = hasPayloads;
  if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) {
    throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options);
  }
  this.exactFirst = (options & EXACT_FIRST) != 0;
  this.preserveSep = (options & PRESERVE_SEP) != 0;

  // FLORIAN EDIT: I added <code>queryPrefix</code> for context dependent suggestions
  this.queryPrefix = queryPrefix;

  // NOTE: this is just an implementation limitation; if
  // somehow this is a problem we could fix it by using
  // more than one byte to disambiguate ... but 256 seems
  // like it should be way more than enough.
  if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) {
    throw new IllegalArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and < 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")");
  }
  this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

  if (maxGraphExpansions < 1 && maxGraphExpansions != -1) {
    throw new IllegalArgumentException("maxGraphExpansions must be -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
  }
  this.maxGraphExpansions = maxGraphExpansions;
  this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
  this.preservePositionIncrements = preservePositionIncrements;
  this.sepLabel = sepLabel;
  this.payloadSep = payloadSep;
  this.endByte = endByte;
  this.holeCharacter = holeCharacter;
}
Example #23
Source File: TestSimplePatternTokenizer.java from lucene-solr with Apache License 2.0
public void testNotDeterminized() throws Exception {
  Automaton a = new Automaton();
  int start = a.createState();
  int mid1 = a.createState();
  int mid2 = a.createState();
  int end = a.createState();
  a.setAccept(end, true);
  a.addTransition(start, mid1, 'a', 'z');
  a.addTransition(start, mid2, 'a', 'z');
  a.addTransition(mid1, end, 'b');
  a.addTransition(mid2, end, 'b');
  expectThrows(IllegalArgumentException.class, () -> new SimplePatternTokenizer(a));
}
Example #24
Source File: SimplePatternTokenizer.java from lucene-solr with Apache License 2.0
/** Runs a pre-built automaton. */
public SimplePatternTokenizer(AttributeFactory factory, Automaton dfa) {
  super(factory);

  // we require user to do this up front because it is a possibly very costly operation,
  // and user may be creating us frequently, not realizing this ctor is otherwise trappy
  if (dfa.isDeterministic() == false) {
    throw new IllegalArgumentException("please determinize the incoming automaton first");
  }

  runDFA = new CharacterRunAutomaton(dfa, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}
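A caller holding a possibly nondeterministic automaton is therefore expected to determinize it first; a hedged sketch:

  // assuming `nfa` is a nondeterministic Automaton, e.g. the one built in Example #23
  Automaton dfa = Operations.determinize(nfa, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  Tokenizer tokenizer = new SimplePatternTokenizer(dfa); // now accepted: isDeterministic() is true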
Example #25
Source File: TestSynonymGraphFilter.java from lucene-solr with Apache License 2.0
private Automaton toAutomaton(TokenStream ts) throws IOException {
  PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
  PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  ts.reset();
  Automaton a = new Automaton();
  int srcNode = -1;
  int destNode = -1;
  int state = a.createState();
  while (ts.incrementToken()) {
    assert termAtt.length() == 1;
    char c = termAtt.charAt(0);
    int posInc = posIncAtt.getPositionIncrement();
    if (posInc != 0) {
      srcNode += posInc;
      while (state < srcNode) {
        state = a.createState();
      }
    }
    destNode = srcNode + posLenAtt.getPositionLength();
    while (state < destNode) {
      state = a.createState();
    }
    a.addTransition(srcNode, destNode, c);
  }
  ts.end();
  ts.close();
  a.finishState();
  a.setAccept(destNode, true);
  return a;
}
Example #26
Source File: ConcatenateGraphFilter.java from lucene-solr with Apache License 2.0
@Override
public boolean incrementToken() throws IOException {
  if (finiteStrings == null) {
    if (wasReset == false) {
      throw new IllegalStateException("reset() missing before incrementToken");
    }
    // lazy init/consume
    Automaton automaton = toAutomaton(); // calls reset(), incrementToken() repeatedly, and end() on inputTokenStream
    finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
    // note: would be nice to know the startOffset but toAutomaton doesn't capture it. We'll assume 0
    endOffset = inputTokenStream.getAttribute(OffsetAttribute.class).endOffset();
  }

  IntsRef string = finiteStrings.next();
  if (string == null) {
    return false;
  }

  clearAttributes();

  if (finiteStrings.size() > 1) { // if number of iterated strings so far is more than one...
    posIncrAtt.setPositionIncrement(0); // stacked
  }

  offsetAtt.setOffset(0, endOffset);

  Util.toBytesRef(string, bytesAtt.builder()); // now we have UTF-8

  if (charTermAttribute != null) {
    charTermAttribute.setLength(0);
    charTermAttribute.append(bytesAtt.toUTF16());
  }

  return true;
}
Example #27
Source File: TestSynonymGraphFilter.java from lucene-solr with Apache License 2.0
private boolean approxSubsetOf(Automaton a1, Automaton a2) {
  AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(a1);
  for (int i = 0; i < 2000; i++) {
    int[] ints = ras.getRandomAcceptedString(random());
    IntsRef path = new IntsRef(ints, 0, ints.length);
    if (accepts(a2, path) == false) {
      throw new RuntimeException("a2 does not accept " + path);
    }
  }

  // Presumed true
  return true;
}
Example #28
Source File: GraphEdgeCollector.java from lucene-solr with Apache License 2.0
@Override
public Query getResultQuery(SchemaField matchField, boolean useAutomaton) {
  if (collectorTerms == null || collectorTerms.size() == 0) {
    // return null if there are no terms (edges) to traverse.
    return null;
  } else {
    // Create a query
    Query q = null;

    // TODO: see if we should dynamically select this based on the frontier size.
    if (useAutomaton) {
      // build an automaton based query for the frontier.
      Automaton autn = buildAutomaton(collectorTerms);
      AutomatonQuery autnQuery = new AutomatonQuery(new Term(matchField.getName()), autn);
      q = autnQuery;
    } else {
      List<BytesRef> termList = new ArrayList<>(collectorTerms.size());
      for (int i = 0; i < collectorTerms.size(); i++) {
        BytesRef ref = new BytesRef();
        collectorTerms.get(i, ref);
        termList.add(ref);
      }
      q = (matchField.hasDocValues() && !matchField.indexed())
          ? new DocValuesTermsQuery(matchField.getName(), termList)
          : new TermInSetQuery(matchField.getName(), termList);
    }

    return q;
  }
}
Example #29
Source File: Regex.java from crate with Apache License 2.0
/**
 * Return an Automaton that matches the union of the provided patterns.
 */
public static Automaton simpleMatchToAutomaton(String... patterns) {
  if (patterns.length < 1) {
    throw new IllegalArgumentException("There must be at least one pattern, zero given");
  }
  List<Automaton> automata = new ArrayList<>();
  for (String pattern : patterns) {
    automata.add(simpleMatchToAutomaton(pattern));
  }
  return Operations.union(automata);
}
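A hedged usage sketch: simpleMatchToAutomaton compiles simple * wildcards, so the union matches a string whenever any single pattern does (the patterns here are made up):

  Automaton a = Regex.simpleMatchToAutomaton("foo*", "*.bar");
  CharacterRunAutomaton run = new CharacterRunAutomaton(a); // determinizes up to a default state limit
  // run.run("foobar") == true (matches "foo*"); run.run("x.bar") == true; run.run("baz") == false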
Example #30
Source File: GraphEdgeCollector.java from lucene-solr with Apache License 2.0
/**
 * Build an automaton to represent the frontier query
 */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
  // need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)
  final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
  for (int i = 0; i < termBytesHash.size(); i++) {
    BytesRef ref = new BytesRef();
    termBytesHash.get(i, ref);
    terms.add(ref);
  }
  final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
  return a;
}
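DaciukMihovAutomatonBuilder produces a minimal, deterministic automaton but requires its input terms in sorted order, which is why the method above copies the hash into a TreeSet first. A tiny standalone sketch with made-up terms:

  TreeSet<BytesRef> terms = new TreeSet<>(Arrays.asList(new BytesRef("ab"), new BytesRef("abc")));
  Automaton a = DaciukMihovAutomatonBuilder.build(terms); // accepts exactly "ab" and "abc"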