Java Code Examples for org.apache.lucene.util.automaton.Automaton#Builder
The following examples show how to use
org.apache.lucene.util.automaton.Automaton#Builder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestSynonymGraphFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Adds a side path from {@code startState} to {@code endState}, emitting one transition per
 * entry of {@code tokens}.
 *
 * <p>The last token always lands on {@code endState}. For every earlier token index {@code i},
 * the intermediate state is read from {@code flatStates.get(i)} when {@code flatStates} is
 * non-null and already holds an entry at that index; otherwise a new state is created on the
 * builder and, if {@code flatStates} is non-null, appended to it.
 *
 * @param a builder receiving the new states and transitions
 * @param startState state the path leaves from
 * @param endState state the path arrives at
 * @param tokens labels of the transitions, in path order; an empty array adds nothing
 * @param flatStates optional (may be {@code null}) list of intermediate states indexed by
 *     token position; may be extended by this call
 */
private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens, List<Integer> flatStates) {
  final int lastIndex = tokens.length - 1;
  int from = startState;
  for (int idx = 0; idx <= lastIndex; idx++) {
    final int to;
    if (idx == lastIndex) {
      // The final token closes the path.
      to = endState;
    } else if (flatStates != null && idx < flatStates.size()) {
      // An intermediate state for this position is already recorded.
      to = flatStates.get(idx);
    } else {
      to = a.createState();
      if (flatStates != null) {
        // Entries are only ever appended in position order.
        assert idx == flatStates.size();
        flatStates.add(to);
      }
    }
    a.addTransition(from, to, tokens[idx]);
    from = to;
  }
}
Example 2
Source File: GraphTokenStreamFiniteStrings.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Returns the articulation points (or cut vertices) of the graph: * https://en.wikipedia.org/wiki/Biconnected_component */ public int[] articulationPoints() { if (det.getNumStates() == 0) { return new int[0]; } // Automaton.Builder undirect = new Automaton.Builder(); undirect.copy(det); for (int i = 0; i < det.getNumStates(); i++) { int numT = det.initTransition(i, transition); for (int j = 0; j < numT; j++) { det.getNextTransition(transition); undirect.addTransition(transition.dest, i, transition.min); } } int numStates = det.getNumStates(); BitSet visited = new BitSet(numStates); int[] depth = new int[det.getNumStates()]; int[] low = new int[det.getNumStates()]; int[] parent = new int[det.getNumStates()]; Arrays.fill(parent, -1); List<Integer> points = new ArrayList<>(); articulationPointsRecurse(undirect.finish(), 0, 0, depth, low, parent, visited, points); Collections.reverse(points); return points.stream().mapToInt(p -> p).toArray(); }
Example 3
Source File: TestSynonymGraphFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Renumbers nodes according to their topo sort */ private Automaton topoSort(Automaton in) { int[] newToOld = Operations.topoSortStates(in); int[] oldToNew = new int[newToOld.length]; Automaton.Builder a = new Automaton.Builder(); //System.out.println("remap:"); for(int i=0;i<newToOld.length;i++) { a.createState(); oldToNew[newToOld[i]] = i; //System.out.println(" " + newToOld[i] + " -> " + i); if (in.isAccept(newToOld[i])) { a.setAccept(i, true); //System.out.println(" **"); } } Transition t = new Transition(); for(int i=0;i<newToOld.length;i++) { int count = in.initTransition(newToOld[i], t); for(int j=0;j<count;j++) { in.getNextTransition(t); a.addTransition(i, oldToNew[t.dest], t.min, t.max); } } return a.finish(); }
Example 4
Source File: TokenStreamToAutomaton.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Walks backwards from position {@code pos}, adding {@code HOLE} transitions between each
 * position and its predecessor until both the current position's {@code arriving} state and
 * the predecessor's {@code leaving} state are already set, or position 0 is reached.
 *
 * <p>NOTE(review): {@code -1} in {@code arriving}/{@code leaving} looks like the "no state
 * assigned yet" marker -- confirm against the {@code Position} class.
 *
 * @param builder builder receiving new states and transitions
 * @param positions buffer of per-position data; entries are updated in place
 * @param pos position to start from; the loop reads {@code pos} and {@code pos - 1}
 */
private static void addHoles(Automaton.Builder builder, RollingBuffer<Position> positions, int pos) {
  Position current = positions.get(pos);
  Position previous = positions.get(pos - 1);

  while (current.arriving == -1 || previous.leaving == -1) {

    if (current.arriving == -1) {
      // Give the current position an arriving state and link it to its leaving state.
      current.arriving = builder.createState();
      builder.addTransition(current.arriving, current.leaving, POS_SEP);
    }

    if (previous.leaving == -1) {
      // When pos is 1 the predecessor leaves from state 0; otherwise it gets a new state.
      previous.leaving = (pos == 1) ? 0 : builder.createState();
      if (previous.arriving != -1) {
        builder.addTransition(previous.arriving, previous.leaving, POS_SEP);
      }
    }

    builder.addTransition(previous.leaving, current.arriving, HOLE);

    pos--;
    if (pos <= 0) {
      break;
    }
    current = previous;
    previous = positions.get(pos - 1);
  }
}
Example 5
Source File: TermAutomatonQuery.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates a query for the given field, backed by a newly constructed
 * {@link Automaton.Builder}.
 *
 * @param field value stored in this query's {@code field} member
 */
public TermAutomatonQuery(String field) {
  this.builder = new Automaton.Builder();
  this.field = field;
}
Example 6
Source File: GraphTokenStreamFiniteStrings.java From lucene-solr with Apache License 2.0 | 4 votes |
/** * Build an automaton from the provided {@link TokenStream}. */ private Automaton build(final TokenStream in) throws IOException { Automaton.Builder builder = new Automaton.Builder(); final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class); final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class); in.reset(); int pos = -1; int prevIncr = 1; int state = -1; int id = -1; int gap = 0; while (in.incrementToken()) { int currentIncr = posIncAtt.getPositionIncrement(); if (pos == -1 && currentIncr < 1) { throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1"); } if (currentIncr == 0) { if (gap > 0) { pos -= gap; } } else { pos++; gap = currentIncr - 1; } int endPos = pos + posLengthAtt.getPositionLength() + gap; while (state < endPos) { state = builder.createState(); } id++; if (tokens.length < id + 1) { tokens = ArrayUtil.grow(tokens, id + 1); } tokens[id] = in.cloneAttributes(); builder.addTransition(pos, endPos, id); pos += gap; // we always produce linear token graphs from getFiniteStrings(), so we need to adjust // posLength and posIncrement accordingly tokens[id].addAttribute(PositionLengthAttribute.class).setPositionLength(1); if (currentIncr == 0) { // stacked token should have the same increment as original token at this position tokens[id].addAttribute(PositionIncrementAttribute.class).setPositionIncrement(prevIncr); } // only save last increment on non-zero increment in case we have multiple stacked tokens if (currentIncr > 0) { prevIncr = currentIncr; } } in.end(); if (state != -1) { builder.setAccept(state, true); } return builder.finish(); }