Java Code Examples for org.apache.lucene.util.fst.Util#toUTF32()
The following examples show how to use
org.apache.lucene.util.fst.Util#toUTF32().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FiniteStringsIteratorTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Unions the automata of two random 50,000-char strings and checks that enumerating the finite
 * strings of the result yields exactly two entries, one per input string.
 *
 * <p>NOTE(review): the method name suggests this guards against excessive stack use on very long
 * strings — confirm against FiniteStringsIterator.
 */
public void testFiniteStringsEatsStack() {
  final char[] buffer = new char[50000];

  // The same buffer is refilled for the second string; each String gets its own copy.
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  final String bigString1 = new String(buffer);
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  final String bigString2 = new String(buffer);

  Automaton first = Automata.makeString(bigString1);
  Automaton second = Automata.makeString(bigString2);
  Automaton a = Operations.union(first, second);

  List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(a));
  assertEquals(2, actual.size());

  // Each input, converted to UTF-32 code points, must be among the enumerated strings.
  IntsRefBuilder scratch = new IntsRefBuilder();
  for (String big : new String[] {bigString1, bigString2}) {
    Util.toUTF32(big.toCharArray(), 0, big.length(), scratch);
    assertTrue(actual.contains(scratch.get()));
  }
}
Example 2
Source File: Dictionary.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Compiles an FST from an affix table. For every map entry, the key (converted to UTF-32 code
 * points) becomes the FST input and the entry's integers become its output sequence.
 *
 * @param affixes map from affix string to its list of integers; entries are added in the map's
 *     iteration order
 * @return the compiled FST
 * @throws IOException declared by this method; presumably propagated from the FST compiler —
 *     confirm
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
  final FSTCompiler<IntsRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, outputs);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    // keyScratch is reused across entries and refilled for each key.
    Util.toUTF32(affix.getKey(), keyScratch);

    // Copy the boxed integers into a freshly sized IntsRef.
    List<Integer> values = affix.getValue();
    IntsRef output = new IntsRef(values.size());
    for (Integer value : values) {
      output.ints[output.length] = value;
      output.length++;
    }

    fstCompiler.add(keyScratch.get(), output);
  }

  return fstCompiler.compile();
}
Example 3
Source File: FiniteStringsIteratorTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that the automaton for the single string "foobar" enumerates exactly one finite string,
 * equal to the UTF-32 form of "foobar".
 */
public void testSingletonNoLimit() {
  final String text = "foobar";

  Automaton a = Automata.makeString(text);
  List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(a));
  assertEquals(1, actual.size());

  // Build the expected code-point sequence for the whole string.
  IntsRefBuilder scratch = new IntsRefBuilder();
  Util.toUTF32(text.toCharArray(), 0, text.length(), scratch);
  IntsRef expected = scratch.get();
  assertTrue(actual.contains(expected));
}
Example 4
Source File: LimitedFiniteStringsIteratorTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that a LimitedFiniteStringsIterator constructed with a second argument of 1 over the
 * automaton for "foobar" yields exactly one string, equal to the UTF-32 form of "foobar".
 */
public void testSingleton() {
  final String text = "foobar";

  Automaton a = Automata.makeString(text);
  LimitedFiniteStringsIterator iterator = new LimitedFiniteStringsIterator(a, 1);
  List<IntsRef> actual = getFiniteStrings(iterator);
  assertEquals(1, actual.size());

  // Build the expected code-point sequence for the whole string.
  IntsRefBuilder scratch = new IntsRefBuilder();
  Util.toUTF32(text.toCharArray(), 0, text.length(), scratch);
  IntsRef expected = scratch.get();
  assertTrue(actual.contains(expected));
}
Example 5
Source File: FiniteStringsIteratorTest.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testRandomFiniteStrings1() { int numStrings = atLeast(100); if (VERBOSE) { System.out.println("TEST: numStrings=" + numStrings); } Set<IntsRef> strings = new HashSet<>(); List<Automaton> automata = new ArrayList<>(); IntsRefBuilder scratch = new IntsRefBuilder(); for(int i=0;i<numStrings;i++) { String s = TestUtil.randomSimpleString(random(), 1, 200); Util.toUTF32(s.toCharArray(), 0, s.length(), scratch); if (strings.add(scratch.toIntsRef())) { automata.add(Automata.makeString(s)); if (VERBOSE) { System.out.println(" add string=" + s); } } } // TODO: we could sometimes use // DaciukMihovAutomatonBuilder here // TODO: what other random things can we do here... Automaton a = Operations.union(automata); if (random().nextBoolean()) { a = MinimizationOperations.minimize(a, 1000000); if (VERBOSE) { System.out.println("TEST: a.minimize numStates=" + a.getNumStates()); } } else if (random().nextBoolean()) { if (VERBOSE) { System.out.println("TEST: a.determinize"); } a = Operations.determinize(a, 1000000); } else if (random().nextBoolean()) { if (VERBOSE) { System.out.println("TEST: a.removeDeadStates"); } a = Operations.removeDeadStates(a); } FiniteStringsIterator iterator = new FiniteStringsIterator(a); List<IntsRef> actual = getFiniteStrings(iterator); assertFiniteStringsRecursive(a, actual); if (!strings.equals(new HashSet<>(actual))) { System.out.println("strings.size()=" + strings.size() + " actual.size=" + actual.size()); List<IntsRef> x = new ArrayList<>(strings); Collections.sort(x); List<IntsRef> y = new ArrayList<>(actual); Collections.sort(y); int end = Math.min(x.size(), y.size()); for(int i=0;i<end;i++) { System.out.println(" i=" + i + " string=" + toString(x.get(i)) + " actual=" + toString(y.get(i))); } fail("wrong strings found"); } }