Java Code Examples for org.apache.lucene.util.TestUtil#randomUnicodeString()
The following examples show how to use
org.apache.lucene.util.TestUtil#randomUnicodeString().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestRawResponseWriter.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Regardless of base writer, the String in should be the same as the String out * when response is a raw ContentStream written to a Writer (or OutputStream) */ public void testRawStringContentStream() throws IOException { SolrQueryResponse rsp = new SolrQueryResponse(); String data = TestUtil.randomUnicodeString(random()); StringStream stream = new StringStream(data); stream.setContentType(TestUtil.randomSimpleString(random())); rsp.add(RawResponseWriter.CONTENT, stream); for (RawResponseWriter writer : allWriters) { assertEquals(stream.getContentType(), writer.getContentType(req(), rsp)); // we should have the same string if we use a Writer StringWriter sout = new StringWriter(); writer.write(sout, req(), rsp); assertEquals(data, sout.toString()); // we should have UTF-8 Bytes if we use an OutputStream ByteArrayOutputStream bout = new ByteArrayOutputStream(); writer.write(bout, req(), rsp); assertEquals(data, bout.toString(StandardCharsets.UTF_8.toString())); } }
Example 2
Source File: TestPrefixCodedTerms.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testRandom() { Set<Term> terms = new TreeSet<>(); int nterms = atLeast(10000); for (int i = 0; i < nterms; i++) { Term term = new Term(TestUtil.randomUnicodeString(random(), 2), TestUtil.randomUnicodeString(random())); terms.add(term); } PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder(); for (Term ref: terms) { b.add(ref); } PrefixCodedTerms pb = b.finish(); TermIterator iter = pb.iterator(); Iterator<Term> expected = terms.iterator(); assertEquals(terms.size(), pb.size()); //System.out.println("TEST: now iter"); while (iter.next() != null) { assertTrue(expected.hasNext()); assertEquals(expected.next(), new Term(iter.field(), iter.bytes)); } assertFalse(expected.hasNext()); }
Example 3
Source File: TestRawResponseWriter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Whatever the base writer is, a raw ContentStream built from a byte array must be written to
 * an OutputStream as exactly those bytes.
 */
public void testRawBinaryContentStream() throws IOException {
  SolrQueryResponse response = new SolrQueryResponse();

  // Random payload of between 10 and 2048 bytes.
  int payloadSize = TestUtil.nextInt(random(), 10, 2048);
  byte[] payload = new byte[payloadSize];
  random().nextBytes(payload);

  String secondCtorArg = TestUtil.randomUnicodeString(random());
  ByteArrayStream content = new ByteArrayStream(payload, secondCtorArg);
  content.setContentType(TestUtil.randomSimpleString(random()));
  response.add(RawResponseWriter.CONTENT, content);

  for (RawResponseWriter responseWriter : allWriters) {
    assertEquals(content.getContentType(), responseWriter.getContentType(req(), response));

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    responseWriter.write(sink, req(), response);
    assertArrayEquals(payload, sink.toByteArray());
  }
}
Example 4
Source File: TestDuelingAnalyzers.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testLetterUnicodeHuge() throws Exception { Random random = random(); int maxLength = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false); left.setMaxTokenLength(255); // match CharTokenizer's max token length Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; int numIterations = atLeast(10); for (int i = 0; i < numIterations; i++) { String s = TestUtil.randomUnicodeString(random, maxLength); assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s))); } IOUtils.close(left, right); }
Example 5
Source File: TestDuelingAnalyzers.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Duels a {@code MockAnalyzer} built with {@code jvmLetter} against an analyzer built on
 * {@code LetterTokenizer} over 200 random unicode strings, expecting identical token streams.
 */
public void testLetterUnicode() throws Exception {
  final int iterations = 200;
  Random random = random();

  // Reuse the Random obtained above instead of calling random() a second time, so the whole
  // test visibly draws from one source.
  Analyzer left = new MockAnalyzer(random, jvmLetter, false);
  Analyzer right =
      new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
          return new TokenStreamComponents(tokenizer, tokenizer);
        }
      };

  for (int i = 0; i < iterations; i++) {
    String s = TestUtil.randomUnicodeString(random);
    assertEquals(
        s,
        left.tokenStream("foo", newStringReader(s)),
        right.tokenStream("foo", newStringReader(s)));
  }
  IOUtils.close(left, right);
}
Example 6
Source File: TestCodepointCountFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs 10000 rounds: a random string is passed as a single keyword token through a
 * {@code CodepointCountFilter} with random bounds, and the token must survive exactly when its
 * code point count lies within [min, max].
 */
public void testRandomStrings() throws IOException {
  for (int round = 0; round < 10000; round++) {
    String input = TestUtil.randomUnicodeString(random(), 100);
    int firstBound = TestUtil.nextInt(random(), 0, 100);
    int secondBound = TestUtil.nextInt(random(), 0, 100);
    int codePoints = input.codePointCount(0, input.length());

    // Order the two random bounds so that min <= max.
    int min = Math.min(firstBound, secondBound);
    int max = Math.max(firstBound, secondBound);
    boolean shouldPass = min <= codePoints && codePoints <= max;

    KeywordTokenizer source = new KeywordTokenizer();
    source.setReader(new StringReader(input));
    TokenStream filtered = new CodepointCountFilter(source, min, max);

    filtered.reset();
    assertEquals(shouldPass, filtered.incrementToken());
    filtered.end();
    filtered.close();
  }
}
Example 7
Source File: FieldTermStackTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds four {@code TermInfo} instances with independently drawn random text. {@code a} and
 * {@code d} share all numeric constructor arguments; {@code b} and {@code c} differ from them
 * only in the fourth argument (1 and 2 instead of 0). The assertions expect every instance to
 * equal itself, {@code a} to equal {@code d}, and the ordering to follow the fourth argument.
 */
public void testTermInfoComparisonConsistency() {
  TermInfo a = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);
  TermInfo b = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 1, 1);
  TermInfo c = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 2, 1);
  TermInfo d = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);

  // Reflexive checks, in the order a, b, c, d.
  for (TermInfo self : new TermInfo[] {a, b, c, d}) {
    assertConsistentEquals(self, self);
  }

  // Same numeric arguments, different random text.
  assertConsistentEquals(a, d);

  // Strict ordering between instances whose fourth argument differs.
  assertConsistentLessThan(a, b);
  assertConsistentLessThan(b, c);
  assertConsistentLessThan(a, c);
  assertConsistentLessThan(d, b);
  assertConsistentLessThan(d, c);
}
Example 8
Source File: TestFieldCache.java From lucene-solr with Apache License 2.0 | 5 votes |
private static String generateString(int i) { String s = null; if (i > 0 && random().nextInt(3) == 1) { // reuse past string -- try to find one that's not null for(int iter = 0; iter < 10 && s == null;iter++) { s = unicodeStrings[random().nextInt(i)]; } if (s == null) { s = TestUtil.randomUnicodeString(random()); } } else { s = TestUtil.randomUnicodeString(random()); } return s; }
Example 9
Source File: TestDeterminizeLexicon.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * For each round, clears {@code automata} and {@code terms}, refills them with 5000 random
 * strings and their single-string automata, then runs {@code assertLexicon()}.
 */
public void testLexicon() throws Exception {
  for (int roundsLeft = atLeast(1); roundsLeft > 0; roundsLeft--) {
    automata.clear();
    terms.clear();

    int added = 0;
    while (added < 5000) {
      String word = TestUtil.randomUnicodeString(random());
      terms.add(word);
      automata.add(Automata.makeString(word));
      added++;
    }
    assertLexicon();
  }
}
Example 10
Source File: SolrCoreMetricManagerTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Loads a {@code MockMetricReporter} through {@code metricManager.loadReporter} using a randomly
 * built plugin description.
 *
 * <p>If loading succeeds, the plugin info must have been non-null and the reporter must be
 * registered under {@code reporterName + "@" + tag} as a {@code MockMetricReporter}. If loading
 * throws {@code IllegalArgumentException}, that is accepted only when the plugin info was null
 * or the "configurable" attribute was not set, and no reporter may be registered under the
 * tagged name.
 */
@Test
public void testLoadReporter() throws Exception {
  Random random = random();

  String className = MockMetricReporter.class.getName();
  String reporterName = TestUtil.randomUnicodeString(random);
  String taggedName = reporterName + "@" + coreMetricManager.getTag();

  Map<String, Object> attrs = new HashMap<>();
  attrs.put(FieldType.CLASS_NAME, className);
  attrs.put(CoreAdminParams.NAME, reporterName);

  boolean shouldDefineConfigurable = random.nextBoolean();
  // The value is drawn even when it ends up unused, so the sequence of random draws is the
  // same on both branches.
  String configurable = TestUtil.randomUnicodeString(random);
  if (shouldDefineConfigurable) {
    attrs.put("configurable", configurable);
  }

  boolean shouldDefinePlugin = random.nextBoolean();
  PluginInfo pluginInfo =
      shouldDefinePlugin ? new PluginInfo(TestUtil.randomUnicodeString(random), attrs) : null;

  try {
    metricManager.loadReporter(
        coreMetricManager.getRegistryName(),
        coreMetricManager.getCore(),
        pluginInfo,
        coreMetricManager.getTag());

    assertNotNull(pluginInfo);
    Map<String, SolrMetricReporter> reporters =
        metricManager.getReporters(coreMetricManager.getRegistryName());
    // Fixed message: the original literal ended in "but was + ", leaving a stray '+' in the
    // reported text.
    assertTrue(
        "reporters.size should be > 0, but was " + reporters.size(), reporters.size() > 0);
    assertNotNull(
        "reporter " + reporterName + " not present among " + reporters,
        reporters.get(taggedName));
    assertTrue(
        "wrong reporter class: " + reporters.get(taggedName),
        reporters.get(taggedName) instanceof MockMetricReporter);
  } catch (IllegalArgumentException e) {
    // A rejection is only legitimate for a missing plugin or a missing "configurable" value.
    assertTrue(pluginInfo == null || attrs.get("configurable") == null);
    assertNull(
        metricManager.getReporters(coreMetricManager.getRegistryName()).get(taggedName));
  }
}
Example 11
Source File: TestSimpleQueryParser.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testRandomQueries() throws Exception { for (int i = 0; i < 1000; i++) { String query = TestUtil.randomUnicodeString(random()); parse(query); // no exception parseKeyword(query, TestUtil.nextInt(random(), 0, 1024)); // no exception } }
Example 12
Source File: SolrJmxReporterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a {@code PluginInfo} describing a {@code SolrJmxReporter} with a random name.
 *
 * <p>The attributes always carry the reporter class, its name, the given root name, the enabled
 * flag, and the id of {@code TEST_MBEAN_SERVER}. On a coin flip a random domain is also
 * generated, stored in {@code domain} (declared outside this method), and added as an attribute.
 *
 * @param rootName value for the "rootName" attribute
 * @param enabled value for the "enabled" attribute
 * @return plugin info with a random unicode first argument and the attributes described above
 * @throws RuntimeException if the MBean server id cannot be read
 */
private PluginInfo createReporterPluginInfo(String rootName, boolean enabled) {
  Random rnd = random();
  String reporterClass = SolrJmxReporter.class.getName();
  String name = PREFIX + TestUtil.randomSimpleString(rnd, 5, 10);

  Map<String, Object> attributes = new HashMap<>();
  attributes.put(FieldType.CLASS_NAME, reporterClass);
  attributes.put(CoreAdminParams.NAME, name);
  attributes.put("rootName", rootName);
  attributes.put("enabled", enabled);

  try {
    ObjectName delegate = new ObjectName("JMImplementation:type=MBeanServerDelegate");
    String serverId = (String) TEST_MBEAN_SERVER.getAttribute(delegate, "MBeanServerId");
    attributes.put("agentId", serverId);
  } catch (Exception e) {
    throw new RuntimeException(
        "Unable to determine agentId of MBeanServer: " + e.getMessage(), e);
  }

  if (rnd.nextBoolean()) {
    domain = PREFIX + TestUtil.randomSimpleString(rnd);
    attributes.put("domain", domain);
  }

  return new PluginInfo(TestUtil.randomUnicodeString(rnd), attributes);
}
Example 13
Source File: NGramTokenizerTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs {@code testNGrams} on a random unicode string (length bound 4 * 1024) with random gram
 * sizes where 1 <= minGram <= maxGram <= 100, once with "" and once with "abcdef" as the last
 * argument.
 */
public void testFullUTF8Range() throws IOException {
  final int smallestGram = TestUtil.nextInt(random(), 1, 100);
  final int largestGram = TestUtil.nextInt(random(), smallestGram, 100);
  final String input = TestUtil.randomUnicodeString(random(), 4 * 1024);

  for (String lastArg : new String[] {"", "abcdef"}) {
    testNGrams(smallestGram, largestGram, input, lastArg);
  }
}
Example 14
Source File: TestSimplePatternTokenizer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * With the pattern ".*", a non-empty random string must come back from
 * {@code SimplePatternTokenizer} as one token equal to the whole input.
 */
public void testOneToken() throws Exception {
  Tokenizer tokenizer = new SimplePatternTokenizer(".*");
  CharTermAttribute term = tokenizer.getAttribute(CharTermAttribute.class);

  // Redraw until the random string is non-empty.
  String input;
  do {
    input = TestUtil.randomUnicodeString(random());
  } while (input.length() <= 0);

  tokenizer.setReader(new StringReader(input));
  tokenizer.reset();
  assertTrue(tokenizer.incrementToken());
  assertEquals(input, term.toString());
}
Example 15
Source File: TestExtendedMode.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Random test ensuring supplementary characters are never split: every token produced by
 * {@code analyzer} for a random unicode string must be a valid UTF-16 string.
 */
public void testSurrogates2() throws IOException {
  for (int remaining = atLeast(500); remaining > 0; remaining--) {
    String input = TestUtil.randomUnicodeString(random(), 100);
    try (TokenStream tokens = analyzer.tokenStream("foo", input)) {
      CharTermAttribute term = tokens.addAttribute(CharTermAttribute.class);
      tokens.reset();
      boolean more = tokens.incrementToken();
      while (more) {
        assertTrue(UnicodeUtil.validUTF16String(term));
        more = tokens.incrementToken();
      }
      tokens.end();
    }
  }
}
Example 16
Source File: TestUTF32ToUTF8.java From lucene-solr with Apache License 2.0 | 5 votes |
private void assertAutomaton(Automaton automaton) throws Exception { CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton); ByteRunAutomaton bra = new ByteRunAutomaton(automaton); final AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton); int num = atLeast(1000); for (int i = 0; i < num; i++) { final String string; if (random().nextBoolean()) { // likely not accepted string = TestUtil.randomUnicodeString(random()); } else { // will be accepted int[] codepoints = ras.getRandomAcceptedString(random()); try { string = UnicodeUtil.newString(codepoints, 0, codepoints.length); } catch (Exception e) { System.out.println(codepoints.length + " codepoints:"); for(int j=0;j<codepoints.length;j++) { System.out.println(" " + Integer.toHexString(codepoints[j])); } throw e; } } byte bytes[] = string.getBytes(StandardCharsets.UTF_8); assertEquals(cra.run(string), bra.run(bytes, 0, bytes.length)); } }
Example 17
Source File: TestMemoryIndexAgainstDirectory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * half of the time, returns a random term from TEST_TERMS. * the other half of the time, returns a random unicode string. */ private String randomTerm() { if (random().nextBoolean()) { // return a random TEST_TERM return TEST_TERMS[random().nextInt(TEST_TERMS.length)]; } else { // return a random unicode term return TestUtil.randomUnicodeString(random()); } }
Example 18
Source File: EdgeNGramTokenFilterTest.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Runs 20 rounds of {@code EdgeNGramTokenFilter} over a random unicode string emitted as one
 * keyword token. Expected output, in order: the whole input first when it is shorter than
 * minGram and preserveOriginal is set; one prefix per gram size from minGram up to
 * min(codePointCount, maxGram), cut on code point boundaries; the whole input last when it is
 * longer than maxGram and preserveOriginal is set. Every token must carry the offsets of the
 * full input.
 */
public void testSupplementaryCharacters() throws IOException {
  for (int round = 0; round < 20; round++) {
    final String input = TestUtil.randomUnicodeString(random(), 10);
    final int codePoints = input.codePointCount(0, input.length());
    final int minGram = TestUtil.nextInt(random(), 1, 3);
    final int maxGram = TestUtil.nextInt(random(), minGram, 10);
    final boolean preserveOriginal = TestUtil.nextInt(random(), 0, 1) % 2 == 0;

    KeywordTokenizer source = new KeywordTokenizer();
    source.setReader(new StringReader(input));
    TokenStream grams = new EdgeNGramTokenFilter(source, minGram, maxGram, preserveOriginal);
    final CharTermAttribute term = grams.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsets = grams.addAttribute(OffsetAttribute.class);
    grams.reset();

    // Input too short for any gram: only the preserved original may appear.
    if (preserveOriginal && codePoints < minGram) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      assertEquals(input, term.toString());
    }

    // One prefix per gram size, measured in code points rather than chars.
    final int longestGram = Math.min(codePoints, maxGram);
    for (int gram = minGram; gram <= longestGram; gram++) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      final int prefixEnd = Character.offsetByCodePoints(input, 0, gram);
      assertEquals(input.substring(0, prefixEnd), term.toString());
    }

    // Input longer than the largest gram: the preserved original comes last.
    if (preserveOriginal && codePoints > maxGram) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      assertEquals(input, term.toString());
    }

    assertFalse(grams.incrementToken());
    grams.close();
  }
}
Example 19
Source File: TestMultiDocValues.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Indexes documents whose "bytes" binary doc-values field holds a random string, committing at
 * random points. A reader is taken before {@code forceMerge(1)} and another one after it; the
 * values seen through {@code MultiDocValues} on the first reader must match, document by
 * document, those of the single merged leaf of the second reader. The random-advance helpers
 * are then run on fresh iterators from both sides.
 */
public void testBinary() throws Exception {
  final String fieldName = "bytes";

  Directory directory = newDirectory();
  Document document = new Document();
  Field valueField = new BinaryDocValuesField(fieldName, new BytesRef());
  document.add(valueField);

  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);
  for (int docId = 0; docId < numDocs; docId++) {
    // The same Document instance is re-added each time with a new field value.
    valueField.setBytesValue(new BytesRef(TestUtil.randomUnicodeString(random())));
    writer.addDocument(document);
    if (random().nextInt(17) == 0) {
      writer.commit();
    }
  }

  // Reader opened before the merge, then a second reader opened after forceMerge(1).
  DirectoryReader beforeMerge = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader afterMerge = writer.getReader();
  LeafReader mergedLeaf = getOnlyLeafReader(afterMerge);
  writer.close();

  BinaryDocValues multiValues = MultiDocValues.getBinaryValues(beforeMerge, fieldName);
  BinaryDocValues singleValues = mergedLeaf.getBinaryDocValues(fieldName);
  for (int docId = 0; docId < numDocs; docId++) {
    assertEquals(docId, multiValues.nextDoc());
    assertEquals(docId, singleValues.nextDoc());
    // Copy the single-leaf value before reading the multi value.
    final BytesRef fromSingle = BytesRef.deepCopyOf(singleValues.binaryValue());
    final BytesRef fromMulti = multiValues.binaryValue();
    assertEquals(fromSingle, fromMulti);
  }

  testRandomAdvance(
      mergedLeaf.getBinaryDocValues(fieldName),
      MultiDocValues.getBinaryValues(beforeMerge, fieldName));
  testRandomAdvanceExact(
      mergedLeaf.getBinaryDocValues(fieldName),
      MultiDocValues.getBinaryValues(beforeMerge, fieldName),
      mergedLeaf.maxDoc());

  beforeMerge.close();
  afterMerge.close();
  directory.close();
}
Example 20
Source File: TestMultiDocValues.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testSorted() throws Exception { Directory dir = newDirectory(); Document doc = new Document(); Field field = new SortedDocValuesField("bytes", new BytesRef()); doc.add(field); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random())); field.setBytesValue(ref); if (random().nextInt(7) == 0) { iw.addDocument(new Document()); } iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes"); SortedDocValues single = merged.getSortedDocValues("bytes"); assertEquals(single.getValueCount(), multi.getValueCount()); while (true) { assertEquals(single.nextDoc(), multi.nextDoc()); if (single.docID() == NO_MORE_DOCS) { break; } // check value final BytesRef expected = BytesRef.deepCopyOf(single.binaryValue()); final BytesRef actual = multi.binaryValue(); assertEquals(expected, actual); // check ord assertEquals(single.ordValue(), multi.ordValue()); } testRandomAdvance(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes")); testRandomAdvanceExact(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); }