org.apache.lucene.util.TestUtil#randomUnicodeString

Source File: TestRawResponseWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies that a raw String ContentStream round-trips unchanged through every
 * writer in {@code allWriters}: each writer must report the stream's own content
 * type, and the text must come back intact whether the response is written to a
 * Writer or to an OutputStream (whose bytes are decoded here as UTF-8).
 */
public void testRawStringContentStream() throws IOException {
  final SolrQueryResponse response = new SolrQueryResponse();
  final String expectedText = TestUtil.randomUnicodeString(random());
  final StringStream content = new StringStream(expectedText);
  content.setContentType(TestUtil.randomSimpleString(random()));
  response.add(RawResponseWriter.CONTENT, content);

  for (RawResponseWriter rawWriter : allWriters) {
    assertEquals(content.getContentType(), rawWriter.getContentType(req(), response));

    // character path: the Writer must receive exactly the original text
    final StringWriter charSink = new StringWriter();
    rawWriter.write(charSink, req(), response);
    assertEquals(expectedText, charSink.toString());

    // byte path: decoding the emitted bytes as UTF-8 must reproduce the text
    final ByteArrayOutputStream byteSink = new ByteArrayOutputStream();
    rawWriter.write(byteSink, req(), response);
    final String decoded = new String(byteSink.toByteArray(), StandardCharsets.UTF_8);
    assertEquals(expectedText, decoded);
  }
}

Source File: TestPrefixCodedTerms.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Feeds a sorted set of random terms into a {@code PrefixCodedTerms.Builder} and
 * checks that iterating the finished structure yields exactly the same terms, in
 * the same order, as the sorted set itself.
 */
public void testRandom() {
  final Set<Term> sortedTerms = new TreeSet<>();
  final int termCount = atLeast(10000);
  for (int n = 0; n < termCount; n++) {
    // field name is drawn first, then the term text (same draw order as argument evaluation)
    final String field = TestUtil.randomUnicodeString(random(), 2);
    final String text = TestUtil.randomUnicodeString(random());
    sortedTerms.add(new Term(field, text));
  }

  final PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
  for (Term term : sortedTerms) {
    builder.add(term);
  }
  final PrefixCodedTerms encoded = builder.finish();

  final TermIterator actualTerms = encoded.iterator();
  final Iterator<Term> expectedTerms = sortedTerms.iterator();
  assertEquals(sortedTerms.size(), encoded.size());

  // walk both sequences in lock-step; the encoded iterator signals exhaustion with null
  while (actualTerms.next() != null) {
    assertTrue(expectedTerms.hasNext());
    final Term rebuilt = new Term(actualTerms.field(), actualTerms.bytes);
    assertEquals(expectedTerms.next(), rebuilt);
  }

  // the encoded form must not have dropped any trailing terms
  assertFalse(expectedTerms.hasNext());
}

Source File: TestRawResponseWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies that a raw binary ContentStream is copied byte-for-byte to an
 * OutputStream by every writer in {@code allWriters}, and that each writer
 * reports the stream's own content type.
 */
public void testRawBinaryContentStream() throws IOException {
  final SolrQueryResponse response = new SolrQueryResponse();

  // random payload of 10..2048 bytes
  final byte[] payload = new byte[TestUtil.nextInt(random(), 10, 2048)];
  random().nextBytes(payload);

  final ByteArrayStream content =
      new ByteArrayStream(payload, TestUtil.randomUnicodeString(random()));
  content.setContentType(TestUtil.randomSimpleString(random()));
  response.add(RawResponseWriter.CONTENT, content);

  for (RawResponseWriter rawWriter : allWriters) {
    assertEquals(content.getContentType(), rawWriter.getContentType(req(), response));

    final ByteArrayOutputStream byteSink = new ByteArrayOutputStream();
    rawWriter.write(byteSink, req(), response);
    assertArrayEquals(payload, byteSink.toByteArray());
  }
}

Source File: TestDuelingAnalyzers.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Duels a {@code MockAnalyzer} (configured with {@code jvmLetter}) against a
 * {@code LetterTokenizer}-based analyzer on long random unicode strings and
 * requires both to produce the same token stream for every input.
 */
public void testLetterUnicodeHuge() throws Exception {
  final Random rnd = random();
  final int maxLength = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge

  final MockAnalyzer mock = new MockAnalyzer(rnd, jvmLetter, false);
  mock.setMaxTokenLength(255); // match CharTokenizer's max token length

  final Analyzer letter = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new LetterTokenizer(newAttributeFactory());
      return new TokenStreamComponents(source, source);
    }
  };

  int remaining = atLeast(10);
  while (remaining-- > 0) {
    final String input = TestUtil.randomUnicodeString(rnd, maxLength);
    assertEquals(input,
                 mock.tokenStream("foo", newStringReader(input)),
                 letter.tokenStream("foo", newStringReader(input)));
  }
  IOUtils.close(mock, letter);
}

Source File: TestDuelingAnalyzers.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Duels a {@code MockAnalyzer} (configured with {@code jvmLetter}) against a
 * {@code LetterTokenizer}-based analyzer on 200 random unicode strings and
 * requires both to produce the same token stream for every input.
 */
public void testLetterUnicode() throws Exception {
  final Random rnd = random();
  final Analyzer mock = new MockAnalyzer(random(), jvmLetter, false);
  final Analyzer letter = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new LetterTokenizer(newAttributeFactory());
      return new TokenStreamComponents(source, source);
    }
  };

  int remaining = 200;
  while (remaining-- > 0) {
    final String input = TestUtil.randomUnicodeString(rnd);
    assertEquals(input,
                 mock.tokenStream("foo", newStringReader(input)),
                 letter.tokenStream("foo", newStringReader(input)));
  }
  IOUtils.close(mock, letter);
}

Source File: TestCodepointCountFilter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * For 10000 random strings, wraps a {@code KeywordTokenizer} in a
 * {@code CodepointCountFilter} with a random [min, max] range and checks that a
 * token is emitted exactly when the string's code point count lies in that range.
 */
public void testRandomStrings() throws IOException {
  for (int round = 0; round < 10000; round++) {
    final String text = TestUtil.randomUnicodeString(random(), 100);

    // two independent draws; whichever is smaller becomes the lower bound
    final int firstBound = TestUtil.nextInt(random(), 0, 100);
    final int secondBound = TestUtil.nextInt(random(), 0, 100);
    final int min = Math.min(firstBound, secondBound);
    final int max = Math.max(firstBound, secondBound);

    final int codePoints = text.codePointCount(0, text.length());
    final boolean expected = min <= codePoints && codePoints <= max;

    final Tokenizer source = new KeywordTokenizer();
    source.setReader(new StringReader(text));
    final TokenStream filtered = new CodepointCountFilter(source, min, max);
    filtered.reset();
    assertEquals(expected, filtered.incrementToken());
    filtered.end();
    filtered.close();
  }
}

Source File: FieldTermStackTest.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds four {@code TermInfo} instances with random text that differ only in
 * their fourth constructor argument (0, 1, 2 and 0 again) and checks, via the
 * {@code assertConsistentEquals} / {@code assertConsistentLessThan} helpers, that
 * each instance equals itself, the two instances sharing the value 0 are equal,
 * and a smaller value orders before a larger one.
 */
public void testTermInfoComparisonConsistency() {
  final TermInfo zero = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);
  final TermInfo one = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 1, 1);
  final TermInfo two = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 2, 1);
  final TermInfo zeroAgain = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);

  // every instance equals itself
  assertConsistentEquals(zero, zero);
  assertConsistentEquals(one, one);
  assertConsistentEquals(two, two);
  assertConsistentEquals(zeroAgain, zeroAgain);

  // the random text differs, yet the two instances built with 0 must still be equal
  assertConsistentEquals(zero, zeroAgain);

  // ordering follows the fourth argument
  assertConsistentLessThan(zero, one);
  assertConsistentLessThan(one, two);
  assertConsistentLessThan(zero, two);
  assertConsistentLessThan(zeroAgain, one);
  assertConsistentLessThan(zeroAgain, two);
}

Source File: TestFieldCache.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Produces the string for slot {@code i}: when {@code i > 0}, roughly one call in
 * three tries (up to 10 times) to reuse a non-null entry from
 * {@code unicodeStrings[0..i)}; otherwise, or if every attempt hit a null entry,
 * a fresh random unicode string is returned.
 */
private static String generateString(int i) {
  if (i > 0 && random().nextInt(3) == 1) {
    // reuse past string -- try to find one that's not null
    for (int attempt = 0; attempt < 10; attempt++) {
      final String reused = unicodeStrings[random().nextInt(i)];
      if (reused != null) {
        return reused;
      }
    }
  }
  return TestUtil.randomUnicodeString(random());
}

Source File: TestDeterminizeLexicon.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Repeatedly rebuilds the shared {@code terms} and {@code automata} collections
 * from 5000 random unicode strings (one single-string automaton per term) and
 * then runs {@code assertLexicon()} on them.
 */
public void testLexicon() throws Exception {
  final int rounds = atLeast(1);
  for (int round = 0; round < rounds; round++) {
    // start every round from empty shared state
    automata.clear();
    terms.clear();

    int remaining = 5000;
    while (remaining-- > 0) {
      final String term = TestUtil.randomUnicodeString(random());
      terms.add(term);
      automata.add(Automata.makeString(term));
    }
    assertLexicon();
  }
}

Source File: SolrCoreMetricManagerTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Loads a {@link MockMetricReporter} through the metric manager using a randomized
 * plugin configuration and checks both possible outcomes:
 * <ul>
 *   <li>loading succeeds: a plugin definition must have been supplied, and a
 *       {@code MockMetricReporter} must be registered under {@code name@tag};</li>
 *   <li>loading fails with {@link IllegalArgumentException}: either no plugin
 *       definition was supplied or the {@code "configurable"} attribute was absent,
 *       and no reporter may be registered under {@code name@tag}.</li>
 * </ul>
 */
@Test
public void testLoadReporter() throws Exception {
  Random random = random();

  String className = MockMetricReporter.class.getName();
  String reporterName = TestUtil.randomUnicodeString(random);
  String taggedName = reporterName + "@" + coreMetricManager.getTag();

  Map<String, Object> attrs = new HashMap<>();
  attrs.put(FieldType.CLASS_NAME, className);
  attrs.put(CoreAdminParams.NAME, reporterName);

  // The random draws below are kept in this exact order (the value is drawn even
  // when it ends up unused) so a given test seed keeps producing the same scenario.
  boolean shouldDefineConfigurable = random.nextBoolean();
  String configurable = TestUtil.randomUnicodeString(random);
  if (shouldDefineConfigurable) {
    attrs.put("configurable", configurable);
  }

  boolean shouldDefinePlugin = random.nextBoolean();
  PluginInfo pluginInfo = shouldDefinePlugin ? new PluginInfo(TestUtil.randomUnicodeString(random), attrs) : null;

  try {
    metricManager.loadReporter(coreMetricManager.getRegistryName(), coreMetricManager.getCore(),
        pluginInfo, coreMetricManager.getTag());
    // reaching this point means loading was accepted, which requires a plugin definition
    assertNotNull(pluginInfo);
    Map<String, SolrMetricReporter> reporters = metricManager.getReporters(coreMetricManager.getRegistryName());
    assertTrue("reporters.size should be > 0, but was " + reporters.size(), reporters.size() > 0);
    SolrMetricReporter loaded = reporters.get(taggedName);
    assertNotNull("reporter " + reporterName + " not present among " + reporters, loaded);
    assertTrue("wrong reporter class: " + loaded, loaded instanceof MockMetricReporter);
  } catch (IllegalArgumentException e) {
    // a rejection is only legitimate for an incomplete configuration
    assertTrue(pluginInfo == null || attrs.get("configurable") == null);
    assertNull(metricManager.getReporters(coreMetricManager.getRegistryName()).get(taggedName));
  }
}

Source File: TestSimpleQueryParser.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Smoke test: 1000 random unicode strings are passed to both {@code parse} and
 * {@code parseKeyword} (the latter with a random int in [0, 1024]); neither call
 * may throw.
 */
public void testRandomQueries() throws Exception {
  int remaining = 1000;
  while (remaining-- > 0) {
    final String queryText = TestUtil.randomUnicodeString(random());
    parse(queryText); // no exception
    final int keywordArg = TestUtil.nextInt(random(), 0, 1024);
    parseKeyword(queryText, keywordArg); // no exception
  }
}

Source File: SolrJmxReporterTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a {@code PluginInfo} describing a {@code SolrJmxReporter} with a random,
 * {@code PREFIX}-ed reporter name, bound to the id of {@code TEST_MBEAN_SERVER}.
 * Half of the time a random {@code PREFIX}-ed domain is also generated, stored in
 * the {@code domain} field and added to the attributes.
 *
 * @param rootName value for the "rootName" attribute
 * @param enabled value for the "enabled" attribute
 * @return the plugin definition, created with a random unicode first argument
 * @throws RuntimeException if the MBean server id cannot be read
 */
private PluginInfo createReporterPluginInfo(String rootName, boolean enabled) {
  final Random rnd = random();
  final String reporterName = PREFIX + TestUtil.randomSimpleString(rnd, 5, 10);

  final Map<String, Object> attributes = new HashMap<>();
  attributes.put(FieldType.CLASS_NAME, SolrJmxReporter.class.getName());
  attributes.put(CoreAdminParams.NAME, reporterName);
  attributes.put("rootName", rootName);
  attributes.put("enabled", enabled);

  // ask the MBean server delegate for the server's id
  final String agentId;
  try {
    final ObjectName delegate = new ObjectName("JMImplementation:type=MBeanServerDelegate");
    agentId = (String) TEST_MBEAN_SERVER.getAttribute(delegate, "MBeanServerId");
  } catch (Exception e) {
    throw new RuntimeException("Unable to determine agentId of MBeanServer: " + e.getMessage(), e);
  }
  attributes.put("agentId", agentId);

  if (rnd.nextBoolean()) {
    // override the domain and remember it in the field
    domain = PREFIX + TestUtil.randomSimpleString(rnd);
    attributes.put("domain", domain);
  }

  return new PluginInfo(TestUtil.randomUnicodeString(rnd), attributes);
}

Source File: NGramTokenizerTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs {@code testNGrams} on a random unicode string (length bound 4 * 1024) with
 * random gram sizes satisfying 1 <= min <= max <= 100, once with an empty last
 * argument and once with "abcdef".
 */
public void testFullUTF8Range() throws IOException {
  final int smallestGram = TestUtil.nextInt(random(), 1, 100);
  final int largestGram = TestUtil.nextInt(random(), smallestGram, 100);
  final String input = TestUtil.randomUnicodeString(random(), 4 * 1024);

  for (String lastArg : new String[] {"", "abcdef"}) {
    testNGrams(smallestGram, largestGram, input, lastArg);
  }
}

Source File: TestSimplePatternTokenizer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * With the pattern {@code ".*"}, a non-empty random unicode string must come back
 * as a single token whose text equals the whole input.
 *
 * <p>The tokenizer is opened in try-with-resources so that it is closed even when
 * an assertion fails.
 */
public void testOneToken() throws Exception {
  try (Tokenizer t = new SimplePatternTokenizer(".*")) {
    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);

    // keep drawing until the input is non-empty
    String s;
    do {
      s = TestUtil.randomUnicodeString(random());
    } while (s.length() == 0);

    t.setReader(new StringReader(s));
    t.reset();
    assertTrue(t.incrementToken());
    assertEquals(s, termAtt.toString());
  }
}

Source File: TestExtendedMode.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Random test ensuring we never split supplementary characters: every token the
 * analyzer produces for a random unicode string must be a valid UTF-16 string.
 */
public void testSurrogates2() throws IOException {
  int remaining = atLeast(500);
  while (remaining-- > 0) {
    final String input = TestUtil.randomUnicodeString(random(), 100);
    try (TokenStream tokens = analyzer.tokenStream("foo", input)) {
      final CharTermAttribute term = tokens.addAttribute(CharTermAttribute.class);
      tokens.reset();
      while (tokens.incrementToken()) {
        assertTrue(UnicodeUtil.validUTF16String(term));
      }
      tokens.end();
    }
  }
}

Source File: TestUTF32ToUTF8.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that running the automaton over a string's characters
 * ({@code CharacterRunAutomaton}) and over its UTF-8 bytes
 * ({@code ByteRunAutomaton}) always gives the same accept/reject answer. Inputs
 * are an even mix of arbitrary random strings and strings generated from the
 * automaton itself.
 */
private void assertAutomaton(Automaton automaton) throws Exception {
  final CharacterRunAutomaton charRunner = new CharacterRunAutomaton(automaton);
  final ByteRunAutomaton byteRunner = new ByteRunAutomaton(automaton);
  final AutomatonTestUtil.RandomAcceptedStrings acceptedStrings =
      new AutomatonTestUtil.RandomAcceptedStrings(automaton);

  final int rounds = atLeast(1000);
  for (int round = 0; round < rounds; round++) {
    final String input;
    if (random().nextBoolean()) {
      // likely not accepted
      input = TestUtil.randomUnicodeString(random());
    } else {
      // will be accepted
      final int[] codepoints = acceptedStrings.getRandomAcceptedString(random());
      try {
        input = UnicodeUtil.newString(codepoints, 0, codepoints.length);
      } catch (Exception e) {
        // dump the offending code points before propagating the failure
        System.out.println(codepoints.length + " codepoints:");
        for (int codepoint : codepoints) {
          System.out.println("  " + Integer.toHexString(codepoint));
        }
        throw e;
      }
    }

    final byte[] utf8 = input.getBytes(StandardCharsets.UTF_8);
    final boolean acceptedAsChars = charRunner.run(input);
    final boolean acceptedAsBytes = byteRunner.run(utf8, 0, utf8.length);
    assertEquals(acceptedAsChars, acceptedAsBytes);
  }
}

Source File: TestMemoryIndexAgainstDirectory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a term for testing: a coin flip decides between a random entry of
 * {@code TEST_TERMS} and a freshly generated random unicode string.
 */
private String randomTerm() {
  return random().nextBoolean()
      ? TEST_TERMS[random().nextInt(TEST_TERMS.length)] // a random TEST_TERM
      : TestUtil.randomUnicodeString(random());         // a random unicode term
}

Source File: EdgeNGramTokenFilterTest.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Checks {@code EdgeNGramTokenFilter} on random unicode strings (20 rounds): grams
 * must be cut on code point boundaries, every token must carry the offsets of the
 * whole input, and with {@code preserveOriginal} the full input must additionally
 * be emitted when it is shorter than {@code minGram} or longer than
 * {@code maxGram} (counted in code points).
 */
public void testSupplementaryCharacters() throws IOException {
  for (int round = 0; round < 20; round++) {
    final String input = TestUtil.randomUnicodeString(random(), 10);
    final int inputCodePoints = input.codePointCount(0, input.length());
    final int minGram = TestUtil.nextInt(random(), 1, 3);
    final int maxGram = TestUtil.nextInt(random(), minGram, 10);
    final boolean preserveOriginal = TestUtil.nextInt(random(), 0, 1) % 2 == 0;

    final Tokenizer source = new KeywordTokenizer();
    source.setReader(new StringReader(input));
    final TokenStream grams = new EdgeNGramTokenFilter(source, minGram, maxGram, preserveOriginal);
    final CharTermAttribute term = grams.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsets = grams.addAttribute(OffsetAttribute.class);
    grams.reset();

    // input too short for any gram: only the preserved original is expected
    if (preserveOriginal && inputCodePoints < minGram) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      assertEquals(input, term.toString());
    }

    // one prefix per gram size, measured in code points rather than chars
    final int largestGram = Math.min(inputCodePoints, maxGram);
    for (int gramSize = minGram; gramSize <= largestGram; gramSize++) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      final int prefixEnd = Character.offsetByCodePoints(input, 0, gramSize);
      assertEquals(input.substring(0, prefixEnd), term.toString());
    }

    // input longer than the largest gram: the preserved original comes last
    if (preserveOriginal && inputCodePoints > maxGram) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      assertEquals(input, term.toString());
    }

    assertFalse(grams.incrementToken());
    grams.close();
  }
}

Source File: TestMultiDocValues.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes random binary doc values with occasional commits, then compares the
 * values exposed by {@code MultiDocValues} over the reader opened before
 * {@code forceMerge(1)} against those of the single leaf opened after the merge.
 * Both views are also exercised through {@code testRandomAdvance} and
 * {@code testRandomAdvanceExact}.
 */
public void testBinary() throws Exception {
  Directory directory = newDirectory();
  Document document = new Document();
  Field bytesField = new BinaryDocValuesField("bytes", new BytesRef());
  document.add(bytesField);

  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  final int docCount = atLeast(TEST_NIGHTLY ? 500 : 50);

  for (int docId = 0; docId < docCount; docId++) {
    // the same Document/Field pair is reused; only the value changes per doc
    bytesField.setBytesValue(new BytesRef(TestUtil.randomUnicodeString(random())));
    writer.addDocument(document);
    if (random().nextInt(17) == 0) {
      writer.commit();
    }
  }

  // one reader from before the merge, one from after it
  DirectoryReader unmergedReader = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader mergedReader = writer.getReader();
  LeafReader mergedLeaf = getOnlyLeafReader(mergedReader);
  writer.close();

  BinaryDocValues multiValues = MultiDocValues.getBinaryValues(unmergedReader, "bytes");
  BinaryDocValues singleValues = mergedLeaf.getBinaryDocValues("bytes");
  for (int docId = 0; docId < docCount; docId++) {
    assertEquals(docId, multiValues.nextDoc());
    assertEquals(docId, singleValues.nextDoc());
    final BytesRef expectedValue = BytesRef.deepCopyOf(singleValues.binaryValue());
    final BytesRef actualValue = multiValues.binaryValue();
    assertEquals(expectedValue, actualValue);
  }

  // fresh iterators for the advance checks, since the ones above are already consumed
  testRandomAdvance(mergedLeaf.getBinaryDocValues("bytes"),
      MultiDocValues.getBinaryValues(unmergedReader, "bytes"));
  testRandomAdvanceExact(mergedLeaf.getBinaryDocValues("bytes"),
      MultiDocValues.getBinaryValues(unmergedReader, "bytes"), mergedLeaf.maxDoc());

  unmergedReader.close();
  mergedReader.close();
  directory.close();
}

Source File: TestMultiDocValues.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes random sorted doc values, randomly interleaved with empty documents and
 * commits, then compares the values and ordinals exposed by {@code MultiDocValues}
 * over the reader opened before {@code forceMerge(1)} against those of the single
 * leaf opened after the merge. Both views are also exercised through
 * {@code testRandomAdvance} and {@code testRandomAdvanceExact}.
 */
public void testSorted() throws Exception {
  Directory directory = newDirectory();
  Document document = new Document();
  Field bytesField = new SortedDocValuesField("bytes", new BytesRef());
  document.add(bytesField);

  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  final int docCount = atLeast(TEST_NIGHTLY ? 500 : 50);
  for (int n = 0; n < docCount; n++) {
    bytesField.setBytesValue(new BytesRef(TestUtil.randomUnicodeString(random())));
    if (random().nextInt(7) == 0) {
      // occasionally add a document that has no value for the field
      writer.addDocument(new Document());
    }
    writer.addDocument(document);
    if (random().nextInt(17) == 0) {
      writer.commit();
    }
  }

  // one reader from before the merge, one from after it
  DirectoryReader unmergedReader = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader mergedReader = writer.getReader();
  LeafReader mergedLeaf = getOnlyLeafReader(mergedReader);
  writer.close();

  SortedDocValues multiValues = MultiDocValues.getSortedValues(unmergedReader, "bytes");
  SortedDocValues singleValues = mergedLeaf.getSortedDocValues("bytes");
  assertEquals(singleValues.getValueCount(), multiValues.getValueCount());

  // step both iterators together until the merged one is exhausted
  for (;;) {
    assertEquals(singleValues.nextDoc(), multiValues.nextDoc());
    if (singleValues.docID() == NO_MORE_DOCS) {
      break;
    }

    // check value
    final BytesRef expectedValue = BytesRef.deepCopyOf(singleValues.binaryValue());
    final BytesRef actualValue = multiValues.binaryValue();
    assertEquals(expectedValue, actualValue);

    // check ord
    assertEquals(singleValues.ordValue(), multiValues.ordValue());
  }

  // fresh iterators for the advance checks, since the ones above are already consumed
  testRandomAdvance(mergedLeaf.getSortedDocValues("bytes"),
      MultiDocValues.getSortedValues(unmergedReader, "bytes"));
  testRandomAdvanceExact(mergedLeaf.getSortedDocValues("bytes"),
      MultiDocValues.getSortedValues(unmergedReader, "bytes"), mergedLeaf.maxDoc());

  unmergedReader.close();
  mergedReader.close();
  directory.close();
}

Java Code Examples for org.apache.lucene.util.TestUtil#randomUnicodeString()