org.apache.lucene.analysis.core.WhitespaceAnalyzer Java Examples
The following examples show how to use org.apache.lucene.analysis.core.WhitespaceAnalyzer.
Example #1
Source File: From solr-redis with Apache License 2.0 | 6 votes |
/**
 * Checks that a query is still produced when the first pooled Jedis client throws.
 *
 * <p>The pool mock hands out a failing client first and a working client second; the
 * parsed query is expected to contain exactly one term.
 */
@Test
public void shouldRetryWhenRedisFailed() throws SyntaxError, IOException {
  // Local params: SMEMBERS on "simpleKey", analysis disabled, "retries" set to "2".
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(false);
  when(localParamsMock.get("retries")).thenReturn("2");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");

  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());

  // The pool yields the failing client first, then the healthy one.
  when(jedisPoolMock.getResource()).thenReturn(jedisFailingMock).thenReturn(jedisMock);
  when(jedisFailingMock.smembers("simpleKey")).thenThrow(new JedisException("Synthetic exception"));
  final Set<String> members = new HashSet<String>(Collections.singletonList("value"));
  when(jedisMock.smembers("simpleKey")).thenReturn(members);

  final RetryingCommandHandler retryingHandler = new RetryingCommandHandler(jedisPoolMock, 1);
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, retryingHandler);

  final Query parsed = redisQParser.parse();
  final Set<Term> extracted = extractTerms(new IndexSearcher(new MultiReader()), parsed);
  Assert.assertEquals(1, extracted.size());
}
Example #2
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings({"rawtypes"}) public void testMultipleClauses() { SpellingQueryConverter converter = new SpellingQueryConverter(); converter.init(new NamedList()); converter.setAnalyzer(new WhitespaceAnalyzer()); // two field:value pairs should give two tokens Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar"); assertTrue("tokens is null and it shouldn't be", tokens != null); assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size()); // a field:value pair and a search term should give two tokens tokens = converter.convert("text_field:我购买了道具和服装。 bar"); assertTrue("tokens is null and it shouldn't be", tokens != null); assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size()); }
Example #3
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings({"rawtypes"}) public void testUnicode() { SpellingQueryConverter converter = new SpellingQueryConverter(); converter.init(new NamedList()); converter.setAnalyzer(new WhitespaceAnalyzer()); // chinese text value Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。"); assertTrue("tokens is null and it shouldn't be", tokens != null); assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size()); tokens = converter.convert("text_购field:我购买了道具和服装。"); assertTrue("tokens is null and it shouldn't be", tokens != null); assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size()); tokens = converter.convert("text_field:我购xyz买了道具和服装。"); assertTrue("tokens is null and it shouldn't be", tokens != null); assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size()); }
Example #4
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Walks an {@code OffsetWindowTokenFilter} over the concatenation of several values and
 * checks that each window produces the same tokens as analyzing that value on its own.
 */
@Test
public void testOffsetWindowTokenFilter() throws Exception {
  final String[] values = { "a b c d", "e f g", "h", "i j k l m n" };

  try (Analyzer joinedAnalyzer = new WhitespaceAnalyzer()) {
    TokenStream joinedStream = joinedAnalyzer.tokenStream("", "a b c d e f g h i j k l m n");
    try (DefaultSolrHighlighter.OffsetWindowTokenFilter windowFilter =
             new DefaultSolrHighlighter.OffsetWindowTokenFilter(joinedStream)) {
      for (String value : values) {
        // Window over the joined stream, sized to the current value.
        TokenStream windowed = windowFilter.advanceToNextWindowOfLength(value.length());
        windowed.reset();

        try (Analyzer singleAnalyzer = new WhitespaceAnalyzer()) {
          // Reference stream: the same value analyzed in isolation.
          TokenStream reference = singleAnalyzer.tokenStream("", value);
          reference.reset();

          while (windowed.incrementToken()) {
            assertTrue(reference.incrementToken());
            assertEquals(windowed, reference);
          }
          // The reference stream must be exhausted together with the window.
          assertFalse(reference.incrementToken());
        }
      }
    }
  }
}
Example #5
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
@Test public void testAnalyzeMultiTerm() { // No terms provided by the StopFilter (stop word) for the multi-term part. // This is supported. Check TextField.analyzeMultiTerm returns null (and does not throw an exception). BytesRef termBytes = TextField.analyzeMultiTerm("field", "the", new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)); assertNull(termBytes); // One term provided by the WhitespaceTokenizer for the multi-term part. // This is the regular case. Check TextField.analyzeMultiTerm returns it (and does not throw an exception). termBytes = TextField.analyzeMultiTerm("field", "Sol", new WhitespaceAnalyzer()); assertEquals("Sol", termBytes.utf8ToString()); // Two terms provided by the WhitespaceTokenizer for the multi-term part. // This is not allowed. Expect an exception. SolrException exception = expectThrows(SolrException.class, () -> TextField.analyzeMultiTerm("field", "term1 term2", new WhitespaceAnalyzer())); assertEquals("Unexpected error code", SolrException.ErrorCode.BAD_REQUEST.code, exception.code()); }
Example #6
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
public void testTermStreamWrapping() throws IOException { CustomQueryHandler handler = new RegexpQueryHandler("FOO", 10, "__wibble__", Collections.singleton("field1")); try (Analyzer input = new WhitespaceAnalyzer()) { // field1 is in the excluded set, so nothing should happen assertTokenStreamContents(handler.wrapTermStream("field1", input.tokenStream("field1", "hello world")), new String[]{ "hello", "world" }); // field2 is not excluded assertTokenStreamContents(handler.wrapTermStream("field2", input.tokenStream("field2", "harm alarm asdasasdasdasd")), new String[]{ "harm", "harmFOO", "harFOO", "haFOO", "hFOO", "armFOO", "arFOO", "aFOO", "rmFOO", "rFOO", "mFOO", "FOO", "alarm", "alarmFOO", "alarFOO", "alaFOO", "alFOO", "larmFOO", "larFOO", "laFOO", "lFOO", "asdasasdasdasd", "__wibble__" }); } }
Example #7
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates an index writer over the given directory.
 *
 * @param dir - index directory; must not be null
 * @param analyzer - analyzer used by the index writer; a {@code WhitespaceAnalyzer} is used when null
 * @param useCompound - if true, compound index files are used
 * @param keepAllCommits - if true, all commit generations are kept; otherwise only the last commit is kept
 * @param ps - information stream; left unset when null
 * @return new index writer
 * @throws IOException - if there is a low level IO error.
 */
public static IndexWriter createWriter(Directory dir, Analyzer analyzer, boolean useCompound, boolean keepAllCommits, PrintStream ps) throws IOException {
  Objects.requireNonNull(dir);

  // Fall back to whitespace tokenization when the caller supplies no analyzer.
  final Analyzer effectiveAnalyzer;
  if (analyzer == null) {
    effectiveAnalyzer = new WhitespaceAnalyzer();
  } else {
    effectiveAnalyzer = analyzer;
  }

  IndexWriterConfig writerConfig = new IndexWriterConfig(effectiveAnalyzer);
  writerConfig.setUseCompoundFile(useCompound);
  if (ps != null) {
    writerConfig.setInfoStream(ps);
  }
  writerConfig.setIndexDeletionPolicy(
      keepAllCommits ? NoDeletionPolicy.INSTANCE : new KeepOnlyLastCommitDeletionPolicy());

  return new IndexWriter(dir, writerConfig);
}
Example #8
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig( new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for(int i=0;i<100;i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongPoint("timestamp", then)); indexWriter.addDocument(doc); } // Open near-real-time searcher searcher = new IndexSearcher(; indexWriter.close(); }
Example #9
Source File: From solr-redis with Apache License 2.0 | 6 votes |
/**
 * Checks that with {@code useAnalyzer} enabled the set members are run through the
 * query analyzer: the two members "123 124" and "321" are expected to yield three terms.
 */
@Test
public void shouldTurnAnalysisOn() throws SyntaxError, IOException {
  // Local params: SMEMBERS on "simpleKey" with analysis switched on.
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");

  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());

  final Set<String> members = new HashSet<>(Arrays.asList("123 124", "321"));
  when(jedisMock.smembers(anyString())).thenReturn(members);

  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query parsed = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");

  final Set<Term> extracted = extractTerms(new IndexSearcher(new MultiReader()), parsed);
  Assert.assertEquals(3, extracted.size());
}
Example #10
Source File: From lucene-solr with Apache License 2.0 | 6 votes |
private Document createTestNormsDocument(boolean setNormsProp, boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal) throws Exception { Properties props = new Properties(); // Indexing configuration. props.setProperty("analyzer", WhitespaceAnalyzer.class.getName()); props.setProperty("directory", "ByteBuffersDirectory"); if (setNormsProp) { props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal)); } if (setBodyNormsProp) { props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal)); } // Create PerfRunData Config config = new Config(props); DocMaker dm = new DocMaker(); dm.setConfig(config, new OneDocSource()); return dm.makeDocument(); }
Example #11
Source File: From RedisDirectory with Apache License 2.0 | 6 votes |
public void testMMapDirectory() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); FSDirectory open ="E:/testlucene")); IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); long end = System.currentTimeMillis(); log.error("MMapDirectory consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher(; int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("MMapDirectory search consumes {}ms!", (end - start)); }
Example #12
Source File: From solr-redis with Apache License 2.0 | 6 votes |
/**
 * Checks that with {@code ignoreScore} set to "true" and a larger member set, the
 * parsed query rewrites to a {@code TermsQuery}.
 */
@Test
public void shouldUseTermsQuery() throws SyntaxError, IOException {
  // Local params: SMEMBERS on "simpleKey", scores ignored, analysis on.
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("ignoreScore")).thenReturn("true");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");

  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());

  final Set<String> members = new HashSet<>(Arrays.asList(
      "123 124", "321", "322", "323", "324", "325", "326", "327", "328", "329",
      "330", "331", "332", "333", "334", "335", "336", "337", "338"));
  when(jedisMock.smembers(anyString())).thenReturn(members);

  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query parsed = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");

  IndexSearcher emptySearcher = new IndexSearcher(new MultiReader());
  Query rewritten = emptySearcher.rewrite(parsed);
  assertTrue(rewritten instanceof TermsQuery);
}
Example #13
Source File: From pyramid with Apache License 2.0 | 6 votes |
protected Query doToQuery(QueryShardContext context) throws IOException { // Analyzer analyzer = context.getMapperService().searchAnalyzer(); Analyzer analyzer = new WhitespaceAnalyzer(); try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) { CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source)); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); if (termAtt == null) { return null; } List<CustomSpanTermQuery> clauses = new ArrayList<>(); stream.reset(); while (stream.incrementToken()) { Term term = new Term(fieldName, termAtt.getBytesRef()); clauses.add(new CustomSpanTermQuery(term)); } return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount); } catch (IOException e) { throw new RuntimeException("Error analyzing query text", e); } }
Example #14
Source File: From RedisDirectory with Apache License 2.0 | 6 votes |
public void testRamDirectory() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); RAMDirectory ramDirectory = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); long end = System.currentTimeMillis(); log.error("RamDirectory consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher(; int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RamDirectory search consumes {}ms!", (end - start)); }
Example #15
Source File: From incubator-retired-blur with Apache License 2.0 | 6 votes |
@Test public void testDocValuesFormat() throws IOException { RAMDirectory directory = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43)); conf.setCodec(new Blur024Codec()); IndexWriter writer = new IndexWriter(directory, conf); Document doc = new Document(); doc.add(new StringField("f", "v", Store.YES)); doc.add(new SortedDocValuesField("f", new BytesRef("v"))); writer.addDocument(doc); writer.close(); DirectoryReader reader =; AtomicReaderContext context = reader.leaves().get(0); AtomicReader atomicReader = context.reader(); SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f"); assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName())); reader.close(); }
Example #16
Source File: From incubator-retired-blur with Apache License 2.0 | 6 votes |
@Test public void test5() throws ParseException, IOException { parser = new SuperParser(LUCENE_VERSION, getFieldManager(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null, ScoreType.SUPER, new Term("_primedoc_")); Query query = parser.parse("<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> -<g.c:c g.d:d>"); BooleanQuery booleanQuery1 = new BooleanQuery(); booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD); booleanQuery1.add(new TermRangeQuery("a.d", new BytesRef("e"), new BytesRef("f"), false, false), Occur.SHOULD); booleanQuery1.add(new TermQuery(new Term("a.b", "b")), Occur.SHOULD); // std analyzer took the "<" out booleanQuery1.add(new TermQuery(new Term("a.test", "hello<")), Occur.SHOULD); BooleanQuery booleanQuery2 = new BooleanQuery(); booleanQuery2.add(new TermQuery(new Term("g.c", "c")), Occur.SHOULD); booleanQuery2.add(new TermQuery(new Term("g.d", "d")), Occur.SHOULD); SuperQuery superQuery1 = new SuperQuery(booleanQuery1, ScoreType.SUPER, new Term("_primedoc_")); SuperQuery superQuery2 = new SuperQuery(booleanQuery2, ScoreType.SUPER, new Term("_primedoc_")); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add(superQuery1, Occur.SHOULD); booleanQuery.add(superQuery2, Occur.MUST_NOT); assertQuery(booleanQuery, query); }
Example #17
Source File: From RedisDirectory with Apache License 2.0 | 6 votes |
public void testMMapDirectory() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); FSDirectory open ="E:/testlucene")); IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); long end = System.currentTimeMillis(); log.error("MMapDirectory consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher(; int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("MMapDirectory search consumes {}ms!", (end - start)); }
Example #18
Source File: From fess with Apache License 2.0 | 6 votes |
/**
 * Prepares a {@code QueryHelper} whose parser uses a {@code WhitespaceAnalyzer}, and
 * registers the components it is set up with (system properties backed by an empty
 * temporary file, plus the system, virtual-host and key-match helpers).
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  queryHelper = new QueryHelper() {
    protected QueryParser getQueryParser() {
      ExtendableQueryParser parser =
          new ExtendableQueryParser(Constants.DEFAULT_FIELD, new WhitespaceAnalyzer());
      parser.setAllowLeadingWildcard(true);
      parser.setDefaultOperator(QueryParser.Operator.AND);
      return parser;
    }
  };

  // Empty properties file, removed when the JVM exits.
  File propsFile = File.createTempFile("test", ".properties");
  propsFile.deleteOnExit();
  FileUtil.writeBytes(propsFile.getAbsolutePath(), "".getBytes("UTF-8"));

  ComponentUtil.register(new DynamicProperties(propsFile), "systemProperties");
  ComponentUtil.register(new SystemHelper(), "systemHelper");
  ComponentUtil.register(new VirtualHostHelper(), "virtualHostHelper");
  ComponentUtil.register(new KeyMatchHelper(), "keyMatchHelper");

  inject(queryHelper);
  queryHelper.init();
}
Example #19
Source File: From RedisDirectory with Apache License 2.0 | 6 votes |
public void testRamDirectory() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); RAMDirectory ramDirectory = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); long end = System.currentTimeMillis(); log.error("RamDirectory consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher(; int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RamDirectory search consumes {}ms!", (end - start)); }
Example #20
Source File: From RedisDirectory with Apache License 2.0 | 5 votes |
public void testRedisDirectoryWithRemoteJedisPool() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); JedisPool jedisPool = new JedisPool(new JedisPoolConfig(), "", 6379, Constants.TIME_OUT); RedisDirectory redisDirectory = new RedisDirectory(new JedisPoolStream(jedisPool)); IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig); for (int i = 0; i < 5000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); redisDirectory.close(); long end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedisPool consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher( RedisDirectory(new JedisStream("localhost", 6379)))); int total = 0; for (int i = 0; i < 1000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedisPool search consumes {}ms!", (end - start)); }
Example #21
Source File: From incubator-retired-blur with Apache License 2.0 | 5 votes |
/**
 * Stores a pre-filter built from three configured filter aliases and checks that
 * (a) its string form lists the three cached alias filters and (b) fetching the
 * pre-filter afterwards returns the very same instance.
 */
@Test
public void testFetchPreFilterNotEmpty() throws IOException, BlurException, ParseException {
  // Three filter aliases for table "test".
  BlurConfiguration blurConfig = new BlurConfiguration();
  blurConfig.set("blur.filter.alias.test.super:abc1", "(fam1.f1:abc1 fam2.f1:abc1)");
  blurConfig.set("blur.filter.alias.test.super:abc2", "(fam1.f1:abc2 fam2.f1:abc2)");
  blurConfig.set("blur.filter.alias.test.super:abc3", "(fam1.f1:abc3 fam2.f1:abc3)");
  AliasBlurFilterCache filterCache = new AliasBlurFilterCache(blurConfig);

  TableDescriptor descriptor = new TableDescriptor();
  descriptor.setName(TABLE);
  descriptor.setTableUri("file:///");
  final TableContext tableContext = TableContext.create(descriptor);
  final BaseFieldManager fieldManager = getFieldManager(new WhitespaceAnalyzer(LUCENE_VERSION));

  Filter parsedFilter =
      QueryParserUtil.parseFilter(TABLE, TEST_FILTER, false, fieldManager, filterCache, tableContext);

  // Parser handed to the cache for parsing filter query strings.
  FilterParser aliasParser = new FilterParser() {
    @Override
    public Query parse(String query) throws ParseException {
      return new SuperParser(LUCENE_VERSION, fieldManager, false, null, ScoreType.CONSTANT,
          tableContext.getDefaultPrimeDocTerm()).parse(query);
    }
  };
  Filter stored = filterCache.storePreFilter(TABLE, TEST_FILTER, parsedFilter, aliasParser);

  assertNotNull(stored);
  assertEquals("BooleanFilter(" + "FilterCache(super-abc1,QueryWrapperFilter(fam1.f1:abc1 fam2.f1:abc1)) "
      + "FilterCache(super-abc2,QueryWrapperFilter(fam1.f1:abc2 fam2.f1:abc2)) "
      + "FilterCache(super-abc3,QueryWrapperFilter(fam1.f1:abc3 fam2.f1:abc3))" + ")", stored.toString());

  Filter fetched = filterCache.fetchPreFilter(TABLE, TEST_FILTER);
  assertNotNull(fetched);
  // Identity, not equality: the cache must return the stored instance.
  assertTrue(stored == fetched);
}
Example #22
Source File: From semanticvectors with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java [triples text file] "; if (args.length == 0) { System.err.println("Usage: " + usage); System.exit(1); } FlagConfig flagConfig = FlagConfig.getFlagConfig(args); // Allow for the specification of a directory to write the index to. if (flagConfig.luceneindexpath().length() > 0) { INDEX_DIR = FileSystems.getDefault().getPath(flagConfig.luceneindexpath()); } if (Files.exists(INDEX_DIR)) { throw new IllegalArgumentException( "Cannot save index to '" + INDEX_DIR + "' directory, please delete it first"); } try { // Create IndexWriter using WhiteSpaceAnalyzer without any stopword list. IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer()); IndexWriter writer = new IndexWriter(, writerConfig); final File triplesTextFile = new File(args[0]); if (!triplesTextFile.exists() || !triplesTextFile.canRead()) { writer.close(); throw new IOException("Document file '" + triplesTextFile.getAbsolutePath() + "' does not exist or is not readable, please check the path"); } System.out.println("Indexing to directory '" +INDEX_DIR+ "'..."); indexDoc(writer, triplesTextFile); writer.close(); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
Example #23
Source File: From lucene-solr with Apache License 2.0 | 5 votes |
@Override public Collection<Token> convert(String origQuery) { Collection<Token> result = new HashSet<>(); try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); TokenStream ts = analyzer.tokenStream("", origQuery)) { // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { Token tok = new Token(); tok.copyBuffer(termAtt.buffer(), 0, termAtt.length()); tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); tok.setFlags(flagsAtt.getFlags()); tok.setPayload(payloadAtt.getPayload()); tok.setPositionIncrement(posIncAtt.getPositionIncrement()); tok.setType(typeAtt.type()); result.add(tok); } ts.end(); return result; } catch (IOException e) { throw new RuntimeException(e); } }
Example #24
Source File: From crate with Apache License 2.0 | 5 votes |
@Override public KeywordFieldMapper build(BuilderContext context) { setupFieldType(context); if (normalizerName != null) { NamedAnalyzer normalizer = indexAnalyzers.getNormalizer(normalizerName); if (normalizer == null) { throw new MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]"); } fieldType().setNormalizer(normalizer); final NamedAnalyzer searchAnalyzer; if (fieldType().splitQueriesOnWhitespace) { searchAnalyzer = indexAnalyzers.getWhitespaceNormalizer(normalizerName); } else { searchAnalyzer = normalizer; } fieldType().setSearchAnalyzer(searchAnalyzer); } else if (fieldType().splitQueriesOnWhitespace) { fieldType().setSearchAnalyzer(new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer())); } return new KeywordFieldMapper( name, position, defaultExpression, fieldType, defaultFieldType, ignoreAbove, lengthLimit, context.indexSettings(),, context), copyTo ); }
Example #25
Source File: From RedisDirectory with Apache License 2.0 | 5 votes |
public void testRedisDirectoryWithJedisPool() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); //indexWriterConfig.setInfoStream(System.out); //indexWriterConfig.setRAMBufferSizeMB(2048); //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy(); //logByteSizeMergePolicy.setMinMergeMB(1); //logByteSizeMergePolicy.setMaxMergeMB(64); //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64); //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false); //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig(); //获取连接等待时间 //genericObjectPoolConfig.setMaxWaitMillis(3000); //10s超时时间 JedisPool jedisPool = new JedisPool(new JedisPoolConfig(), "localhost", 6379, Constants.TIME_OUT); RedisDirectory redisDirectory = new RedisDirectory(new JedisPoolStream(jedisPool)); IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); redisDirectory.close(); long end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedisPool consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher( RedisDirectory(new JedisStream("localhost", 6379)))); int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedisPool search consumes {}ms!", (end - start)); }
Example #26
Source File: From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks the number of tokens the converter produces for a series of queries
 * containing numeric terms, with and without field prefixes and +/- operators.
 */
@Test
@SuppressWarnings({"rawtypes"})
public void testNumeric() throws Exception {
  SpellingQueryConverter queryConverter = new SpellingQueryConverter();
  queryConverter.init(new NamedList());
  queryConverter.setAnalyzer(new WhitespaceAnalyzer());

  // inputs[k] is expected to yield expectedCounts[k] tokens.
  final String[] inputs = {
    "12345", "foo:12345", "12345 67890", "foo:(12345 67890)",
    "foo:(life 67890)", "12345 life", "+12345 +life", "-12345 life"
  };
  final int[] expectedCounts = {1, 1, 2, 2, 2, 2, 2, 2};

  for (int k = 0; k < inputs.length; k++) {
    Collection<Token> found = queryConverter.convert(inputs[k]);
    assertTrue("tokens Size: " + found.size() + " is not: " + expectedCounts[k],
        found.size() == expectedCounts[k]);
  }
}
Example #27
Source File: From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that a single {@code field:value} query converts to exactly one non-null token
 * collection entry.
 */
@Test
@SuppressWarnings({"rawtypes"})
public void test() throws Exception {
  SpellingQueryConverter queryConverter = new SpellingQueryConverter();
  queryConverter.init(new NamedList());
  queryConverter.setAnalyzer(new WhitespaceAnalyzer());

  Collection<Token> found = queryConverter.convert("field:foo");

  assertTrue("tokens is null and it shouldn't be", found != null);
  assertTrue("tokens Size: " + found.size() + " is not: " + 1, found.size() == 1);
}
Example #28
Source File: From RedisDirectory with Apache License 2.0 | 5 votes |
public void testRedisDirectoryWithJedis() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); //indexWriterConfig.setInfoStream(System.out); //indexWriterConfig.setRAMBufferSizeMB(2048); //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy(); //logByteSizeMergePolicy.setMinMergeMB(1); //logByteSizeMergePolicy.setMaxMergeMB(64); //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64); //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false); //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig(); //获取连接等待时间 //genericObjectPoolConfig.setMaxWaitMillis(3000); //10s超时时间 RedisDirectory redisDirectory = new RedisDirectory(new JedisStream("localhost", 6379)); IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); redisDirectory.close(); long end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedis consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher( RedisDirectory(new JedisStream("localhost", 6379)))); int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedis search consumes {}ms!", (end - start)); }
Example #29
Source File: From lucene-solr with Apache License 2.0 | 5 votes |
private void doTestIndexProperties(boolean setIndexProps, boolean indexPropsVal, int numExpectedResults) throws Exception { Properties props = new Properties(); // Indexing configuration. props.setProperty("analyzer", WhitespaceAnalyzer.class.getName()); props.setProperty("content.source", OneDocSource.class.getName()); props.setProperty("directory", "ByteBuffersDirectory"); if (setIndexProps) { props.setProperty("doc.index.props", Boolean.toString(indexPropsVal)); } // Create PerfRunData Config config = new Config(props); PerfRunData runData = new PerfRunData(config); TaskSequence tasks = new TaskSequence(runData, getTestName(), null, false); tasks.addTask(new CreateIndexTask(runData)); tasks.addTask(new AddDocTask(runData)); tasks.addTask(new CloseIndexTask(runData)); tasks.doLogic(); IndexReader reader =; IndexSearcher searcher = newSearcher(reader); TopDocs td = TermQuery(new Term("key", "value")), 10); assertEquals(numExpectedResults, td.totalHits.value); reader.close(); }
Example #30
Source File: From RedisDirectory with Apache License 2.0 | 5 votes |
public void testRedisDirectoryWithJedis() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); //indexWriterConfig.setInfoStream(System.out); //indexWriterConfig.setRAMBufferSizeMB(2048); //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy(); //logByteSizeMergePolicy.setMinMergeMB(1); //logByteSizeMergePolicy.setMaxMergeMB(64); //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64); //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false); //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig(); //获取连接等待时间 //genericObjectPoolConfig.setMaxWaitMillis(3000); //10s超时时间 RedisDirectory redisDirectory = new RedisDirectory(new JedisStream("localhost", 6379)); IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); redisDirectory.close(); long end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedis consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher( RedisDirectory(new JedisStream("localhost", 6379)))); int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search =, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedis search consumes {}ms!", (end - start)); }