Java Code Examples for org.apache.lucene.analysis.Analyzer#PER_FIELD_REUSE_STRATEGY
The following examples show how to use
org.apache.lucene.analysis.Analyzer#PER_FIELD_REUSE_STRATEGY .
You can vote up the examples you like or vote down the ones you don't like,
and you can go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: MLAnalayser.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Builds an analyser that delegates analysis per field, reusing token-stream
 * components per field via {@link Analyzer#PER_FIELD_REUSE_STRATEGY}.
 *
 * @param mlAnalaysisMode the multilingual analysis mode to apply
 * @param schema          the Solr index schema used to resolve field analysis
 * @param mode            whether this analyser is used for indexing or querying
 */
public MLAnalayser(MLAnalysisMode mlAnalaysisMode, IndexSchema schema, Mode mode)
{
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.mode = mode;
    this.schema = schema;
    this.mlAnalaysisMode = mlAnalaysisMode;
}
Example 2
Source File: AlfrescoAnalyzerWrapper.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Wraps per-field analyzers for the given schema, sharing token-stream
 * components per field via {@link Analyzer#PER_FIELD_REUSE_STRATEGY}.
 *
 * @param schema the Solr index schema used to look up field analyzers
 * @param mode   whether the wrapper serves indexing or querying
 */
public AlfrescoAnalyzerWrapper(IndexSchema schema, Mode mode)
{
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.mode = mode;
    this.schema = schema;
}
Example 3
Source File: TestIndexWriterExceptions.java From lucene-solr with Apache License 2.0 | 5 votes |
// Verifies that an exception thrown by the analysis chain just before a flush
// (maxBufferedDocs=2 forces the flush on the second document) does not corrupt
// the writer: the failed document is rejected but the writer stays usable.
public void testExceptionJustBeforeFlush() throws IOException {
  Directory dir = newDirectory();
  // Toggled to make the analyzer start failing mid-test.
  final AtomicBoolean doCrash = new AtomicBoolean();
  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
      TokenStream stream = tokenizer;
      // Once doCrash is set, wrap the chain in a filter that throws during analysis.
      if (doCrash.get()) {
        stream = new CrashingFilter(fieldName, stream);
      }
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  // TestPoint1 hooks the writer so internal test points are exercised.
  IndexWriter w = RandomIndexWriter.mockIndexWriter(random(), dir,
      newIndexWriterConfig(analyzer)
        .setMaxBufferedDocs(2),
      new TestPoint1());
  Document doc = new Document();
  doc.add(newTextField("field", "a field", Field.Store.YES));
  w.addDocument(doc);
  // This document's analysis will crash (CrashingFilter fires once doCrash is set).
  Document crashDoc = new Document();
  crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
  doCrash.set(true);
  expectThrows(IOException.class, () -> {
    w.addDocument(crashDoc);
  });
  // Writer must still accept documents and close cleanly after the failure.
  w.addDocument(doc);
  w.close();
  dir.close();
}
Example 4
Source File: TestIndexWriterExceptions.java From lucene-solr with Apache License 2.0 | 5 votes |
// Verifies that when analysis of one document fails, only that document is
// dropped: the earlier, successfully added document survives the commit.
// Repeated 10 times with a randomized RAM buffer so the failure lands both
// before and after flushes.
public void testDocumentsWriterExceptionFailOneDoc() throws Exception {
  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
      // CrashingFilter throws partway through the stream for every field.
      return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
    }
  };
  for (int i = 0; i < 10; i++) {
    try (Directory dir = newDirectory();
         // Tiny-vs-huge RAM buffer randomizes whether a flush happens between the two adds.
         final IndexWriter writer = new IndexWriter(dir,
             newIndexWriterConfig(analyzer)
               .setMaxBufferedDocs(-1)
               .setRAMBufferSizeMB(random().nextBoolean() ? 0.00001 : Integer.MAX_VALUE)
               // Keep fully deleted segments so maxDoc still counts the failed doc.
               .setMergePolicy(new FilterMergePolicy(NoMergePolicy.INSTANCE) {
                 @Override
                 public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) {
                   return true;
                 }
               }))) {
      Document doc = new Document();
      doc.add(newField("contents", "here are some contents", DocCopyIterator.custom5));
      writer.addDocument(doc);
      // Re-add the same Document instance with a "crash" field: analysis throws mid-document.
      doc.add(newField("crash", "this should crash after 4 terms", DocCopyIterator.custom5));
      doc.add(newField("other", "this will not get indexed", DocCopyIterator.custom5));
      expectThrows(IOException.class, () -> {
        writer.addDocument(doc);
      });
      writer.commit();
      try (IndexReader reader = DirectoryReader.open(dir)) {
        // Both docs hit "contents"/"here" before the crash, so docFreq is 2 and
        // maxDoc is 2, but only the first doc remains live.
        assertEquals(2, reader.docFreq(new Term("contents", "here")));
        assertEquals(2, reader.maxDoc());
        assertEquals(1, reader.numDocs());
      }
    }
  }
}
Example 5
Source File: FieldNameAnalyzer.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * Creates an analyzer that dispatches to a fixed analyzer per field name,
 * reusing components per field via {@link Analyzer#PER_FIELD_REUSE_STRATEGY}.
 *
 * @param analyzers mapping from field name to the analyzer for that field;
 *                  snapshotted with {@code CopyOnWriteHashMap.copyOf} so later
 *                  caller-side mutations are not observed
 */
public FieldNameAnalyzer(Map<String, Analyzer> analyzers)
{
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.analyzers = CopyOnWriteHashMap.copyOf(analyzers);
}
Example 6
Source File: MapperService.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * Wraps a default analyzer with a per-field-type extractor, sharing
 * components per field via {@link Analyzer#PER_FIELD_REUSE_STRATEGY}.
 *
 * @param defaultAnalyzer analyzer used when no field-specific analyzer is found
 * @param extractAnalyzer function resolving the analyzer for a mapped field type
 */
MapperAnalyzerWrapper(Analyzer defaultAnalyzer, Function<MappedFieldType, Analyzer> extractAnalyzer)
{
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.extractAnalyzer = extractAnalyzer;
    this.defaultAnalyzer = defaultAnalyzer;
}
Example 7
Source File: MLAnalayser.java From SearchServices with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Builds a multilingual analyser with per-field component reuse
 * ({@link Analyzer#PER_FIELD_REUSE_STRATEGY}).
 *
 * @param mlAnalaysisMode the multilingual analysis mode to apply
 */
public MLAnalayser(MLAnalysisMode mlAnalaysisMode)
{
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.mlAnalaysisMode = mlAnalaysisMode;
}
Example 8
Source File: LanguagePrefixedTextAnalyzer.java From SearchServices with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Creates an analyzer for language-prefixed text fields, reusing components
 * per field via {@link Analyzer#PER_FIELD_REUSE_STRATEGY}.
 *
 * @param indexSchema the Solr index schema used to resolve analysis
 * @param mode        whether the analyzer serves indexing or querying
 */
public LanguagePrefixedTextAnalyzer(IndexSchema indexSchema, AlfrescoAnalyzerWrapper.Mode mode)
{
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.indexSchema = indexSchema;
    this.mode = mode;
}
Example 9
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 4 votes |
private void matchedFieldsTestCase( boolean useMatchedFields, boolean fieldMatch, String fieldValue, String expected, Query... queryClauses ) throws IOException { Document doc = new Document(); FieldType stored = new FieldType( TextField.TYPE_STORED ); stored.setStoreTermVectorOffsets( true ); stored.setStoreTermVectorPositions( true ); stored.setStoreTermVectors( true ); stored.freeze(); FieldType matched = new FieldType( TextField.TYPE_NOT_STORED ); matched.setStoreTermVectorOffsets( true ); matched.setStoreTermVectorPositions( true ); matched.setStoreTermVectors( true ); matched.freeze(); doc.add( new Field( "field", fieldValue, stored ) ); // Whitespace tokenized with English stop words doc.add( new Field( "field_exact", fieldValue, matched ) ); // Whitespace tokenized without stop words doc.add( new Field( "field_super_exact", fieldValue, matched ) ); // Whitespace tokenized without toLower doc.add( new Field( "field_characters", fieldValue, matched ) ); // Each letter is a token doc.add( new Field( "field_tripples", fieldValue, matched ) ); // Every three letters is a token doc.add( new Field( "field_sliced", fieldValue.substring( 0, // Sliced at 10 chars then analyzed just like field Math.min( fieldValue.length() - 1 , 10 ) ), matched ) ); doc.add( new Field( "field_der_red", new CannedTokenStream( // Hacky field containing "der" and "red" at pos = 0 token( "der", 1, 0, 3 ), token( "red", 0, 0, 3 ) ), matched ) ); final Map<String, Analyzer> fieldAnalyzers = new TreeMap<>(); fieldAnalyzers.put( "field", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET ) ); fieldAnalyzers.put( "field_exact", new MockAnalyzer( random() ) ); fieldAnalyzers.put( "field_super_exact", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, false ) ); fieldAnalyzers.put( "field_characters", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp(".").toAutomaton() ), true ) ); fieldAnalyzers.put( "field_tripples", new MockAnalyzer( 
random(), new CharacterRunAutomaton( new RegExp("...").toAutomaton() ), true ) ); fieldAnalyzers.put( "field_sliced", fieldAnalyzers.get( "field" ) ); fieldAnalyzers.put( "field_der_red", fieldAnalyzers.get( "field" ) ); // This is required even though we provide a token stream Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) { public Analyzer getWrappedAnalyzer(String fieldName) { return fieldAnalyzers.get( fieldName ); } }; Directory dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(analyzer)); writer.addDocument( doc ); FastVectorHighlighter highlighter = new FastVectorHighlighter(); FragListBuilder fragListBuilder = new SimpleFragListBuilder(); FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder(); IndexReader reader = DirectoryReader.open(writer); String[] preTags = new String[] { "<b>" }; String[] postTags = new String[] { "</b>" }; Encoder encoder = new DefaultEncoder(); int docId = 0; BooleanQuery.Builder query = new BooleanQuery.Builder(); for ( Query clause : queryClauses ) { query.add( clause, Occur.MUST ); } FieldQuery fieldQuery = new FieldQuery( query.build(), reader, true, fieldMatch ); String[] bestFragments; if ( useMatchedFields ) { Set< String > matchedFields = new HashSet<>(); matchedFields.add( "field" ); matchedFields.add( "field_exact" ); matchedFields.add( "field_super_exact" ); matchedFields.add( "field_characters" ); matchedFields.add( "field_tripples" ); matchedFields.add( "field_sliced" ); matchedFields.add( "field_der_red" ); bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", matchedFields, 25, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder ); } else { bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 25, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder ); } assertEquals( expected, bestFragments[ 0 ] ); reader.close(); writer.close(); dir.close(); }
Example 10
Source File: MapperService.java From crate with Apache License 2.0 | 4 votes |
/**
 * Wraps a default analyzer with a per-field-type extraction function, sharing
 * token-stream components per field via {@link Analyzer#PER_FIELD_REUSE_STRATEGY}.
 *
 * @param defaultAnalyzer fallback analyzer when the extractor yields none
 * @param extractAnalyzer function resolving the analyzer for a mapped field type
 */
MapperAnalyzerWrapper(Analyzer defaultAnalyzer, Function<MappedFieldType, Analyzer> extractAnalyzer)
{
    super(Analyzer.PER_FIELD_REUSE_STRATEGY);
    this.extractAnalyzer = extractAnalyzer;
    this.defaultAnalyzer = defaultAnalyzer;
}