org.apache.lucene.codecs.Codec Java Examples
The following examples show how to use
org.apache.lucene.codecs.Codec.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestSuggestField.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a randomized {@link IndexWriterConfig} whose codec hands a completion
 * postings format to every field listed in {@code suggestFields}; any other
 * field falls through to the parent codec's choice.
 *
 * @param analyzer      analyzer passed to the new config
 * @param suggestFields names of the fields that get the completion postings format
 * @return the config, with the merge policy and the filtering codec already set
 */
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig config = newIndexWriterConfig(random(), analyzer);
  config.setMergePolicy(newLogMergePolicy());
  config.setCodec(new Lucene86Codec() {
    // One FST load mode is drawn at random when the codec instance is created
    // and is shared by every suggest field.
    CompletionPostingsFormat.FSTLoadMode fstLoadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return suggestFields.contains(field)
          ? postingsFormat
          : super.getPostingsFormatForField(field);
    }
  });
  return config;
}
Example #2
Source File: SolrResourceLoader.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after {@link #addToClassLoader(List)} * and before using this ResourceLoader. */ synchronized void reloadLuceneSPI() { // TODO improve to use a static Set<URL> to check when we need to if (!needToReloadLuceneSPI) { return; } needToReloadLuceneSPI = false; // reset log.debug("Reloading Lucene SPI"); // Codecs: PostingsFormat.reloadPostingsFormats(this.classLoader); DocValuesFormat.reloadDocValuesFormats(this.classLoader); Codec.reloadCodecs(this.classLoader); // Analysis: CharFilterFactory.reloadCharFilters(this.classLoader); TokenFilterFactory.reloadTokenFilters(this.classLoader); TokenizerFactory.reloadTokenizers(this.classLoader); }
Example #3
Source File: CommitsImpl.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Looks up the codec of the segment called {@code name} in the commit with
 * generation {@code commitGen}.
 *
 * @return the codec, or an empty Optional when no segment infos are found for the
 *         generation or no segment carries that name
 * @throws LukeException wrapping any IOException raised while loading the segment infos
 */
@Override
public Optional<Codec> getSegmentCodec(long commitGen, String name) throws LukeException {
  try {
    SegmentInfos segmentInfos = findSegmentInfos(commitGen);
    if (segmentInfos != null) {
      return segmentInfos.asList().stream()
          .filter(candidate -> candidate.info.name.equals(name))
          .findAny()
          .map(match -> match.info.getCodec());
    }
    return Optional.empty();
  } catch (IOException ioe) {
    String message = String.format(Locale.ENGLISH, "Failed to load segment infos for commit generation %d", commitGen);
    throw new LukeException(message, ioe);
  }
}
Example #4
Source File: RandomPostingsTester.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Indexes all fields/terms at the specified * IndexOptions, and fully tests at that IndexOptions. */ public void testFull(Codec codec, Path path, IndexOptions options, boolean withPayloads) throws Exception { Directory dir = LuceneTestCase.newFSDirectory(path); // TODO test thread safety of buildIndex too FieldsProducer fieldsProducer = buildIndex(codec, dir, options, withPayloads, true); testFields(fieldsProducer); IndexOptions[] allOptions = IndexOptions.values(); int maxIndexOption = Arrays.asList(allOptions).indexOf(options); for(int i=0;i<=maxIndexOption;i++) { testTerms(fieldsProducer, EnumSet.allOf(Option.class), allOptions[i], options, true); if (withPayloads) { // If we indexed w/ payloads, also test enums w/o accessing payloads: testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), allOptions[i], options, true); } } fieldsProducer.close(); dir.close(); }
Example #5
Source File: TestSegmentInfos.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Commits a {@link SegmentInfos} containing a single segment created with
 * {@code Version.LUCENE_9_0_0}, reads the latest commit back, and checks the
 * reported minimum segment version and commit version.
 */
public void testVersionsOneSegment() throws IOException {
  BaseDirectoryWrapper dir = newDirectory();
  dir.setCheckIndexOnClose(false);

  byte[] segmentId = StringHelper.randomId();
  Codec codec = Codec.getDefault();
  SegmentInfos written = new SegmentInfos(Version.LATEST.major);

  SegmentInfo info = new SegmentInfo(
      dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
      Collections.<String,String>emptyMap(), segmentId, Collections.<String,String>emptyMap(), null);
  info.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);

  written.add(new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId()));
  written.commit(dir);

  // Read the commit back from the directory and verify both version fields.
  SegmentInfos reread = SegmentInfos.readLatestCommit(dir);
  assertEquals(Version.LUCENE_9_0_0, reread.getMinSegmentLuceneVersion());
  assertEquals(Version.LATEST, reread.getCommitLuceneVersion());

  dir.close();
}
Example #6
Source File: BaseFieldInfoFormatTestCase.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Test field infos attributes coming back are not mutable */ public void testImmutableAttributes() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); SegmentInfo segmentInfo = newSegmentInfo(dir, "_123"); FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null)); FieldInfo fi = builder.getOrAdd("field"); fi.setIndexOptions(TextField.TYPE_STORED.indexOptions()); addAttributes(fi); fi.putAttribute("foo", "bar"); fi.putAttribute("bar", "baz"); FieldInfos infos = builder.finish(); codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT); FieldInfos infos2 = codec.fieldInfosFormat().read(dir, segmentInfo, "", IOContext.DEFAULT); assertEquals(1, infos2.size()); assertNotNull(infos2.fieldInfo("field")); Map<String,String> attributes = infos2.fieldInfo("field").attributes(); // shouldn't be able to modify attributes expectThrows(UnsupportedOperationException.class, () -> { attributes.put("bogus", "bogus"); }); dir.close(); }
Example #7
Source File: TestRuleSetupAndRestoreClassEnv.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Check codec restrictions: the codec's own name, the format names of a
 * {@code RandomCodec} (only inspected when {@code avoidCodecs} is non-empty),
 * the codec's postings format name, and {@code LuceneTestCase.TEST_POSTINGSFORMAT}
 * are each run through {@code shouldAvoidCodec}.
 *
 * @throws AssumptionViolatedException if the class does not work with a given codec.
 */
private void checkCodecRestrictions(Codec codec) {
  final String codecName = codec.getName();
  assumeFalse("Class not allowed to use codec: " + codecName + ".",
      shouldAvoidCodec(codecName));

  boolean inspectFormatNames = codec instanceof RandomCodec && !avoidCodecs.isEmpty();
  if (inspectFormatNames) {
    for (String formatName : ((RandomCodec) codec).formatNames) {
      assumeFalse("Class not allowed to use postings format: " + formatName + ".",
          shouldAvoidCodec(formatName));
    }
  }

  final String postingsName = codec.postingsFormat().getName();
  assumeFalse("Class not allowed to use postings format: " + postingsName + ".",
      shouldAvoidCodec(postingsName));

  assumeFalse("Class not allowed to use postings format: " + LuceneTestCase.TEST_POSTINGSFORMAT + ".",
      shouldAvoidCodec(LuceneTestCase.TEST_POSTINGSFORMAT));
}
Example #8
Source File: TestTermVectorsReader.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testReader() throws IOException { TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random())); for (int j = 0; j < 5; j++) { Terms vector = reader.get(j).terms(testFields[0]); assertNotNull(vector); assertEquals(testTerms.length, vector.size()); TermsEnum termsEnum = vector.iterator(); for (int i = 0; i < testTerms.length; i++) { final BytesRef text = termsEnum.next(); assertNotNull(text); String term = text.utf8ToString(); //System.out.println("Term: " + term); assertEquals(testTerms[i], term); } assertNull(termsEnum.next()); } reader.close(); }
Example #9
Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Test attributes map */ public void testAttributes() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); Map<String,String> attributes = new HashMap<>(); attributes.put("key1", "value1"); attributes.put("key2", "value2"); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec, Collections.emptyMap(), id, attributes, null); info.setFiles(Collections.<String>emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(attributes, info2.getAttributes()); // attributes map should be immutable expectThrows(UnsupportedOperationException.class, () -> { info2.getAttributes().put("bogus", "bogus"); }); dir.close(); }
Example #10
Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the mixed-postings scenario with a codec that returns a fresh
 * {@code DirectPostingsFormat} instance on every lookup of the "id" and "date"
 * fields, and defers to the parent codec for every other field.
 */
public void testSameCodecDifferentInstance() throws Exception {
  Codec codec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      boolean useDirect = "id".equals(field) || "date".equals(field);
      // A new instance is created per call on purpose: same format, different object.
      return useDirect ? new DirectPostingsFormat() : super.getPostingsFormatForField(field);
    }
  };
  doTestMixedPostings(codec);
}
Example #11
Source File: CommitsImplTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Asking for the codec of a segment named "xxx" in commit generation 1 must
 * produce an empty Optional.
 */
@Test
public void testSegmentCodec_invalid_name() {
  CommitsImpl commits = new CommitsImpl(reader, indexDir.toString());
  assertFalse(commits.getSegmentCodec(1, "xxx").isPresent());
}
Example #12
Source File: TestPendingDeletes.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testDeleteDoc() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); PendingDeletes deletes = newPendingDeletes(commitInfo); assertNull(deletes.getLiveDocs()); int docToDelete = TestUtil.nextInt(random(), 0, 7); assertTrue(deletes.delete(docToDelete)); assertNotNull(deletes.getLiveDocs()); assertEquals(1, deletes.numPendingDeletes()); Bits liveDocs = deletes.getLiveDocs(); assertFalse(liveDocs.get(docToDelete)); assertFalse(deletes.delete(docToDelete)); // delete again assertTrue(liveDocs.get(8)); assertTrue(deletes.delete(8)); assertTrue(liveDocs.get(8)); // we have a snapshot assertEquals(2, deletes.numPendingDeletes()); assertTrue(liveDocs.get(9)); assertTrue(deletes.delete(9)); assertTrue(liveDocs.get(9)); // now make sure new live docs see the deletions liveDocs = deletes.getLiveDocs(); assertFalse(liveDocs.get(9)); assertFalse(liveDocs.get(8)); assertFalse(liveDocs.get(docToDelete)); assertEquals(3, deletes.numPendingDeletes()); dir.close(); }
Example #13
Source File: TestDoc.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Merges the segments {@code si1} and {@code si2} into a new segment named
 * {@code merged}, written through a tracking wrapper around {@code si1}'s directory.
 *
 * <p>When {@code useCompoundFile} is true, the merged files are additionally
 * written as a compound file into {@code dir} and the individual files are
 * deleted from {@code si1}'s directory afterwards.
 *
 * @param dir             directory the compound file is written to
 * @param si1             first source segment; its directory also receives the merged files
 * @param si2             second source segment
 * @param merged          name of the resulting segment
 * @param useCompoundFile whether to pack the merged files into a compound file
 * @return commit info wrapping the merged segment
 * @throws Exception if reading, merging or writing fails
 */
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile)
    throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));

  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec,
      Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  // Both readers are released even if the second one fails to open or the
  // merge throws; previously they leaked in those cases.
  try (SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
       SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context)) {
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
        si, InfoStream.getDefault(), trackingDir,
        new FieldInfos.FieldNumbers(null), context);
    // The returned merge state is not needed here.
    merger.merge();
  }

  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));

  if (useCompoundFile) {
    // Capture the file list before switching the segment to compound mode.
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }

  return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId());
}
Example #14
Source File: CrankyCodec.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates a cranky wrapper around {@code delegate}.
 * Try passing Asserting for the most fun.
 *
 * @param delegate codec being wrapped; its name is reused as this codec's name
 * @param random   stored in this instance's {@code random} field
 */
public CrankyCodec(Codec delegate, Random random) {
  // Taking over the delegate's name makes this codec impersonate it, so it
  // does not need to be in SPI and file formats are not changed.
  super(delegate.getName(), delegate);
  this.random = random;
}
Example #15
Source File: TestCodecSupport.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testDocValuesFormats() { // NOTE: Direct (and Disk) DocValues formats were removed, so we use "Asserting" // as a way to vet that the configuration actually matters. Codec codec = h.getCore().getCodec(); Map<String, SchemaField> fields = h.getCore().getLatestSchema().getFields(); SchemaField schemaField = fields.get("string_disk_f"); PerFieldDocValuesFormat format = (PerFieldDocValuesFormat) codec.docValuesFormat(); assertEquals(TestUtil.getDefaultDocValuesFormat().getName(), format.getDocValuesFormatForField(schemaField.getName()).getName()); schemaField = fields.get("string_direct_f"); assertEquals("Asserting", format.getDocValuesFormatForField(schemaField.getName()).getName()); schemaField = fields.get("string_f"); assertEquals(TestUtil.getDefaultDocValuesFormat().getName(), format.getDocValuesFormatForField(schemaField.getName()).getName()); }
Example #16
Source File: TestLucene70SegmentInfoFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns a codec named "Lucene84" that delegates to the registered "Lucene84"
 * codec for everything except the segment info format, which is replaced by a
 * new {@code Lucene70RWSegmentInfoFormat} on each call.
 */
@Override
protected Codec getCodec() {
  Codec delegate = Codec.forName("Lucene84");
  return new FilterCodec("Lucene84", delegate) {
    @Override
    public SegmentInfoFormat segmentInfoFormat() {
      return new Lucene70RWSegmentInfoFormat();
    }
  };
}
Example #17
Source File: PluginsService.java From crate with Apache License 2.0 | 5 votes |
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after the new classloader has been created to * register the services for use. */ static void reloadLuceneSPI(ClassLoader loader) { // do NOT change the order of these method calls! // Codecs: PostingsFormat.reloadPostingsFormats(loader); DocValuesFormat.reloadDocValuesFormats(loader); Codec.reloadCodecs(loader); // Analysis: CharFilterFactory.reloadCharFilters(loader); TokenFilterFactory.reloadTokenFilters(loader); TokenizerFactory.reloadTokenizers(loader); }
Example #18
Source File: TestCodecSupport.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks which postings format the core's codec resolves for three schema
 * fields: "string_direct_f" must resolve to "Direct", while
 * "string_standard_f" and "string_f" must resolve to the test default.
 */
public void testPostingsFormats() {
  Codec codec = h.getCore().getCodec();
  Map<String, SchemaField> fields = h.getCore().getLatestSchema().getFields();
  PerFieldPostingsFormat perField = (PerFieldPostingsFormat) codec.postingsFormat();

  assertEquals("Direct",
      perField.getPostingsFormatForField(fields.get("string_direct_f").getName()).getName());

  assertEquals(TestUtil.getDefaultPostingsFormat().getName(),
      perField.getPostingsFormatForField(fields.get("string_standard_f").getName()).getName());

  assertEquals(TestUtil.getDefaultPostingsFormat().getName(),
      perField.getPostingsFormatForField(fields.get("string_f").getName()).getName());
}
Example #19
Source File: TestRuleSetupAndRestoreClassEnv.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * After suite cleanup (always invoked). Restores the saved default codec and
 * info stream unconditionally; the default locale and time zone are restored
 * only when a saved value is present.
 */
@Override
protected void after() throws Exception {
  Codec.setDefault(savedCodec);
  InfoStream.setDefault(savedInfoStream);

  if (savedLocale != null) {
    Locale.setDefault(savedLocale);
  }
  if (savedTimeZone != null) {
    TimeZone.setDefault(savedTimeZone);
  }
}
Example #20
Source File: TestUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns the name of the doc values format {@code codec} uses for {@code field}.
 * When the codec's doc values format is a {@code PerFieldDocValuesFormat}, the
 * per-field format's name is returned; otherwise the format's own name.
 */
public static String getDocValuesFormat(Codec codec, String field) {
  DocValuesFormat format = codec.docValuesFormat();
  if (!(format instanceof PerFieldDocValuesFormat)) {
    return format.getName();
  }
  PerFieldDocValuesFormat perField = (PerFieldDocValuesFormat) format;
  return perField.getDocValuesFormatForField(field).getName();
}
Example #21
Source File: TestCompressingTermVectorsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns a compressing codec for this test: {@code randomInstance} when
 * {@code TEST_NIGHTLY} is set, {@code reasonableInstance} otherwise.
 */
@Override
protected Codec getCodec() {
  return TEST_NIGHTLY
      ? CompressingCodec.randomInstance(random())
      : CompressingCodec.reasonableInstance(random());
}
Example #22
Source File: SegmentInfo.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Assigns the codec of this segment. Can only be called once: an assertion
 * checks that no codec has been set yet.
 *
 * @param codec the codec to store
 * @throws IllegalArgumentException if {@code codec} is null
 */
public void setCodec(Codec codec) {
  assert this.codec == null;
  if (codec == null) {
    throw new IllegalArgumentException("codec must be non-null");
  }
  this.codec = codec;
}
Example #23
Source File: SegmentInfos.java From lucene-solr with Apache License 2.0 | 5 votes |
private static Codec readCodec(DataInput input) throws IOException { final String name = input.readString(); try { return Codec.forName(name); } catch (IllegalArgumentException e) { // maybe it's an old default codec that moved if (name.startsWith("Lucene")) { throw new IllegalArgumentException("Could not load codec '" + name + "'. Did you forget to add lucene-backward-codecs.jar?", e); } throw e; } }
Example #24
Source File: IndexWriterConfig.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Set the {@link Codec}.
 *
 * <p>
 * Only takes effect when IndexWriter is first created.
 *
 * @param codec the codec to use
 * @return this config, so calls can be chained
 * @throws IllegalArgumentException if {@code codec} is null
 */
public IndexWriterConfig setCodec(Codec codec) {
  if (codec == null) {
    throw new IllegalArgumentException("codec must not be null");
  }
  this.codec = codec;
  return this;
}
Example #25
Source File: BaseFieldInfoFormatTestCase.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Test field infos read/write with random fields, with different values. */ public void testRandom() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); SegmentInfo segmentInfo = newSegmentInfo(dir, "_123"); // generate a bunch of fields int numFields = atLeast(2000); Set<String> fieldNames = new HashSet<>(); for (int i = 0; i < numFields; i++) { fieldNames.add(TestUtil.randomUnicodeString(random())); } FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null)); for (String field : fieldNames) { IndexableFieldType fieldType = randomFieldType(random()); FieldInfo fi = builder.getOrAdd(field); IndexOptions indexOptions = fieldType.indexOptions(); if (indexOptions != IndexOptions.NONE) { fi.setIndexOptions(indexOptions); if (fieldType.omitNorms()) { fi.setOmitsNorms(); } } fi.setDocValuesType(fieldType.docValuesType()); if (fieldType.indexOptions() != IndexOptions.NONE && fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { if (random().nextBoolean()) { fi.setStorePayloads(); } } addAttributes(fi); } FieldInfos infos = builder.finish(); codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT); FieldInfos infos2 = codec.fieldInfosFormat().read(dir, segmentInfo, "", IOContext.DEFAULT); assertEquals(infos, infos2); dir.close(); }
Example #26
Source File: PRMSFieldBoostTest.java From querqy with Apache License 2.0 | 4 votes |
/**
 * Indexes "abc" 2 times in f1 and 4 times in f2 ("def" the other way round),
 * builds a query for "abc" with the PRMS field boost model, and checks that
 * the boost captured for the second disjunct is twice the boost captured for
 * the first one.
 */
@Test
public void testGetThatFieldProbabilityRatioIsReflectedInBoost() throws Exception {
  DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

  Directory directory = newDirectory();
  Analyzer analyzer = new StandardAnalyzer();

  IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
  writerConfig.setCodec(Codec.forName(TestUtil.LUCENE_CODEC));
  IndexWriter indexWriter = new IndexWriter(directory, writerConfig);

  addNumDocs("f1", "abc", indexWriter, 2);
  addNumDocs("f1", "def", indexWriter, 4);
  addNumDocs("f2", "abc", indexWriter, 4);
  addNumDocs("f2", "def", indexWriter, 2);
  indexWriter.close();

  Map<String, Float> fields = new HashMap<>();
  fields.put("f1", 1f);
  fields.put("f2", 1f);

  SearchFieldsAndBoosting searchFieldsAndBoosting =
      new SearchFieldsAndBoosting(FieldBoostModel.PRMS, fields, fields, 0.8f);

  LuceneQueryBuilder queryBuilder = new LuceneQueryBuilder(
      new DependentTermQueryBuilder(dfc), analyzer, searchFieldsAndBoosting, 0.01f, null);

  WhiteSpaceQuerqyParser parser = new WhiteSpaceQuerqyParser();
  Query query = queryBuilder.createQuery(parser.parse("abc"));
  dfc.finishedUserQuery();

  assertTrue(query instanceof DisjunctionMaxQuery);
  List<Query> disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts();
  assertEquals(2, disjuncts.size());

  Query first = disjuncts.get(0);
  assertTrue(first instanceof DependentTermQueryBuilder.DependentTermQuery);
  DependentTermQueryBuilder.DependentTermQuery dtq1 = (DependentTermQueryBuilder.DependentTermQuery) first;

  Query second = disjuncts.get(1);
  assertTrue(second instanceof DependentTermQueryBuilder.DependentTermQuery);
  DependentTermQueryBuilder.DependentTermQuery dtq2 = (DependentTermQueryBuilder.DependentTermQuery) second;

  assertNotEquals(dtq1.getTerm().field(), dtq2.getTerm().field());

  Similarity similarity = Mockito.mock(Similarity.class);
  Similarity.SimScorer simScorer = Mockito.mock(Similarity.SimScorer.class);

  // The captor records the boost argument of every similarity.scorer(...) call.
  ArgumentCaptor<Float> computeWeightBoostCaptor = ArgumentCaptor.forClass(Float.class);
  when(similarity.scorer(
      computeWeightBoostCaptor.capture(),
      any(CollectionStatistics.class),
      ArgumentMatchers.<TermStatistics>any())).thenReturn(simScorer);

  IndexReader indexReader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(indexReader);
  indexSearcher.setSimilarity(similarity);

  // The weights themselves are not inspected; creating them is what feeds the captor.
  indexSearcher.createWeight(dtq1, ScoreMode.COMPLETE, 1.0f);
  indexSearcher.createWeight(dtq2, ScoreMode.COMPLETE, 1.0f);

  final List<Float> capturedBoosts = computeWeightBoostCaptor.getAllValues();
  float boostFirst = capturedBoosts.get(0);
  float boostSecond = capturedBoosts.get(1);

  assertEquals(2f, boostSecond / boostFirst, 0.00001);

  indexReader.close();
  directory.close();
  analyzer.close();
}
Example #27
Source File: TestLucene80NormsFormat.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the codec held in this test's {@code codec} field. */
@Override
protected Codec getCodec() {
  return codec;
}
Example #28
Source File: TestUtil.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the name of the doc values format that the current default codec
 * uses for {@code field}, by delegating to the two-argument overload.
 */
public static String getDocValuesFormat(String field) {
  Codec defaultCodec = Codec.getDefault();
  return getDocValuesFormat(defaultCodec, field);
}
Example #29
Source File: TestAssertingTermVectorsFormat.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the codec held in this test's {@code codec} field. */
@Override
protected Codec getCodec() {
  return codec;
}
Example #30
Source File: TestAssertingNormsFormat.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the codec held in this test's {@code codec} field. */
@Override
protected Codec getCodec() {
  return codec;
}