Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setCodec()
The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setCodec(). Each example can be traced back to its original project and source file via the header above it.
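Before the examples, here is a minimal, self-contained sketch of the basic call. The codec name ("Lucene86"), the index path, and the class name are illustrative assumptions for this sketch, not taken from any project below.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetCodecSketch {
  public static void main(String[] args) throws Exception {
    // Index path and codec name are illustrative assumptions.
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
      // setCodec() selects the on-disk format used for newly flushed segments.
      iwc.setCodec(Codec.forName("Lucene86"));
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        writer.commit();
      }
    }
  }
}

Note that the codec only applies to segments written after the config is used; existing segments keep the format they were written with, as Example 17 below demonstrates.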
Example 1
Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0
private void doTestMixedPostings(Codec codec) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(codec);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  // turn on vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  ft.setStoreTermVectorPositions(true);
  Field idField = new Field("id", "", ft);
  Field dateField = new Field("date", "", ft);
  doc.add(idField);
  doc.add(dateField);
  for (int i = 0; i < 100; i++) {
    idField.setStringValue(Integer.toString(random().nextInt(50)));
    dateField.setStringValue(Integer.toString(random().nextInt(100)));
    iw.addDocument(doc);
  }
  iw.close();
  dir.close(); // checkindex
}
Example 2
Source File: TestPointQueries.java From lucene-solr with Apache License 2.0
public void testWrongNumBytes() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(getCodec());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  doc.add(new LongPoint("value", Long.MIN_VALUE));
  w.addDocument(doc);

  IndexReader r = w.getReader();

  // no wrapping, else the exc might happen in executor thread:
  IndexSearcher s = new IndexSearcher(r);
  byte[][] point = new byte[1][];
  point[0] = new byte[10];
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    s.count(BinaryPoint.newRangeQuery("value", point, point));
  });
  assertEquals("field=\"value\" was indexed with bytesPerDim=8 but this query has bytesPerDim=10", expected.getMessage());

  IOUtils.close(r, w, dir);
}
Example 3
Source File: TestPointQueries.java From lucene-solr with Apache License 2.0
public void testEmptyPointInSetQuery() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(getCodec());
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new IntPoint("int", 17));
  doc.add(new LongPoint("long", 17L));
  doc.add(new FloatPoint("float", 17.0f));
  doc.add(new DoublePoint("double", 17.0));
  doc.add(new BinaryPoint("bytes", new byte[] {0, 17}));
  w.addDocument(doc);

  IndexReader r = DirectoryReader.open(w);
  IndexSearcher s = newSearcher(r, false);
  assertEquals(0, s.count(IntPoint.newSetQuery("int")));
  assertEquals(0, s.count(LongPoint.newSetQuery("long")));
  assertEquals(0, s.count(FloatPoint.newSetQuery("float")));
  assertEquals(0, s.count(DoublePoint.newSetQuery("double")));
  assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes")));

  w.close();
  r.close();
  dir.close();
}
Example 4
Source File: TestSuggestField.java From lucene-solr with Apache License 2.0
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  Codec filterCodec = new Lucene86Codec() {
    CompletionPostingsFormat.FSTLoadMode fstLoadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if (suggestFields.contains(field)) {
        return postingsFormat;
      }
      return super.getPostingsFormatForField(field);
    }
  };
  iwc.setCodec(filterCodec);
  return iwc;
}
Example 5
Source File: Blur024CodecTest.java From incubator-retired-blur with Apache License 2.0
@Test
public void testDocValuesFormat() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf.setCodec(new Blur024Codec());
  IndexWriter writer = new IndexWriter(directory, conf);

  Document doc = new Document();
  doc.add(new StringField("f", "v", Store.YES));
  doc.add(new SortedDocValuesField("f", new BytesRef("v")));
  writer.addDocument(doc);
  writer.close();

  DirectoryReader reader = DirectoryReader.open(directory);
  AtomicReaderContext context = reader.leaves().get(0);
  AtomicReader atomicReader = context.reader();
  SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
  assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));

  reader.close();
}
Example 6
Source File: TestPointQueries.java From lucene-solr with Apache License 2.0
public void testBasicMultiValueMultiDimPointInSetQuery() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(getCodec());
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new IntPoint("int", 17, 42));
  doc.add(new IntPoint("int", 34, 79));
  w.addDocument(doc);

  IndexReader r = DirectoryReader.open(w);
  IndexSearcher s = newSearcher(r, false);
  assertEquals(0, s.count(newMultiDimIntSetQuery("int", 2, 17, 41)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42, 34, 79)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, -7, -7, 17, 42)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, -7, -7, 34, 79)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42, -14, -14)));

  assertEquals("int:{-14,-14 17,42}", newMultiDimIntSetQuery("int", 2, 17, 42, -14, -14).toString());

  w.close();
  r.close();
  dir.close();
}
Example 7
Source File: TestIDVersionPostingsFormat.java From lucene-solr with Apache License 2.0
public void testInvalidVersions2() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc, false);
  Document doc = new Document();
  // Long.MAX_VALUE:
  doc.add(new StringAndPayloadField("id", "id", new BytesRef(new byte[] {
      (byte) 0x7f, (byte) 0xff, (byte) 0xff, (byte) 0xff,
      (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff})));
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
    w.commit(false);
  });
  expectThrows(AlreadyClosedException.class, () -> {
    w.addDocument(doc);
  });
  dir.close();
}
Example 8
Source File: TestIDVersionPostingsFormat.java From lucene-solr with Apache License 2.0
public void testMoreThanOnceInSingleDoc() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc, false);
  Document doc = new Document();
  doc.add(makeIDField("id", 17));
  doc.add(makeIDField("id", 17));
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
    w.commit(false);
  });
  w.close();
  dir.close();
}
Example 9
Source File: TestBlockPostingsFormat2.java From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newFSDirectory(createTempDir("testDFBlockSize"));
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50RWPostingsFormat()));
  iw = new RandomIndexWriter(random(), dir, iwc);
  iw.setDoRandomForceMerge(false); // we will ourselves
}
Example 10
Source File: FilterCacheTest.java From incubator-retired-blur with Apache License 2.0
private void writeDocs(FilterCache filterCache, RAMDirectory directory) throws IOException {
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  conf.setCodec(new Blur024Codec());
  IndexWriter indexWriter = new IndexWriter(directory, conf);
  int count = 10000;
  addDocs(indexWriter, count);
  indexWriter.close();
}
Example 11
Source File: TestDocTermOrds.java From lucene-solr with Apache License 2.0
public void testRandom() throws Exception {
  Directory dir = newDirectory();

  final int NUM_TERMS = atLeast(20);
  final Set<BytesRef> terms = new HashSet<>();
  while (terms.size() < NUM_TERMS) {
    final String s = TestUtil.randomRealisticUnicodeString(random());
    //final String s = _TestUtil.randomSimpleString(random);
    if (s.length() > 0) {
      terms.add(new BytesRef(s));
    }
  }
  final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
  Arrays.sort(termsArray);

  final int NUM_DOCS = atLeast(100);

  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

  // Sometimes swap in codec that impls ord():
  if (random().nextInt(10) == 7) {
    // Make sure terms index has ords:
    Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
    conf.setCodec(codec);
  }

  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

  final int[][] idToOrds = new int[NUM_DOCS][];
  final Set<Integer> ordsForDocSet = new HashSet<>();

  for (int id = 0; id < NUM_DOCS; id++) {
    Document doc = new Document();

    doc.add(new LegacyIntField("id", id, Field.Store.YES));

    final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
    while (ordsForDocSet.size() < termCount) {
      ordsForDocSet.add(random().nextInt(termsArray.length));
    }
    final int[] ordsForDoc = new int[termCount];
    int upto = 0;
    if (VERBOSE) {
      System.out.println("TEST: doc id=" + id);
    }
    for (int ord : ordsForDocSet) {
      ordsForDoc[upto++] = ord;
      Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
      if (VERBOSE) {
        System.out.println("  f=" + termsArray[ord].utf8ToString());
      }
      doc.add(field);
    }
    ordsForDocSet.clear();
    Arrays.sort(ordsForDoc);
    idToOrds[id] = ordsForDoc;
    w.addDocument(doc);
  }

  final DirectoryReader r = w.getReader();
  w.close();

  if (VERBOSE) {
    System.out.println("TEST: reader=" + r);
  }

  for (LeafReaderContext ctx : r.leaves()) {
    if (VERBOSE) {
      System.out.println("\nTEST: sub=" + ctx.reader());
    }
    verify(ctx.reader(), idToOrds, termsArray, null);
  }

  // Also test top-level reader: its enum does not support
  // ord, so this forces the OrdWrapper to run:
  if (VERBOSE) {
    System.out.println("TEST: top reader");
  }
  LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
  TestUtil.checkReader(slowR);
  verify(slowR, idToOrds, termsArray, null);
  FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheHelper().getKey());
  r.close();
  dir.close();
}
Example 12
Source File: MtasSearchTestConsistency.java From mtas with Apache License 2.0
/**
 * Creates the index.
 *
 * @param configFile the config file
 * @param files the files
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static void createIndex(String configFile, HashMap<String, String> files) throws IOException {
  // analyzer
  Map<String, String> paramsCharFilterMtas = new HashMap<>();
  paramsCharFilterMtas.put("type", "file");
  Map<String, String> paramsTokenizer = new HashMap<>();
  paramsTokenizer.put("configFile", configFile);
  Analyzer mtasAnalyzer = CustomAnalyzer
      .builder(Paths.get("docker").toAbsolutePath())
      .addCharFilter("mtas", paramsCharFilterMtas)
      .withTokenizer("mtas", paramsTokenizer).build();
  Map<String, Analyzer> analyzerPerField = new HashMap<>();
  analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer);
  PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
      new StandardAnalyzer(), analyzerPerField);
  // indexwriter
  IndexWriterConfig config = new IndexWriterConfig(analyzer);
  config.setUseCompoundFile(false);
  config.setCodec(Codec.forName("MtasCodec"));
  IndexWriter w = new IndexWriter(directory, config);
  // delete
  w.deleteAll();
  // add
  int counter = 0;
  for (Entry<String, String> entry : files.entrySet()) {
    addDoc(w, counter, entry.getKey(), entry.getValue());
    if (counter == 0) {
      w.commit();
    } else {
      addDoc(w, counter, entry.getKey(), entry.getValue());
      addDoc(w, counter, "deletable", entry.getValue());
      w.commit();
      w.deleteDocuments(new Term(FIELD_ID, Integer.toString(counter)));
      w.deleteDocuments(new Term(FIELD_TITLE, "deletable"));
      addDoc(w, counter, entry.getKey(), entry.getValue());
    }
    counter++;
  }
  w.commit();
  // finish
  w.close();
}
Example 13
Source File: Blur022CodecTest.java From incubator-retired-blur with Apache License 2.0
@Test
public void testLargeDocs() throws IOException {
  Random random = new Random();
  Iterable<? extends IndexableField> doc = getLargeDoc(random);
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur022Codec());
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  writer1.addDocument(doc);
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  //   System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur022Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  writer2.addDocument(doc);
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2, numDocs2);

  for (int i = 0; i < 2; i++) {
    long t1 = System.nanoTime();
    Document document1 = reader1.document(0);
    long t2 = System.nanoTime();
    Document document2 = reader2.document(1);
    long t3 = System.nanoTime();
    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);
    System.out.println("doc1 " + document1.hashCode());
    System.out.println("doc2 " + document2.hashCode());
  }

  // for (int i = 0; i < numDocs2; i++) {
  //   System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}
Example 14
Source File: TestNearest.java From lucene-solr with Apache License 2.0
private IndexWriterConfig getIndexWriterConfig() {
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(Codec.forName("Lucene86"));
  return iwc;
}
Example 15
Source File: Blur024CodecTest.java From incubator-retired-blur with Apache License 2.0
@Test
public void testLargeDocs() throws IOException {
  Random random = new Random();
  Iterable<? extends IndexableField> doc = getLargeDoc(random);
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur024Codec());
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  writer1.addDocument(doc);
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  //   System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  writer2.addDocument(doc);
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2, numDocs2);

  for (int i = 0; i < 2; i++) {
    long t1 = System.nanoTime();
    Document document1 = reader1.document(0);
    long t2 = System.nanoTime();
    Document document2 = reader2.document(1);
    long t3 = System.nanoTime();
    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);
    System.out.println("doc1 " + document1.hashCode());
    System.out.println("doc2 " + document2.hashCode());
  }

  // for (int i = 0; i < numDocs2; i++) {
  //   System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}
Example 16
Source File: TestCompressingStoredFieldsFormat.java From lucene-solr with Apache License 2.0
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  iwConf.setCodec(getCodec());
  // disable CFS because this test checks file names
  iwConf.setMergePolicy(newLogMergePolicy(false));
  iwConf.setUseCompoundFile(false);

  // Cannot use RIW because this test wants CFS to stay off:
  IndexWriter iw = new IndexWriter(dir, iwConf);

  final Document validDoc = new Document();
  validDoc.add(new IntPoint("id", 0));
  validDoc.add(new StoredField("id", 0));
  iw.addDocument(validDoc);
  iw.commit();

  // make sure that #writeField will fail to trigger an abort
  final Document invalidDoc = new Document();
  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  invalidDoc.add(new Field("invalid", fieldType) {
    @Override
    public String stringValue() {
      // TODO: really bad & scary that this causes IW to
      // abort the segment!! We should fix this.
      return null;
    }
  });

  try {
    iw.addDocument(invalidDoc);
    iw.commit();
  } catch (IllegalArgumentException iae) {
    // expected
    assertEquals(iae, iw.getTragicException());
  }
  // Writer should be closed by tragedy
  assertFalse(iw.isOpen());
  dir.close();
}
Example 17
Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0
@Test
public void testChangeCodecAndMerge() throws IOException {
  Directory dir = newDirectory();
  if (VERBOSE) {
    System.out.println("TEST: make new index");
  }
  IndexWriterConfig iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.CREATE).setCodec(new MockCodec());
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  IndexWriter writer = newWriter(dir, iwconf);

  addDocs(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "aaa"), dir, 10);
  if (VERBOSE) {
    System.out.println("TEST: addDocs3");
  }
  addDocs3(writer, 10);
  writer.commit();
  writer.close();

  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "aaa"), dir, 10);
  Codec codec = iwconf.getCodec();

  iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.APPEND).setCodec(codec);
  //((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);

  iwconf.setCodec(new MockCodec()); // uses standard for field content
  writer = newWriter(dir, iwconf);
  // swap in new codec for currently written segments
  if (VERBOSE) {
    System.out.println("TEST: add docs w/ Standard codec for content field");
  }
  addDocs2(writer, 10);
  writer.commit();
  codec = iwconf.getCodec();
  assertEquals(30, writer.getDocStats().maxDoc);
  assertQuery(new Term("content", "bbb"), dir, 10);
  assertQuery(new Term("content", "ccc"), dir, 10);
  //// assertQuery(new Term("content", "aaa"), dir, 10);

  if (VERBOSE) {
    System.out.println("TEST: add more docs w/ new codec");
  }
  addDocs2(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);
  assertEquals(40, writer.getDocStats().maxDoc);

  if (VERBOSE) {
    System.out.println("TEST: now optimize");
  }
  writer.forceMerge(1);
  assertEquals(40, writer.getDocStats().maxDoc);
  writer.close();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);
  dir.close();
}
Example 18
Source File: MtasDocumentIndex.java From inception with Apache License 2.0
private synchronized IndexWriter getIndexWriter() throws IOException {
  if (_indexWriter == null) {
    log.debug("Opening index for project [{}]({})", project.getName(), project.getId());

    OPEN_INDEXES.put(project.getId(), this);

    // Initialize and populate the hash maps for the layers and features
    features = schemaService.listAnnotationFeature(project).stream()
        .filter(feat -> feat.getLayer().isEnabled())
        .filter(feat -> feat.isEnabled())
        .collect(Collectors.toList());

    // Add the project id to the configuration
    JSONObject jsonParserConfiguration = new JSONObject();
    jsonParserConfiguration.put(PARAM_PROJECT_ID, project.getId());

    // Tokenizer parameters
    Map<String, String> tokenizerArguments = new HashMap<>();
    tokenizerArguments.put(ARGUMENT_PARSER, MtasUimaParser.class.getName());
    tokenizerArguments.put(ARGUMENT_PARSER_ARGS, jsonParserConfiguration.toString());

    // Build analyzer
    Analyzer mtasAnalyzer = CustomAnalyzer.builder()
        .withTokenizer(MtasTokenizerFactory.class, tokenizerArguments)
        .build();

    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer);

    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(),
        analyzerPerField);

    // Build IndexWriter
    FileUtils.forceMkdir(getIndexDir());
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setCodec(Codec.forName(MTAS_CODEC_NAME));
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(getIndexDir().toPath()), config);

    // Initialize the index
    indexWriter.commit();

    // After the index has been initialized, assign the _indexWriter - this is also used
    // by isOpen() to check if the index writer is available.
    _indexWriter = indexWriter;
  }
  return _indexWriter;
}
Example 19
Source File: Blur022CodecTest.java From incubator-retired-blur with Apache License 2.0
@Test
public void testSmallDocs() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur022Codec());
  Random random1 = new Random(1);
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  for (int i = 0; i < 1000; i++) {
    writer1.addDocument(getSmallDoc(random1));
  }
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1000, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  //   System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur022Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  Random random2 = new Random(1);
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  for (int i = 0; i < 1000; i++) {
    writer2.addDocument(getSmallDoc(random2));
  }
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2000, numDocs2);

  for (int i = 0; i < 2; i++) {
    long t1 = System.nanoTime();
    long hash1 = 0;
    long hash2 = 0;
    for (int d = 0; d < 1000; d++) {
      Document document1 = reader1.document(d);
      hash1 += document1.hashCode();
    }
    long t2 = System.nanoTime();
    for (int d = 0; d < 1000; d++) {
      Document document2 = reader2.document(d + 1000);
      hash2 += document2.hashCode();
    }
    long t3 = System.nanoTime();
    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);
    System.out.println("doc1 " + hash1);
    System.out.println("doc2 " + hash2);
  }

  // for (int i = 0; i < numDocs2; i++) {
  //   System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}
Example 20
Source File: Blur024CodecTest.java From incubator-retired-blur with Apache License 2.0
@Test
public void testSmallDocs() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur024Codec());
  Random random1 = new Random(1);
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  for (int i = 0; i < 1000; i++) {
    writer1.addDocument(getSmallDoc(random1));
  }
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1000, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  //   System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  Random random2 = new Random(1);
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  for (int i = 0; i < 1000; i++) {
    writer2.addDocument(getSmallDoc(random2));
  }
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2000, numDocs2);

  for (int i = 0; i < 2; i++) {
    long t1 = System.nanoTime();
    long hash1 = 0;
    long hash2 = 0;
    for (int d = 0; d < 1000; d++) {
      Document document1 = reader1.document(d);
      hash1 += document1.hashCode();
    }
    long t2 = System.nanoTime();
    for (int d = 0; d < 1000; d++) {
      Document document2 = reader2.document(d + 1000);
      hash2 += document2.hashCode();
    }
    long t3 = System.nanoTime();
    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);
    System.out.println("doc1 " + hash1);
    System.out.println("doc2 " + hash2);
  }

  // for (int i = 0; i < numDocs2; i++) {
  //   System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}