Java Code Examples for org.apache.lucene.document.Document#getFields()
The following examples show how to use
org.apache.lucene.document.Document#getFields().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestBlockPostingsFormat2.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Tests terms whose total term frequency (ttf) is an exact multiple of the block size. */
public void testTTFBlockSizeMultiple() throws Exception {
  Document doc = newDocument();
  for (int iter = 0; iter < Lucene50PostingsFormat.BLOCK_SIZE / 2; iter++) {
    for (IndexableField docField : doc.getFields()) {
      // Four occurrences of "<name>" plus four of "<name>_2" per prototype,
      // repeated 16 times, so each term's ttf lands on a block-size boundary.
      String proto = (docField.name() + " " + docField.name() + " " + docField.name() + " "
          + docField.name() + " " + docField.name() + "_2 " + docField.name() + "_2 "
          + docField.name() + "_2 " + docField.name() + "_2");
      StringBuilder value = new StringBuilder();
      for (int rep = 0; rep < 16; rep++) {
        value.append(proto);
        value.append(" ");
      }
      ((Field) docField).setStringValue(value.toString());
    }
    iw.addDocument(doc);
  }
}
Example 2
Source File: TestStressIndexing2.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Asserts that two documents contain the same fields (compared by name order)
 * with equal string values; binary fields are only checked for presence on both sides.
 *
 * @param d1 first document
 * @param d2 second document
 */
public static void verifyEquals(Document d1, Document d2) {
  List<IndexableField> ff1 = new ArrayList<>(d1.getFields());
  List<IndexableField> ff2 = new ArrayList<>(d2.getFields());
  Collections.sort(ff1, fieldNameComparator);
  Collections.sort(ff2, fieldNameComparator);
  assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size());
  for (int i = 0; i < ff1.size(); i++) {
    IndexableField f1 = ff1.get(i);
    IndexableField f2 = ff2.get(i);
    if (f1.binaryValue() != null) {
      // BUGFIX: was a bare Java `assert`, which silently does nothing unless the
      // JVM runs with -ea. Use a real JUnit assertion so the check always executes,
      // matching the assertEquals calls elsewhere in this method.
      assertNotNull(ff1 + " : " + ff2, f2.binaryValue());
    } else {
      String s1 = f1.stringValue();
      String s2 = f2.stringValue();
      assertEquals(ff1 + " : " + ff2, s1, s2);
    }
  }
}
Example 3
Source File: TestFieldsReader.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds the shared test document and FieldInfos once for the class, then
 * writes the document to a fresh directory for the tests to read back.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  testDoc = new Document();
  fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
  DocHelper.setupDoc(testDoc);
  // Mirror every field of the document into the FieldInfos builder.
  for (IndexableField docField : testDoc.getFields()) {
    FieldInfo info = fieldInfos.getOrAdd(docField.name());
    IndexableFieldType type = docField.fieldType();
    info.setIndexOptions(type.indexOptions());
    if (type.omitNorms()) {
      info.setOmitsNorms();
    }
    info.setDocValuesType(type.docValuesType());
  }
  dir = newDirectory();
  IndexWriterConfig conf =
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy());
  conf.getMergePolicy().setNoCFSRatio(0.0);
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.addDocument(testDoc);
  writer.close();
}
Example 4
Source File: SimpleNaiveBayesDocumentClassifier.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Performs the analysis for the seed document and extracts per-field boosts if present.
 * This is done only one time for the seed document.
 *
 * @param inputDocument the seed unseen document
 * @param fieldName2tokensArray maps a field name to the token arrays for all of its values
 * @param fieldName2boost maps a field name to its boost
 * @throws IOException if there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray,
                                 Map<String, Float> fieldName2boost) throws IOException {
  for (int idx = 0; idx < textFieldNames.length; idx++) {
    String fieldName = textFieldNames[idx];
    float boost = 1;
    List<String[]> tokenizedValues = new LinkedList<>();
    // A field may carry an inline boost using the "name^boost" syntax.
    if (fieldName.contains("^")) {
      String[] nameAndBoost = fieldName.split("\\^");
      fieldName = nameAndBoost[0];
      boost = Float.parseFloat(nameAndBoost[1]);
    }
    for (IndexableField fieldValue : inputDocument.getFields(fieldName)) {
      TokenStream fieldTokens = fieldValue.tokenStream(field2analyzer.get(fieldName), null);
      tokenizedValues.add(getTokenArray(fieldTokens));
    }
    fieldName2tokensArray.put(fieldName, tokenizedValues);
    fieldName2boost.put(fieldName, boost);
    // Store back the bare field name with any boost suffix stripped.
    textFieldNames[idx] = fieldName;
  }
}
Example 5
Source File: TestBlockPostingsFormat2.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Tests terms whose document frequency (df) is an exact multiple of the block size. */
public void testDFBlockSizeMultiple() throws Exception {
  Document doc = newDocument();
  // BLOCK_SIZE * 16 documents, so each term's df is a multiple of BLOCK_SIZE.
  for (int iter = 0; iter < Lucene50PostingsFormat.BLOCK_SIZE * 16; iter++) {
    for (IndexableField docField : doc.getFields()) {
      ((Field) docField).setStringValue(docField.name() + " " + docField.name() + "_2");
    }
    iw.addDocument(doc);
  }
}
Example 6
Source File: AclDiscoverFieldTypeDefinitionTest.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
/**
 * Runs a super query with the given discover authorizations and verifies both the
 * expected hit count and that only discoverable fields are visible on each hit.
 *
 * @param expected expected number of hits
 * @param rowQuery whether the query is parsed as a row query
 * @param discoverAuthorizations discover authorizations to apply to the searcher
 */
private void test(int expected, boolean rowQuery, Collection<String> discoverAuthorizations) throws IOException,
    ParseException {
  DirectoryReader reader = DirectoryReader.open(_dir);
  try {
    SuperParser parser = new SuperParser(Version.LUCENE_43, _fieldManager, rowQuery, null, ScoreType.SUPER, new Term(
        BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE));
    Query query = parser.parse("fam.string:value");
    Collection<String> readAuthorizations = null;
    Set<String> discoverableFields = new HashSet<String>();
    discoverableFields.add("rowid");
    discoverableFields.add("recordid");
    discoverableFields.add("family");
    IndexSearcher searcher = new SecureIndexSearcher(reader, getAccessControlFactory(), readAuthorizations,
        discoverAuthorizations, discoverableFields, null);
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(expected, topDocs.totalHits);
    for (int i = 0; i < expected; i++) {
      int doc = topDocs.scoreDocs[i].doc;
      Document document = searcher.doc(doc);
      List<IndexableField> fields = document.getFields();
      for (IndexableField field : fields) {
        assertTrue(discoverableFields.contains(field.name()));
      }
    }
  } finally {
    // BUGFIX: close in finally so a failed assertion above does not leak
    // an open DirectoryReader.
    reader.close();
  }
}
Example 7
Source File: MutatableActionTest.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
/**
 * Verifies that replacing a record keeps the document count stable and that the
 * replacement record's new column is stored and retrievable.
 */
@Test
public void testReplaceRecord() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  DirectoryReader reader = getIndexReader(directory);
  IndexWriter writer = new IndexWriter(directory, _conf.clone());
  assertEquals(0, reader.numDocs());

  Row row = genRow();
  List<Column> cols = new ArrayList<Column>();
  cols.add(new Column("n", "v"));
  row.addToRecords(new Record("1", "fam", cols));
  _action.replaceRow(row);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  // Replace the record with one that carries an extra column.
  cols.add(new Column("n2", "v2"));
  Record record = new Record("1", "fam", cols);
  _action.replaceRecord(row.getId(), record);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term(BlurConstants.ROW_ID, row.getId())), 10);
  Document doc2 = searcher.doc(topDocs.scoreDocs[1].doc);
  List<IndexableField> fields = doc2.getFields();
  // BUGFIX: arguments were swapped (assertEquals(fields.size(), 5)); expected value
  // must come first, matching the sibling testReplaceColumns test.
  assertEquals(5, fields.size());
  String value = doc2.get("fam.n2");
  assertEquals("v2", value);
}
Example 8
Source File: MutatableActionTest.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
/**
 * Verifies that replacing the columns of a record overwrites the targeted
 * column's value while leaving the document count unchanged.
 */
@Test
public void testReplaceColumns() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  DirectoryReader reader = getIndexReader(directory);
  IndexWriter writer = new IndexWriter(directory, _conf.clone());
  assertEquals(0, reader.numDocs());

  Row row = genRow();
  List<Column> cols = new ArrayList<Column>();
  cols.add(new Column("n", "v"));
  cols.add(new Column("n1", "v1"));
  row.addToRecords(new Record("1", "fam", cols));
  _action.replaceRow(row);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  // Now replace column n1 with a new value.
  cols.clear();
  cols.add(new Column("n1", "v2"));
  Record record = new Record("1", "fam", cols);
  _action.replaceColumns(row.getId(), record);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term(BlurConstants.ROW_ID, row.getId())), 10);
  Document doc2 = searcher.doc(topDocs.scoreDocs[1].doc);
  List<IndexableField> fields = doc2.getFields();
  assertEquals(5, fields.size());
  String value = doc2.get("fam.n1");
  assertEquals("v2", value);
}
Example 9
Source File: MoreLikeThis.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Find words for a more-like-this query former. * * @param docNum the id of the lucene document from which to find terms */ private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException { Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>(); for (String fieldName : fieldNames) { final Fields vectors = ir.getTermVectors(docNum); final Terms vector; if (vectors != null) { vector = vectors.terms(fieldName); } else { vector = null; } // field does not store term vector info if (vector == null) { Document d = ir.document(docNum); IndexableField[] fields = d.getFields(fieldName); for (IndexableField field : fields) { final String stringValue = field.stringValue(); if (stringValue != null) { addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName); } } } else { addTermFrequencies(field2termFreqMap, vector, fieldName); } } return createQueue(field2termFreqMap); }
Example 10
Source File: LuceneIndexTest.java From rdf4j with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Asserts that the statement's object literal does NOT appear among the values
 * of the document field corresponding to the statement's predicate.
 *
 * @param statement the statement that must be absent from the document
 * @param document the Lucene document to inspect
 */
private void assertNoStatement(Statement statement, Document document) {
  IndexableField[] fields = document.getFields(SearchFields.getPropertyField(statement.getPredicate()));
  if (fields == null) {
    return;
  }
  String label = ((Literal) statement.getObject()).getLabel();
  for (IndexableField field : fields) {
    if (label.equals(field.stringValue())) {
      fail("Statement should not be found in document " + statement);
    }
  }
}
Example 11
Source File: MutatableActionTest.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
/**
 * Verifies that appending columns to a record adds the new column while
 * leaving the document count unchanged.
 */
@Test
public void testAppendColumns() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  DirectoryReader reader = getIndexReader(directory);
  IndexWriter writer = new IndexWriter(directory, _conf.clone());
  assertEquals(0, reader.numDocs());

  Row row = genRow();
  List<Column> cols = new ArrayList<Column>();
  cols.add(new Column("n", "v"));
  row.addToRecords(new Record("1", "fam", cols));
  _action.replaceRow(row);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  // Append a fresh column to the existing record.
  cols.clear();
  cols.add(new Column("n2", "v2"));
  Record record = new Record("1", "fam", cols);
  _action.appendColumns(row.getId(), record);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term(BlurConstants.ROW_ID, row.getId())), 10);
  Document doc2 = searcher.doc(topDocs.scoreDocs[1].doc);
  List<IndexableField> fields = doc2.getFields();
  // BUGFIX: arguments were swapped (assertEquals(fields.size(), 5)); expected value
  // must come first, matching the sibling testReplaceColumns test.
  assertEquals(5, fields.size());
  String value = doc2.get("fam.n2");
  assertEquals("v2", value);
}
Example 12
Source File: LuceneIndex.java From rdf4j with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override protected synchronized SearchDocument copyDocument(SearchDocument doc) { Document document = ((LuceneDocument) doc).getDocument(); Document newDocument = new Document(); // add all existing fields (including id, uri, context, and text) for (IndexableField oldField : document.getFields()) { newDocument.add(oldField); } return new LuceneDocument(newDocument, geoStrategyMapper); }
Example 13
Source File: TestBlockPostingsFormat2.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Tests terms whose total term frequency (ttf) equals exactly one block size. */
public void testTTFBlockSize() throws Exception {
  Document doc = newDocument();
  // BLOCK_SIZE/2 documents, each contributing 2 occurrences per term.
  for (int iter = 0; iter < Lucene50PostingsFormat.BLOCK_SIZE / 2; iter++) {
    for (IndexableField docField : doc.getFields()) {
      ((Field) docField).setStringValue(docField.name() + " " + docField.name() + " "
          + docField.name() + "_2 " + docField.name() + "_2");
    }
    iw.addDocument(doc);
  }
}
Example 14
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Returns the string value of the last "ID" field stored on document {@code n},
 * or {@code null} when the document has no "ID" field.
 *
 * @param n the document number
 * @throws IOException if the document cannot be read
 */
public String getPathLinkId(int n) throws IOException {
  Document document = document(n, new SingleFieldSelector("ID", true));
  Field[] fields = document.getFields("ID");
  // BUGFIX: the original indexed fields[fields.length - 1] unconditionally, which
  // throws ArrayIndexOutOfBoundsException when the array is empty. The existing
  // null check on the element never protected that path.
  if (fields == null || fields.length == 0) {
    return null;
  }
  Field field = fields[fields.length - 1];
  return (field == null) ? null : field.stringValue();
}
Example 15
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Loads the fields named {@code fieldName} from document {@code n} and returns
 * them as a cacheable list.
 *
 * @param n the document number
 * @param fieldSelector selector controlling which stored fields are loaded
 * @throws IOException if the document cannot be read
 */
public List<Field> get(int n, FieldSelector fieldSelector) throws IOException {
  Document document = ReferenceCountingReadOnlyIndexReader.super.document(n, fieldSelector);
  Field[] stored = document.getFields(fieldName);
  ArrayList<Field> cacheable = new ArrayList<Field>(stored.length);
  for (int i = 0; i < stored.length; i++) {
    cacheable.add(stored[i]);
  }
  return cacheable;
}
Example 16
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Loads all stored fields of document {@code n} and returns them as a cacheable list.
 *
 * @param n the document number
 * @param fieldSelector selector controlling which stored fields are loaded
 * @throws IOException if the document cannot be read
 */
public List<Field> get(int n, FieldSelector fieldSelector) throws IOException {
  Document document = ReferenceCountingReadOnlyIndexReader.super.document(n, fieldSelector);
  // NOTE(review): unchecked cast kept from the original — this relies on the old
  // Lucene API where getFields() returned the concrete Field list; confirm the
  // Lucene version before changing.
  List<Field> fields = (List<Field>) document.getFields();
  ArrayList<Field> cacheable = new ArrayList<Field>(fields.size());
  cacheable.addAll(fields);
  return cacheable;
}
Example 17
Source File: IndexSearcherTest.java From incubator-retired-blur with Apache License 2.0 | 4 votes |
/** Asserts that every field present on the document is one of the discoverable fields. */
private void validateDiscoverFields(Document doc, Collection<String> discoverableFields) {
  Set<String> allowed = new HashSet<String>(discoverableFields);
  for (IndexableField docField : doc.getFields()) {
    assertTrue(allowed.contains(docField.name()));
  }
}
Example 18
Source File: LuceneRecord.java From HongsCORE with MIT License | 4 votes |
/**
 * Populates the result data (copies values from the Lucene doc into the map).
 * Subclasses may override this method to supply extra data.
 *
 * @param doc source Lucene document
 * @param map target map to fill
 * @param rep set of field names to include; null or empty means all fields
 */
protected void padDat(Document doc, Map map, Set rep) {
    // An empty selection set means "no restriction".
    if (rep != null && rep.isEmpty( )) {
        rep = null;
    }
    Map<String, Map> fields = getFields();
    for(Map.Entry<String, Map> e : fields.entrySet()) {
        Map m = e.getValue();
        String k = e.getKey ();
        // Skip fields not in the requested set.
        if (rep != null && !rep.contains(k)) {
            continue;
        }
        // Skip the meta key "@" and fields that are unstated or unstored.
        if (k == null || k.equals("@") || unstated( m ) || unstored( m )) {
            continue;
        }
        IValue v ;
        String t = datatype(m);
        boolean r = repeated(m);
        IndexableField[] fs = doc.getFields(k);
        // Choose a value converter based on the declared datatype.
        if (t != null) switch (t) {
            case "search":
            case "sorted":
                continue; // purely functional fields have no visible value
            case "date":
                // when converting a timestamp to a Date object it must be multiplied by 1000
                String y = Synt.declare(m.get("type"), "");
                if (OBJECT_MODE) {
                    if ("time".equals(y) || "timestamp".equals(y)) {
                        v = new NumberValue( );
                    } else {
                        v = new DatimeValue(m);
                    }
                } else {
                    if ("time".equals(y) || "timestamp".equals(y)) {
                        v = new NumeraValue( );
                    } else {
                        v = new DatextValue(m);
                    }
                }
                break;
            case "int":
            case "long":
            case "float":
            case "double":
            case "number":
                if (OBJECT_MODE) {
                    v = new NumberValue();
                } else {
                    v = new NumeraValue();
                }
                break;
            case "object":
                v = new ObjectValue();
                break;
            default:
                v = new StringValue();
        } else {
            v = new StringValue();
        }
        // Repeated fields collect into a list; single fields take the first value,
        // defaulting to an empty list / null when the document has no such field.
        if (r) {
            if (fs.length > 0) {
                for(IndexableField f : fs ) {
                    Dict.put(map , v.get(f), k, null);
                }
            } else {
                map.put(k , new ArrayList());
            }
        } else {
            if (fs.length > 0) {
                map.put(k , v.get ( fs[0] ));
            } else {
                map.put(k , null);
            }
        }
    }
}
Example 19
Source File: TestDirectoryReader.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testBinaryFields() throws IOException { Directory dir = newDirectory(); byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) .setMergePolicy(newLogMergePolicy())); for (int i = 0; i < 10; i++) { addDoc(writer, "document number " + (i + 1)); addDocumentWithFields(writer); addDocumentWithDifferentFields(writer); addDocumentWithTermVectorFields(writer); } writer.close(); writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(new StoredField("bin1", bin)); doc.add(new TextField("junk", "junk text", Field.Store.NO)); writer.addDocument(doc); writer.close(); DirectoryReader reader = DirectoryReader.open(dir); Document doc2 = reader.document(reader.maxDoc() - 1); IndexableField[] fields = doc2.getFields("bin1"); assertNotNull(fields); assertEquals(1, fields.length); IndexableField b1 = fields[0]; assertTrue(b1.binaryValue() != null); BytesRef bytesRef = b1.binaryValue(); assertEquals(bin.length, bytesRef.length); for (int i = 0; i < bin.length; i++) { assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); } reader.close(); // force merge writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND) .setMergePolicy(newLogMergePolicy())); writer.forceMerge(1); writer.close(); reader = DirectoryReader.open(dir); doc2 = reader.document(reader.maxDoc() - 1); fields = doc2.getFields("bin1"); assertNotNull(fields); assertEquals(1, fields.length); b1 = fields[0]; assertTrue(b1.binaryValue() != null); bytesRef = b1.binaryValue(); assertEquals(bin.length, bytesRef.length); for (int i = 0; i < bin.length; i++) { assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); } reader.close(); dir.close(); }
Example 20
Source File: TestDocumentWriter.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testAddDocument() throws Exception { Document testDoc = new Document(); DocHelper.setupDoc(testDoc); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); writer.addDocument(testDoc); writer.commit(); SegmentCommitInfo info = writer.newestSegment(); writer.close(); //After adding the document, we should be able to read it back in SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random())); assertTrue(reader != null); Document doc = reader.document(0); assertTrue(doc != null); //System.out.println("Document: " + doc); IndexableField[] fields = doc.getFields("textField2"); assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT)); assertTrue(fields[0].fieldType().storeTermVectors()); fields = doc.getFields("textField1"); assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT)); assertFalse(fields[0].fieldType().storeTermVectors()); fields = doc.getFields("keyField"); assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT)); fields = doc.getFields(DocHelper.NO_NORMS_KEY); assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.NO_NORMS_TEXT)); fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY); assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT)); // test that the norms are not present in the segment if // omitNorms is true for (FieldInfo fi : reader.getFieldInfos()) { if (fi.getIndexOptions() != IndexOptions.NONE) { assertTrue(fi.omitsNorms() == (reader.getNormValues(fi.name) == null)); } } reader.close(); }