org.apache.lucene.document.TextField Java Examples
The following examples show how to use
org.apache.lucene.document.TextField.
You can vote up the examples you like or vote down the ones you don't like,
and follow the links above each example to go to the original project or source file. You may also check out the related API usage on the sidebar.
Example #1
Source File: TestIndexReaderFunctions.java From lucene-solr with Apache License 2.0 | 7 votes |
/**
 * Builds the shared test index: one document per row of {@code documents},
 * indexing "id" and "string" both as keyword fields and as sorted doc values,
 * plus an analyzed "text" field.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  analyzer = new MockAnalyzer(random());
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  for (String[] row : documents) {
    Document doc = new Document();
    doc.add(new StringField("id", row[0], Field.Store.NO));
    doc.add(new SortedDocValuesField("id", new BytesRef(row[0])));
    doc.add(new StringField("string", row[5], Field.Store.NO));
    doc.add(new SortedDocValuesField("string", new BytesRef(row[5])));
    doc.add(new TextField("text", row[6], Field.Store.NO));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  searcher = newSearcher(reader);
  writer.close();
}
Example #2
Source File: AbstractLuceneQueryVisitorTest.java From cxf with Apache License 2.0 | 7 votes |
/**
 * Creates a temp-directory-backed index containing a single document with a
 * stored text field and an int point, then opens a reader/searcher over it.
 */
@Before
public void setUp() throws Exception {
  analyzer = new StandardAnalyzer();
  tempDirectory = Files.createTempDirectory("lucene");
  directory = new MMapDirectory(tempDirectory);
  IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
  IndexWriter writer = new IndexWriter(directory, writerConfig);
  Document document = new Document();
  document.add(new Field("contents", "name=text", TextField.TYPE_STORED));
  // index the int both as a point (for range queries) and as a stored field
  IntPoint point = new IntPoint("intfield", 4);
  document.add(point);
  document.add(new StoredField("intfield", 4));
  writer.addDocument(document);
  writer.close();
  ireader = DirectoryReader.open(directory);
  isearcher = new IndexSearcher(ireader);
}
Example #3
Source File: FullTextIndex.java From jease with GNU General Public License v3.0 | 6 votes |
/**
 * Builds an in-memory full-text index. Sets up the analyzer, writer, and query
 * parser (AND as default operator), and prepares a reusable Document holding a
 * mutable "text" field plus a constant "true"="yes" field.
 */
public FullTextIndex() {
  try {
    objects = new ArrayList<>();
    Analyzer analyzer = new StandardAnalyzer();
    // Lift the default token-count limit so long texts are indexed in full.
    IndexWriterConfig config = new IndexWriterConfig(new LimitTokenCountAnalyzer(analyzer, Integer.MAX_VALUE));
    indexDirectory = new RAMDirectory();
    indexWriter = new IndexWriter(indexDirectory, config);
    queryParser = new QueryParser("text", analyzer);
    // require all terms to match by default
    queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
    // reusable field whose value is swapped per indexed object
    fulltext = new TextField("text", "", Field.Store.NO);
    // Used as base-set for a NOT-Query
    Field inverse = new TextField("true", "yes", Field.Store.NO);
    document = new Document();
    document.add(fulltext);
    document.add(inverse);
  } catch (Exception e) {
    // wrap any setup failure; cause is preserved
    throw new RuntimeException(e.getMessage(), e);
  }
}
Example #4
Source File: PageDocumentTransformer.java From gravitee-management-rest-api with Apache License 2.0 | 6 votes |
/**
 * Converts a PageEntity into a Lucene Document: id and type are stored keyword
 * fields; name and content are analyzed, unstored; API pages also get a stored
 * "api" keyword field.
 */
@Override
public Document transform(PageEntity page) {
  Document doc = new Document();
  doc.add(new StringField(FIELD_ID, page.getId(), Field.Store.YES));
  doc.add(new StringField(FIELD_TYPE, FIELD_TYPE_VALUE, Field.Store.YES));
  String name = page.getName();
  if (name != null) {
    doc.add(new TextField(FIELD_NAME, name, Field.Store.NO));
  }
  String content = page.getContent();
  if (content != null) {
    doc.add(new TextField(FIELD_CONTENT, content, Field.Store.NO));
  }
  if (page instanceof ApiPageEntity) {
    String api = ((ApiPageEntity) page).getApi();
    if (api != null) {
      doc.add(new StringField(FIELD_API, api, Field.Store.YES));
    }
  }
  return doc;
}
Example #5
Source File: TestIndexWriterOnDiskFull.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Caps the mock directory at its current size so the very next write hits
 * "disk full", then verifies the resulting IOException tragically closes the
 * writer and its deleter.
 */
public void testImmediateDiskFull() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(2)
      .setMergeScheduler(new ConcurrentMergeScheduler())
      .setCommitOnClose(false));
  writer.commit(); // empty commit, to not create confusing situation with first commit
  // no room left: any further write must fail
  dir.setMaxSizeInBytes(Math.max(1, dir.sizeInBytes()));
  final Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
  expectThrows(IOException.class, () -> {
    writer.addDocument(doc);
  });
  // a disk-full during addDocument is a tragic event: writer must be closed
  assertTrue(writer.isDeleterClosed());
  assertTrue(writer.isClosed());
  dir.close();
}
Example #6
Source File: InMemoryLuceneIndex.java From tutorials with MIT License | 6 votes |
/**
 * Indexes a single document with the given title and body. The title is also
 * added as sorted doc values so results can be sorted by it.
 *
 * @param title document title (stored and analyzed)
 * @param body  document body (stored and analyzed)
 */
public void indexDocument(String title, String body) {
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
  // try-with-resources guarantees the writer (and its write lock) is released
  // even when addDocument throws; the original leaked the writer on failure.
  try (IndexWriter writer = new IndexWriter(memoryIndex, indexWriterConfig)) {
    Document document = new Document();
    document.add(new TextField("title", title, Field.Store.YES));
    document.add(new TextField("body", body, Field.Store.YES));
    document.add(new SortedDocValuesField("title", new BytesRef(title)));
    writer.addDocument(document);
  } catch (IOException e) {
    // original error-reporting behavior preserved
    e.printStackTrace();
  }
}
Example #7
Source File: TestBlockPostingsFormat2.java From lucene-solr with Apache License 2.0 | 6 votes |
private Document newDocument() { Document doc = new Document(); for (IndexOptions option : IndexOptions.values()) { if (option == IndexOptions.NONE) { continue; } FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now) ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorPayloads(true); ft.setIndexOptions(option); doc.add(new Field(option.toString(), "", ft)); } return doc; }
Example #8
Source File: SourceSimpleFragmentsBuilder.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Loads the raw values of {@code fieldName} from the document's _source and
 * wraps each one in an unstored, analyzed Field for the highlighter.
 * Returns EMPTY_FIELDS when the source contains no values for the field.
 */
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
  // we know its low level reader, and matching docId, since that's how we call the highlighter with
  SourceLookup sourceLookup = searchContext.lookup().source();
  sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
  // NOTE(review): fieldName parameter is unused here; the mapper's full name
  // is used to resolve the source path instead — presumably they match.
  List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName()));
  if (values.isEmpty()) {
    return EMPTY_FIELDS;
  }
  Field[] fields = new Field[values.size()];
  for (int i = 0; i < values.size(); i++) {
    fields[i] = new Field(mapper.fieldType().names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
  }
  return fields;
}
Example #9
Source File: SimpleTransLog.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Replays an "add document" translog entry against the primary node.
 * Reconstructs the document from the serialized id/title/body/marker fields
 * (the latter three may be absent) and re-adds it.
 */
private void replayAddDocument(Connection c, NodeProcess primary, DataInput in) throws IOException {
  String id = in.readString();
  Document doc = new Document();
  doc.add(new StringField("docid", id, Field.Store.YES));
  String title = readNullableString(in);
  if (title != null) {
    // indexed both as exact keyword and as analyzed text
    doc.add(new StringField("title", title, Field.Store.NO));
    doc.add(new TextField("titleTokenized", title, Field.Store.NO));
  }
  String body = readNullableString(in);
  if (body != null) {
    doc.add(new TextField("body", body, Field.Store.NO));
  }
  String marker = readNullableString(in);
  if (marker != null) {
    doc.add(new StringField("marker", marker, Field.Store.YES));
  }
  // For both add and update originally, we use updateDocument to replay,
  // because the doc could in fact already be in the index:
  // nocommit: what if this fails?
  primary.addOrUpdateDocument(c, doc, false);
}
Example #10
Source File: IndexSearcherTest.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
/**
 * Builds a test document and, when requested, wraps it with read and/or
 * discover access-control visibility via the AccessControlWriter.
 */
private Iterable<? extends IndexableField> getDoc(int docId, String read, String discover, String field1, String field2) {
  Document doc = new Document();
  doc.add(new StringField("id", Integer.toString(docId), Store.YES));
  AccessControlWriter aclWriter = _accessControlFactory.getWriter();
  doc.add(new StringField("f1", field1, Store.YES));
  doc.add(new StringField("f2", field2, Store.YES));
  doc.add(new TextField("text", "constant text", Store.YES));
  Iterable<? extends IndexableField> result = doc;
  if (read != null) {
    result = aclWriter.addReadVisiblity(read, doc);
  }
  if (discover != null) {
    result = aclWriter.addDiscoverVisiblity(discover, result);
  }
  return result;
}
Example #11
Source File: SolrDocumentFetcher.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Returns a collection of the names of all stored fields which can be highlighted the index reader knows about.
 * Lazily computed on first call and cached; the synchronized block ensures the
 * list is built only once under concurrent access.
 */
public Collection<String> getStoredHighlightFieldNames() {
  synchronized (this) {
    if (storedHighlightFieldNames == null) {
      storedHighlightFieldNames = new LinkedList<>();
      for (FieldInfo fieldInfo : searcher.getFieldInfos()) {
        final String fieldName = fieldInfo.name;
        try {
          SchemaField field = searcher.getSchema().getField(fieldName);
          // only stored text/string fields are highlightable
          if (field.stored() && ((field.getType() instanceof org.apache.solr.schema.TextField)
              || (field.getType() instanceof org.apache.solr.schema.StrField))) {
            storedHighlightFieldNames.add(fieldName);
          }
        } catch (RuntimeException e) { // getField() throws a SolrException, but it arrives as a RuntimeException
          log.warn("Field [{}] found in index, but not defined in schema.", fieldName);
        }
      }
    }
    return storedHighlightFieldNames;
  }
}
Example #12
Source File: LuceneSearch.java From zeppelin with Apache License 2.0 | 6 votes |
/**
 * Builds the Lucene document for a search entry. If {@code p} is non-null the
 * paragraph's text (and optional title and modification time) are indexed;
 * otherwise the note name itself becomes the searchable text.
 *
 * @param id       document id, different for note name and paragraph entries
 * @param noteName name of the note
 * @param p        paragraph to index, or null to index the note name
 * @return the populated document
 */
private Document newDocument(String id, String noteName, Paragraph p) {
  Document doc = new Document();
  doc.add(new StringField(ID_FIELD, id, Field.Store.YES));
  doc.add(new StringField("title", noteName, Field.Store.YES));
  if (p == null) {
    doc.add(new TextField(SEARCH_FIELD_TEXT, noteName, Field.Store.YES));
  } else {
    doc.add(new TextField(SEARCH_FIELD_TEXT, p.getText(), Field.Store.YES));
    if (p.getTitle() != null) {
      doc.add(new TextField(SEARCH_FIELD_TITLE, p.getTitle(), Field.Store.YES));
    }
    // prefer the start time; fall back to creation time
    Date modified = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
    doc.add(new LongField("modified", modified.getTime(), Field.Store.NO));
  }
  return doc;
}
Example #13
Source File: TestCustomTermFreq.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Verifies that indexing term vector offsets together with a custom
 * TermFrequencyAttribute is rejected with a descriptive IllegalArgumentException.
 */
public void testInvalidTermVectorOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true); // illegal combination with custom freqs
  doc.add(new Field("field",
      new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"}, new int[] {42, 128, 17, 100}),
      ft));
  Exception e = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
  assertEquals("field \"field\": cannot index term vector offsets while using custom TermFrequencyAttribute",
      e.getMessage());
  IOUtils.close(w, dir);
}
Example #14
Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0 | 6 votes |
private void doTestMixedPostings(Codec codec) throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setCodec(codec); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // turn on vectors for the checkindex cross-check ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPositions(true); Field idField = new Field("id", "", ft); Field dateField = new Field("date", "", ft); doc.add(idField); doc.add(dateField); for (int i = 0; i < 100; i++) { idField.setStringValue(Integer.toString(random().nextInt(50))); dateField.setStringValue(Integer.toString(random().nextInt(100))); iw.addDocument(doc); } iw.close(); dir.close(); // checkindex }
Example #15
Source File: SingleFieldTestDb.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a single-field test index: one document per entry of
 * {@code documents}, all under the field {@code fName}.
 */
public SingleFieldTestDb(Random random, String[] documents, String fName) {
  try {
    db = new MockDirectoryWrapper(random, new ByteBuffersDirectory());
    docs = documents;
    fieldName = fName;
    IndexWriter writer = new IndexWriter(db, new IndexWriterConfig(new MockAnalyzer(random)));
    for (String content : docs) {
      Document d = new Document();
      d.add(new TextField(fieldName, content, Field.Store.NO));
      writer.addDocument(d);
    }
    writer.close();
  } catch (java.io.IOException ioe) {
    // kept as Error to preserve the exception type existing callers expect
    throw new Error(ioe);
  }
}
Example #16
Source File: BlockGroupingTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates a block of at-least-10 "chapter" documents for one book; the final
 * chapter is tagged with a "blockEnd" field so the block boundary is findable.
 */
private static List<Document> createRandomBlock(int book) {
  List<Document> block = new ArrayList<>();
  String bookName = "book" + book;
  int chapterCount = atLeast(10);
  for (int chapter = 0; chapter < chapterCount; chapter++) {
    String chapterName = "chapter" + chapter;
    String chapterText = randomText();
    Document doc = new Document();
    doc.add(new TextField("book", bookName, Field.Store.YES));
    doc.add(new TextField("chapter", chapterName, Field.Store.YES));
    doc.add(new TextField("text", chapterText, Field.Store.NO));
    doc.add(new NumericDocValuesField("length", chapterText.length()));
    doc.add(new SortedDocValuesField("book", new BytesRef(bookName)));
    boolean lastChapter = chapter == chapterCount - 1;
    if (lastChapter) {
      // marks the end of the block for block-join style grouping
      doc.add(new TextField("blockEnd", "true", Field.Store.NO));
    }
    block.add(doc);
  }
  return block;
}
Example #17
Source File: TestDirectoryReaderReopen.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a document with "a{n}" in several field flavors (analyzed, untokenized
 * no-norms, stored-only) plus numFields-1 extra analyzed fields containing
 * "a{n} b{n}".
 */
public static Document createDocument(int n, int numFields) {
  Document doc = new Document();
  StringBuilder text = new StringBuilder();
  text.append("a").append(n);
  // keyword-style: stored but not tokenized, no norms
  FieldType untokenized = new FieldType(TextField.TYPE_STORED);
  untokenized.setTokenized(false);
  untokenized.setOmitNorms(true);
  // stored-only: not indexed at all
  FieldType storedOnly = new FieldType();
  storedOnly.setStored(true);
  doc.add(new TextField("field1", text.toString(), Field.Store.YES));
  doc.add(new Field("fielda", text.toString(), untokenized));
  doc.add(new Field("fieldb", text.toString(), storedOnly));
  text.append(" b").append(n);
  for (int i = 1; i < numFields; i++) {
    doc.add(new TextField("field" + (i + 1), text.toString(), Field.Store.YES));
  }
  return doc;
}
Example #18
Source File: LuceneExample.java From yuzhouwan with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // index try (Directory index = new NIOFSDirectory(Paths.get("/tmp/index"))) { // add try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(new StandardAnalyzer()))) { Document doc = new Document(); doc.add(new TextField("blog", "yuzhouwan.com", Field.Store.YES)); doc.add(new StringField("github", "asdf2014", Field.Store.YES)); writer.addDocument(doc); writer.commit(); } // search try (DirectoryReader reader = DirectoryReader.open(index)) { IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser("blog", new StandardAnalyzer()); Query query = parser.parse("yuzhouwan.com"); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; for (ScoreDoc hit : hits) { Document hitDoc = searcher.doc(hit.doc); System.out.println(hitDoc.get("blog")); } } } }
Example #19
Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Generates {@code ndocs} synthetic documents keyed by their suggestion-field
 * value. Each document carries three numeric doc-values weights, an optional
 * stored payload, and one or more stored context values.
 */
private Map<String, Document> generateIndexDocuments(int ndocs) {
  Map<String, Document> docs = new HashMap<>();
  for(int i = 0; i < ndocs ; i++) {
    Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
    Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
    Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
    Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
    Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0"));
    Document doc = new Document();
    doc.add(field);
    // even if payload is not required usually have it
    if (usually()) {
      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
      doc.add(payload);
    }
    doc.add(weight1);
    doc.add(weight2);
    doc.add(weight3);
    doc.add(contexts);
    for(int j = 1; j < atLeast(3); j++) {
      // NOTE(review): the same Field instance is mutated and re-added each
      // iteration — confirm the indexing chain snapshots the value on add.
      contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
      doc.add(contexts);
    }
    docs.put(field.stringValue(), doc);
  }
  return docs;
}
Example #20
Source File: SpatialPrefixTreeTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * A PrefixTree pruning optimization gone bad, applicable when optimize=true.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-4770">LUCENE-4770</a>.
 */
@Test
public void testBadPrefixTreePrune() throws Exception {
  trie = new QuadPrefixTree(ctx, 12);
  TermQueryPrefixTreeStrategy strategy = new TermQueryPrefixTreeStrategy(trie, "geo");
  Document doc = new Document();
  doc.add(new TextField("id", "1", Store.YES));
  // index a small rectangle; 0.025 is the distance-error percentage
  Shape area = ctx.makeRectangle(-122.82, -122.78, 48.54, 48.56);
  Field[] fields = strategy.createIndexableFields(area, 0.025);
  for (Field field : fields) {
    doc.add(field);
  }
  addDocument(doc);
  // query rectangle overlapping the indexed shape; exactly one hit expected
  Point upperleft = ctx.makePoint(-122.88, 48.54);
  Point lowerright = ctx.makePoint(-122.82, 48.62);
  Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects,
      ctx.makeRectangle(upperleft, lowerright)));
  commit();
  TopDocs search = indexSearcher.search(query, 10);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  for (ScoreDoc scoreDoc : scoreDocs) {
    System.out.println(indexSearcher.doc(scoreDoc.doc));
  }
  assertEquals(1, search.totalHits.value);
}
Example #21
Source File: TripleIndexCreatorContext.java From AGDISTIS with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Adds (or merges) a triple into the index. If no document for {@code subject}
 * exists yet a new one is created; otherwise the first matching document's
 * context is extended with {@code object} and the document is replaced.
 */
private void addDocumentToIndex(String subject, String predicate, String object, boolean isUri) throws IOException {
  log.info("here again");
  List<Triple> triples = new ArrayList<>();
  try {
    triples = search(subject, null, null, 100);
  } catch (Exception e) {
    // Was an empty catch: keep the best-effort fall-through ("subject not
    // indexed yet") but log the failure instead of hiding it.
    log.warn("Lookup of existing triples for subject " + subject + " failed; indexing as new document.", e);
  }
  if (triples.size() == 0) {
    // no existing document: create a fresh one
    Document doc = new Document();
    log.debug(subject + " " + predicate + " " + object);
    doc.add(new StringField(FIELD_NAME_URI, subject, Store.YES));
    doc.add(new TextField(FIELD_NAME_SURFACE_FORM, object, Store.YES));
    doc.add(new TextField(FIELD_NAME_URI_COUNT, "1", Store.YES));
    doc.add(new TextField(FIELD_NAME_CONTEXT, object, Store.YES));
    iwriter.addDocument(doc);
  } else {
    String docID = triples.get(0).subject;
    log.info(triples.toString());
    if (isUri) {
      if (endpoint.isEmpty()) {
        log.info("endpoint empty");
        object = object.replace(nodeType, "");
      } else {
        // resolve the object's label via the configured SPARQL endpoint
        object = sparql(subject);
        log.info("endpoint working");
      }
    }
    // append the new object to the already-indexed context
    String remainContext = triples.get(0).object.concat(" " + object);
    log.info(remainContext);
    // NOTE(review): the triple's subject is parsed as a Lucene internal doc id
    // here — verify that Triple.subject actually carries the doc id.
    Document hitDoc = isearcher.doc(Integer.parseInt(docID));
    Document newDoc = new Document();
    newDoc.add(new StringField(FIELD_NAME_URI, triples.get(0).predicate, Store.YES));
    newDoc.add(new TextField(FIELD_NAME_SURFACE_FORM, hitDoc.get(FIELD_NAME_SURFACE_FORM), Store.YES));
    newDoc.add(new TextField(FIELD_NAME_URI_COUNT, "1", Store.YES));
    newDoc.add(new TextField(FIELD_NAME_CONTEXT, remainContext, Store.YES));
    iwriter.updateDocument(new Term(FIELD_NAME_URI, subject), newDoc);
  }
}
Example #22
Source File: TestDirectoryReader.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Adds one document mixing field flavors: keyword, analyzed text, stored-only
 * (unindexed), and unstored analyzed text.
 */
static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException {
  // stored but not indexed
  FieldType storedOnly = new FieldType();
  storedOnly.setStored(true);
  Document doc = new Document();
  doc.add(newStringField("keyword2", "test1", Field.Store.YES));
  doc.add(newTextField("text2", "test1", Field.Store.YES));
  doc.add(newField("unindexed2", "test1", storedOnly));
  doc.add(new TextField("unstored2", "test1", Field.Store.NO));
  writer.addDocument(doc);
}
Example #23
Source File: TestCustomTermFreq.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Indexes one document whose terms carry canned custom frequencies and
 * verifies the postings report exactly those frequencies.
 */
public void testSingletonTermsOneDoc() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  Field field = new Field("field", new CannedTermFreqs(new String[] {"foo", "bar"}, new int[] {42, 128}), fieldType);
  doc.add(field);
  w.addDocument(doc);
  IndexReader r = DirectoryReader.open(w);
  // "bar" was fed with a custom term frequency of 128
  PostingsEnum postings = MultiTerms.getTermPostingsEnum(r, "field", new BytesRef("bar"), (int) PostingsEnum.FREQS);
  assertNotNull(postings);
  assertEquals(0, postings.nextDoc());
  assertEquals(128, postings.freq());
  assertEquals(NO_MORE_DOCS, postings.nextDoc());
  // "foo" was fed with a custom term frequency of 42
  postings = MultiTerms.getTermPostingsEnum(r, "field", new BytesRef("foo"), (int) PostingsEnum.FREQS);
  assertNotNull(postings);
  assertEquals(0, postings.nextDoc());
  assertEquals(42, postings.freq());
  assertEquals(NO_MORE_DOCS, postings.nextDoc());
  IOUtils.close(r, w, dir);
}
Example #24
Source File: TestTopDocsCollector.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks how TopScoreDocCollector's totalHitsThreshold affects the reported
 * totalHits and its relation: exact count vs lower bound after early termination.
 */
public void testRelationVsTopDocsCount() throws Exception {
  try (Directory dir = newDirectory();
       IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
    Document doc = new Document();
    doc.add(new TextField("f", "foo bar", Store.NO));
    // two flushed segments of 5 matching docs each => 10 hits total
    w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
    w.flush();
    w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
    w.flush();
    try (IndexReader reader = DirectoryReader.open(w)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // threshold 10 >= actual hits: count is exact
      TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 10);
      searcher.search(new TermQuery(new Term("f", "foo")), collector);
      assertEquals(10, collector.totalHits);
      assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
      // threshold 2 < actual hits: counting may stop early, only a lower bound
      collector = TopScoreDocCollector.create(2, null, 2);
      searcher.search(new TermQuery(new Term("f", "foo")), collector);
      assertTrue(10 >= collector.totalHits);
      assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
      // numHits 10 forces collecting all matches despite the low threshold
      collector = TopScoreDocCollector.create(10, null, 2);
      searcher.search(new TermQuery(new Term("f", "foo")), collector);
      assertEquals(10, collector.totalHits);
      assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
    }
  }
}
Example #25
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Indexes one document with full term vectors and checks FastVectorHighlighter
 * fragments at several fragment sizes.
 */
public void testSimpleHighlightTest() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  // FVH requires term vectors with positions and offsets
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery = highlighter.getFieldQuery(new TermQuery(new Term("field", "foo")), reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
Example #26
Source File: SimpleFragmentsBuilderTest.java From lucene-solr with Apache License 2.0 | 5 votes |
protected void makeUnstoredIndex() throws Exception { IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzerW).setOpenMode(OpenMode.CREATE)); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setStoreTermVectors(true); customType.setStoreTermVectorOffsets(true); customType.setStoreTermVectorPositions(true); doc.add( new Field( F, "aaa", customType) ); //doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) ); writer.addDocument( doc ); writer.close(); if (reader != null) reader.close(); reader = DirectoryReader.open(dir); }
Example #27
Source File: JtsPolygonTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * A PrefixTree pruning optimization gone bad.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-4770">LUCENE-4770</a>.
 */
@Test
public void testBadPrefixTreePrune() throws Exception {
  Shape area = ctx.readShapeFromWkt("POLYGON((-122.83 48.57, -122.77 48.56, -122.79 48.53, -122.83 48.57))");
  SpatialPrefixTree trie = new QuadPrefixTree(ctx, 12);
  TermQueryPrefixTreeStrategy strategy = new TermQueryPrefixTreeStrategy(trie, "geo");
  Document doc = new Document();
  doc.add(new TextField("id", "1", Store.YES));
  // 0.025 is the distance-error percentage for the indexed shape
  Field[] fields = strategy.createIndexableFields(area, 0.025);
  for (Field field : fields) {
    doc.add(field);
  }
  addDocument(doc);
  // query rectangle overlapping the indexed polygon; exactly one hit expected
  Point upperleft = ctx.getShapeFactory().pointXY(-122.88, 48.54);
  Point lowerright = ctx.getShapeFactory().pointXY(-122.82, 48.62);
  Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects,
      ctx.getShapeFactory().rect(upperleft, lowerright)));
  commit();
  TopDocs search = indexSearcher.search(query, 10);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  for (ScoreDoc scoreDoc : scoreDocs) {
    System.out.println(indexSearcher.doc(scoreDoc.doc));
  }
  assertEquals(1, search.totalHits.value);
}
Example #28
Source File: BaseIndex.java From everywhere with Apache License 2.0 | 5 votes |
private static void indexDoc(IndexWriter writer, FileBean t) throws Exception { Document doc = new Document(); if (t.getContent() != null) { doc.add(new TextField(LuceneConstants.PATH, t.getFilepath(), Field.Store.YES)); doc.add(new StringField(LuceneConstants.MODIFIED, UtilsTool.getDateStrByLastModified(t.getLastModified()), Field.Store.YES)); doc.add(new TextField(LuceneConstants.CONTENT, t.getContent(), CommonConstants.IS_OPEN_CONTEXT ? Field.Store.YES : Field.Store.NO)); // System.out.println("added to document:" + t.getFilepath()); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE){ writer.addDocument(doc); } else{ writer.updateDocument(new Term(LuceneConstants.PATH, t.getFilepath()), doc); } } }
Example #29
Source File: TestCustomNorms.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Indexes random docs under a custom Similarity (MySimProvider) whose norm is
 * derived from the field text, then verifies each document's stored norm
 * equals the boost encoded in that text.
 */
public void testFloatNorms() throws IOException {
  Directory dir = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 2, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  Similarity provider = new MySimProvider();
  config.setSimilarity(provider);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  final LineFileDocs docs = new LineFileDocs(random());
  int num = atLeast(100);
  for (int i = 0; i < num; i++) {
    Document doc = docs.nextDoc();
    int boost = TestUtil.nextInt(random(), 1, 10);
    // field value repeats the boost `boost` times, e.g. "3 3 3"
    String value = IntStream.range(0, boost).mapToObj(k -> Integer.toString(boost)).collect(Collectors.joining(" "));
    Field f = new TextField(FLOAT_TEST_FIELD, value, Field.Store.YES);
    doc.add(f);
    writer.addDocument(doc);
    // remove the field so the shared LineFileDocs doc can be reused next round
    doc.removeField(FLOAT_TEST_FIELD);
    if (rarely()) {
      writer.commit();
    }
  }
  writer.commit();
  writer.close();
  DirectoryReader open = DirectoryReader.open(dir);
  NumericDocValues norms = MultiDocValues.getNormValues(open, FLOAT_TEST_FIELD);
  assertNotNull(norms);
  for (int i = 0; i < open.maxDoc(); i++) {
    Document document = open.document(i);
    // the first token of the stored value is the expected norm
    int expected = Integer.parseInt(document.get(FLOAT_TEST_FIELD).split(" ")[0]);
    assertEquals(i, norms.nextDoc());
    assertEquals(expected, norms.longValue());
  }
  open.close();
  dir.close();
  docs.close();
}
Example #30
Source File: TestIndexWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Indexes one massive document (128K occurrences of "a") with a tiny RAM
 * buffer, then verifies the single posting reports the full term frequency.
 */
public void testHighFreqTerm() throws IOException {
  Directory dir = newDirectory();
  // tiny RAM buffer forces frequent flushing while building the huge doc
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setRAMBufferSizeMB(0.01));
  // Massive doc that has 128 K a's
  StringBuilder b = new StringBuilder(1024*1024);
  for(int i=0;i<4096;i++) {
    // 32 a's per iteration; 4096 * 32 = 131072 = 128K
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
  }
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("field", b.toString(), customType));
  writer.addDocument(doc);
  writer.close();
  IndexReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.maxDoc());
  assertEquals(1, reader.numDocs());
  Term t = new Term("field", "a");
  assertEquals(1, reader.docFreq(t));
  PostingsEnum td = TestUtil.docs(random(), reader, "field", new BytesRef("a"), null, PostingsEnum.FREQS);
  td.nextDoc();
  assertEquals(128*1024, td.freq());
  reader.close();
  dir.close();
}