Java Code Examples for org.apache.solr.client.solrj.SolrClient#add()
The following examples show how to use org.apache.solr.client.solrj.SolrClient#add().
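Before looking at the project-specific examples below, here is a minimal, self-contained sketch of the add-then-commit pattern they all build on. The Solr URL, class name, collection name, and field names are illustrative placeholders, not taken from any of the projects listed.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;

public class BasicSolrAddSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical base URL and core/collection name, shown only for illustration.
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/mycollection").build()) {
      SolrInputDocument doc = new SolrInputDocument();
      doc.addField("id", "example-1");
      doc.addField("name", "example document");

      // add() stages the document on the server; it only becomes searchable after a commit.
      UpdateResponse response = client.add(doc);
      client.commit();
    }
  }
}

Several of the examples below also use the overload client.add(collectionName, doc) to target a specific collection, followed by client.commit(collectionName).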
Example 1
Source File: SolrLocatorTest.java From kite with Apache License 2.0

@Test
public void testSelectsEmbeddedSolrServerAndAddDocument() throws Exception {
  // SolrLocator should select EmbeddedSolrServer when only solrHome is specified
  SolrLocator solrLocator = new SolrLocator(new SolrMorphlineContext.Builder().build());
  solrLocator.setSolrHomeDir(RESOURCES_DIR + "/solr");
  solrLocator.setCollectionName("collection1");

  SolrServerDocumentLoader documentLoader = (SolrServerDocumentLoader) solrLocator.getLoader();
  SolrClient solrServer = documentLoader.getSolrServer();
  assertTrue(solrServer instanceof EmbeddedSolrServer);

  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("id", "myId");
  doc.addField("text", "myValue");
  solrServer.add(doc);
  solrServer.commit();

  SolrDocument resultDoc = solrServer.getById("myId");
  assertTrue(resultDoc.getFieldValues("text").contains("myValue"));

  UpdateResponse deleteResponse = solrServer.deleteById("myId");
  assertEquals(0, deleteResponse.getStatus());
  solrServer.commit();
  solrServer.close();
}
Example 2
Source File: RootFieldTest.java From lucene-solr with Apache License 2.0

@Test
public void testUpdateWithChildDocs() throws Exception {
  SolrClient client = getSolrClient();
  client.deleteByQuery("*:*"); // delete everything!

  // Add child free doc
  SolrInputDocument docToUpdate = new SolrInputDocument();
  String docId = "11";
  docToUpdate.addField("id", docId);
  docToUpdate.addField("name", "parent doc with a child");

  SolrInputDocument child = new SolrInputDocument();
  child.addField("id", "111");
  child.addField("name", "child doc");
  docToUpdate.addChildDocument(child);

  if (!useRootSchema) {
    thrown.expect(SolrException.class);
    thrown.expectMessage("Unable to index docs with children:" +
        " the schema must include definitions for both a uniqueKey field" +
        " and the '_root_' field, using the exact same fieldType");
  }
  client.add(docToUpdate);
  client.commit();
}
Example 3
Source File: ShardRoutingTest.java From lucene-solr with Apache License 2.0

public void doAtomicUpdate() throws Exception {
  log.info("### STARTING doAtomicUpdate");
  int nClients = clients.size();
  assertEquals(8, nClients);

  int expectedVal = 0;
  for (SolrClient client : clients) {
    client.add(sdoc("id", "b!doc", "foo_i", map("inc", 1)));
    expectedVal++;

    QueryResponse rsp = client.query(params("qt", "/get", "id", "b!doc"));
    Object val = ((Map) rsp.getResponse().get("doc")).get("foo_i");
    assertEquals((Integer) expectedVal, val);
  }
}
Example 4
Source File: UsingSolrJRefGuideExamplesTest.java From lucene-solr with Apache License 2.0

@Test
public void indexWithSolrInputDocumentExample() throws Exception {
  // tag::solrj-index-with-raw-solrinputdoc[]
  final SolrClient client = getSolrClient();

  final SolrInputDocument doc = new SolrInputDocument();
  doc.addField("id", UUID.randomUUID().toString());
  doc.addField("name", "Amazon Kindle Paperwhite");

  final UpdateResponse updateResponse = client.add("techproducts", doc);
  // Indexed documents must be committed
  client.commit("techproducts");
  // end::solrj-index-with-raw-solrinputdoc[]

  assertNumDocuments(NUM_INDEXED_DOCUMENTS + 1);
}
Example 5
Source File: AbstractAlfrescoDistributedIT.java From SearchServices with GNU Lesser General Public License v3.0

/**
 * Indexes the document in both the client, and a selected shard
 */
protected static void indexDoc(SolrClient client, int shardId, SolrInputDocument doc)
    throws IOException, SolrServerException {
  client.add(doc);
  SolrClient clientShard = clientShards.get(shardId);
  clientShard.add(doc);
}
Example 6
Source File: BaseDistributedSearchTestCase.java From lucene-solr with Apache License 2.0

protected void index_specific(int serverNumber, Object... fields) throws Exception {
  SolrInputDocument doc = new SolrInputDocument();
  for (int i = 0; i < fields.length; i += 2) {
    doc.addField((String) (fields[i]), fields[i + 1]);
  }
  controlClient.add(doc);

  SolrClient client = clients.get(serverNumber);
  client.add(doc);
}
Example 7
Source File: BaseDistributedSearchTestCase.java From lucene-solr with Apache License 2.0

/**
 * Indexes the document in both the control client, and a randomly selected client
 */
protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
  controlClient.add(doc);
  if (shardCount == 0) { // mostly for temp debugging
    return;
  }
  int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size();
  SolrClient client = clients.get(which);
  client.add(doc);
}
Example 8
Source File: MoveReplicaTest.java From lucene-solr with Apache License 2.0

protected void addDocs(String collection, int numDocs) throws Exception {
  SolrClient solrClient = cluster.getSolrClient();
  for (int docId = 1; docId <= numDocs; docId++) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", docId);
    solrClient.add(collection, doc);
  }
  solrClient.commit(collection);
  Thread.sleep(5000);
}
Example 9
Source File: ConcurrentUpdateHttp2SolrClientMultiCollectionTest.java From lucene-solr with Apache License 2.0

private void splitDocumentsAcrossCollections(SolrClient client, int numTotalDocs)
    throws IOException, SolrServerException {
  for (int docNum = 0; docNum < numTotalDocs; docNum++) {
    final SolrInputDocument doc = new SolrInputDocument();
    doc.setField("id", "value" + docNum);
    if (docNum % 2 == 0) {
      client.add(COLLECTION_ONE_NAME, doc);
    } else {
      client.add(COLLECTION_TWO_NAME, doc);
    }
  }
  client.commit(COLLECTION_ONE_NAME);
  client.commit(COLLECTION_TWO_NAME);
}
Example 10
Source File: ContentEditor.java From jease with GNU General Public License v3.0

public void insertToSolr() {
  String solrurl = jease.Registry.getParameter(jease.Names.JEASE_SOLR_URL, "");
  if (solrurl.equals("")) {
    return;
  }
  String oid = checkDuplication();
  if (oid.length() > 0) {
    updateToSolr(oid);
    return;
  }
  try {
    ArrayList<String> tagslist = new ArrayList<String>(Arrays.asList(tags.getValue().split(",")));
    SolrClient client = new HttpSolrClient.Builder(solrurl).build();
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", UUID.randomUUID().toString());
    doc.addField("tags", tagslist);
    doc.addField("jeaseid", id.getValue());
    doc.addField("jeasepath", getNode().getPath());
    doc.addField("title", title.getValue());
    doc.addField("author", getNode().getEditor().getName());
    doc.addField("type", getNode().getType());
    doc.addField("text", getNode().getFulltext().toString());
    doc.addField("last_modified", new Date());
    doc.addField("date", month_date.format(new Date()));
    doc.addField("category", getNode().getParent().getId());
    client.add(doc);
    client.commit();
  } catch (Exception s) {
    s.printStackTrace();
  }
}
Example 11
Source File: TestPutSolrContentStream.java From nifi with Apache License 2.0

@Test
public void testDeleteWithXml() throws IOException, SolrServerException {
  final SolrClient solrClient = createEmbeddedSolrClient(DEFAULT_SOLR_CORE);
  final TestableProcessor proc = new TestableProcessor(solrClient);

  final TestRunner runner = createDefaultTestRunner(proc);
  runner.setProperty(PutSolrContentStream.CONTENT_STREAM_PATH, "/update");
  runner.setProperty(PutSolrContentStream.CONTENT_TYPE, "application/xml");
  runner.setProperty("commit", "true");

  // add a document so there is something to delete
  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("id", "1");
  doc.addField("first", "bob");
  doc.addField("last", "smith");
  doc.addField("created", new Date());

  solrClient.add(doc);
  solrClient.commit();

  // prove the document got added
  SolrQuery query = new SolrQuery("*:*");
  QueryResponse qResponse = solrClient.query(query);
  Assert.assertEquals(1, qResponse.getResults().getNumFound());

  // run the processor with a delete-by-query command
  runner.enqueue("<delete><query>first:bob</query></delete>".getBytes(StandardCharsets.UTF_8));
  runner.run(1, false);

  // prove the document got deleted
  qResponse = solrClient.query(query);
  Assert.assertEquals(0, qResponse.getResults().getNumFound());
}
Example 12
Source File: AbstractAlfrescoDistributedIT.java From SearchServices with GNU Lesser General Public License v3.0

/**
 * Indexes the document in both the client, and a randomly selected shard
 */
protected static void indexDoc(SolrClient client, boolean andShards, SolrInputDocument doc)
    throws IOException, SolrServerException {
  client.add(doc);
  if (andShards) {
    int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % clientShards.size();
    SolrClient clientShard = clientShards.get(which);
    clientShard.add(doc);
  }
}
Example 13
Source File: TestPutSolrContentStream.java From localization_nifi with Apache License 2.0

@Test
public void testDeleteWithXml() throws IOException, SolrServerException {
  final SolrClient solrClient = createEmbeddedSolrClient(DEFAULT_SOLR_CORE);
  final TestableProcessor proc = new TestableProcessor(solrClient);

  final TestRunner runner = createDefaultTestRunner(proc);
  runner.setProperty(PutSolrContentStream.CONTENT_STREAM_PATH, "/update");
  runner.setProperty(PutSolrContentStream.CONTENT_TYPE, "application/xml");
  runner.setProperty("commit", "true");

  // add a document so there is something to delete
  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("first", "bob");
  doc.addField("last", "smith");
  doc.addField("created", new Date());

  solrClient.add(doc);
  solrClient.commit();

  // prove the document got added
  SolrQuery query = new SolrQuery("*:*");
  QueryResponse qResponse = solrClient.query(query);
  Assert.assertEquals(1, qResponse.getResults().getNumFound());

  // run the processor with a delete-by-query command
  runner.enqueue("<delete><query>first:bob</query></delete>".getBytes("UTF-8"));
  runner.run(1, false);

  // prove the document got deleted
  qResponse = solrClient.query(query);
  Assert.assertEquals(0, qResponse.getResults().getNumFound());
}
Example 14
Source File: CollectionsAPISolrJTest.java From lucene-solr with Apache License 2.0

@Test
public void testColStatus() throws Exception {
  final String collectionName = "collectionStatusTest";
  CollectionAdminRequest.createCollection(collectionName, "conf2", 2, 2)
      .process(cluster.getSolrClient());

  cluster.waitForActiveCollection(collectionName, 2, 4);

  SolrClient client = cluster.getSolrClient();
  byte[] binData = collectionName.getBytes("UTF-8");
  // index some docs
  for (int i = 0; i < 10; i++) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", String.valueOf(i));
    doc.addField("number_i", i);
    doc.addField("number_l", i);
    doc.addField("number_f", i);
    doc.addField("number_d", i);
    doc.addField("number_ti", i);
    doc.addField("number_tl", i);
    doc.addField("number_tf", i);
    doc.addField("number_td", i);
    doc.addField("point", i + "," + i);
    doc.addField("pointD", i + "," + i);
    doc.addField("store", (i * 5) + "," + (i * 5));
    doc.addField("boolean_b", true);
    doc.addField("multi_int_with_docvals", i);
    doc.addField("string_s", String.valueOf(i));
    doc.addField("tv_mv_string", "this is a test " + i);
    doc.addField("timestamp_dt", new Date());
    doc.addField("timestamp_tdt", new Date());
    doc.addField("payload", binData);
    client.add(collectionName, doc);
  }
  client.commit(collectionName);

  CollectionAdminRequest.ColStatus req = CollectionAdminRequest.collectionStatus(collectionName);
  req.setWithFieldInfo(true);
  req.setWithCoreInfo(true);
  req.setWithSegments(true);
  req.setWithSizeInfo(true);
  CollectionAdminResponse rsp = req.process(cluster.getSolrClient());
  assertEquals(0, rsp.getStatus());
  @SuppressWarnings({"unchecked"})
  List<Object> nonCompliant = (List<Object>) rsp.getResponse().findRecursive(collectionName, "schemaNonCompliant");
  assertEquals(nonCompliant.toString(), 1, nonCompliant.size());
  assertTrue(nonCompliant.toString(), nonCompliant.contains("(NONE)"));
  @SuppressWarnings({"unchecked"})
  NamedList<Object> segInfos = (NamedList<Object>) rsp.getResponse()
      .findRecursive(collectionName, "shards", "shard1", "leader", "segInfos");
  assertNotNull(Utils.toJSONString(rsp), segInfos.findRecursive("info", "core", "startTime"));
  assertNotNull(Utils.toJSONString(rsp), segInfos.get("fieldInfoLegend"));
  assertNotNull(Utils.toJSONString(rsp), segInfos.findRecursive("segments", "_0", "fields", "id", "flags"));
  assertNotNull(Utils.toJSONString(rsp), segInfos.findRecursive("segments", "_0", "ramBytesUsed"));

  // test for replicas not active - SOLR-13882
  DocCollection coll = cluster.getSolrClient().getClusterStateProvider()
      .getClusterState().getCollection(collectionName);
  Replica firstReplica = coll.getSlice("shard1").getReplicas().iterator().next();
  String firstNode = firstReplica.getNodeName();
  for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
    if (jetty.getNodeName().equals(firstNode)) {
      cluster.stopJettySolrRunner(jetty);
    }
  }
  rsp = req.process(cluster.getSolrClient());
  assertEquals(0, rsp.getStatus());
  Number down = (Number) rsp.getResponse().findRecursive(collectionName, "shards", "shard1", "replicas", "down");
  assertTrue("should be some down replicas, but there were none in shard1:" + rsp,
      down.intValue() > 0);
}
Example 15
Source File: SolrExampleStreamingBinaryHttp2Test.java From lucene-solr with Apache License 2.0

@Test
public void testQueryAndStreamResponse() throws Exception {
  // index a simple document with one child
  SolrClient client = getSolrClient();
  client.deleteByQuery("*:*");

  SolrInputDocument child = new SolrInputDocument();
  child.addField("id", "child");
  child.addField("type_s", "child");
  child.addField("text_s", "text");

  SolrInputDocument parent = new SolrInputDocument();
  parent.addField("id", "parent");
  parent.addField("type_s", "parent");
  parent.addChildDocument(child);

  client.add(parent);
  client.commit();

  // create a query with child doc transformer
  SolrQuery query = new SolrQuery("{!parent which='type_s:parent'}text_s:text");
  query.addField("*,[child parentFilter='type_s:parent']");

  // test regular query
  QueryResponse response = client.query(query);
  assertEquals(1, response.getResults().size());
  SolrDocument parentDoc = response.getResults().get(0);
  assertEquals(1, parentDoc.getChildDocumentCount());

  // test streaming
  final List<SolrDocument> docs = new ArrayList<>();
  client.queryAndStreamResponse(query, new StreamingResponseCallback() {
    @Override
    public void streamSolrDocument(SolrDocument doc) {
      docs.add(doc);
    }

    @Override
    public void streamDocListInfo(long numFound, long start, Float maxScore) {
    }
  });

  assertEquals(1, docs.size());
  parentDoc = docs.get(0);
  assertEquals(1, parentDoc.getChildDocumentCount());
}
Example 16
Source File: IndexLoader.java From solr-autocomplete with Apache License 2.0

public static void main(String[] args) throws CorruptIndexException, IOException, SolrServerException {
  if (args.length < 3) {
    System.err.println("Usage: java -Dfile.encoding=UTF8 -Dclient.encoding.override=UTF-8 -Xmx256m -Xms256m -server "
        + IndexLoader.class.getName()
        + " </path/to/index> <AutoCompleteSolrUrl> <indexField1,acField1> [indexField2,acField2 ... ]");
    System.exit(0);
  }

  Map<String, String> fieldMap = getFieldMapping(args, 2);
  DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(args[0])));
  int docs = reader.maxDoc();
  SolrClient solr = new ConcurrentUpdateSolrClient.Builder(args[1])
      .withQueueSize(10000)
      .withThreadCount(2)
      .build();
  Set<SolrInputDocument> batch = new HashSet<SolrInputDocument>(1000);
  Bits liveDocs = MultiFields.getLiveDocs(reader);

  // go through all docs in the index
  for (int i = 0; i < docs; i++) {
    // process doc only if not deleted
    if (liveDocs == null || liveDocs.get(i)) {
      // loop through all fields to be looked at
      SolrInputDocument doc = new SolrInputDocument();
      Iterator<String> iter = fieldMap.keySet().iterator();

      boolean phraseFieldEmpty = false;

      while (iter.hasNext()) {
        String indexField = iter.next();
        String acField = fieldMap.get(indexField);
        IndexableField field = reader.document(i).getField(indexField);
        String value = field != null ? reader.document(i).getField(indexField).stringValue() : null;

        if (field != null && value != null && !value.isEmpty()) {
          doc.addField(acField, value);
        } else {
          // not very relevant piece of info
          // System.err.println("Field is null or empty, skipping: " + indexField);
          if (acField.equalsIgnoreCase("phrase")) {
            System.err.println("Since AC phrase field would be null, this doc will not be created: "
                + reader.document(i));
            phraseFieldEmpty = true;
            break;
          }
        }
      }

      if (!phraseFieldEmpty) {
        solr.add(doc);
        if (docs % 1000 == 0) {
          System.out.println("Docs: " + docs);
        }
      }
    }
  }

  if (!batch.isEmpty())
    solr.add(batch);

  reader.close();
  System.out.println("Optimizing...");
  solr.optimize();
  solr.close();
}
Example 17
Source File: RootFieldTest.java From lucene-solr with Apache License 2.0

@Test
public void testLegacyBlockProcessing() throws Exception {
  SolrClient client = getSolrClient();
  client.deleteByQuery("*:*"); // delete everything!

  // Add child free doc
  SolrInputDocument docToUpdate = new SolrInputDocument();
  String docId = "11";
  docToUpdate.addField("id", docId);
  docToUpdate.addField("name", "child free doc");
  client.add(docToUpdate);
  client.commit();

  SolrQuery query = new SolrQuery();
  query.setQuery("*:*");
  query.set(CommonParams.FL, "id,name,_root_");

  SolrDocumentList results = client.query(query).getResults();
  assertThat(results.getNumFound(), is(1L));
  SolrDocument foundDoc = results.get(0);

  // Check retrieved field values
  assertThat(foundDoc.getFieldValue("id"), is(docId));
  assertThat(foundDoc.getFieldValue("name"), is("child free doc"));

  String expectedRootValue = expectRoot() ? docId : null;
  assertThat(MESSAGE, foundDoc.getFieldValue("_root_"), is(expectedRootValue));

  // Update the doc
  docToUpdate.setField("name", "updated doc");
  client.add(docToUpdate);
  client.commit();

  results = client.query(query).getResults();
  assertEquals(1, results.getNumFound());
  foundDoc = results.get(0);

  // Check updated field values
  assertThat(foundDoc.getFieldValue("id"), is(docId));
  assertThat(foundDoc.getFieldValue("name"), is("updated doc"));
  assertThat(MESSAGE, foundDoc.getFieldValue("_root_"), is(expectedRootValue));
}
Example 18
Source File: TestJsonFacetRefinement.java From lucene-solr with Apache License 2.0

@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12556")
@Test
public void testProcessEmptyRefinement() throws Exception {
  initServers();
  final Client client = servers.getClient(random().nextInt());
  client.queryDefaults().set("shards", servers.getShards(), "debugQuery", Boolean.toString(random().nextBoolean()));

  List<SolrClient> clients = client.getClientProvider().all();
  assertTrue(clients.size() >= 3); // we only use 2, but assert at least 3 to also test empty shard
  final SolrClient c0 = clients.get(0);
  final SolrClient c1 = clients.get(1);

  client.deleteByQuery("*:*", null);
  int id = 0;

  c0.add(sdoc("id", id++, "cat_s", "Ax"));
  c0.add(sdoc("id", id++, "cat_s", "Bx"));
  c0.add(sdoc("id", id++, "cat_s", "Cx"));

  c1.add(sdoc("id", id++, "cat_s", "Ay"));
  c1.add(sdoc("id", id++, "cat_s", "By"));
  c1.add(sdoc("id", id++, "cat_s", "Cy"));
  c1.add(sdoc("id", id++, "cat_s", "Dy"));

  client.commit();

  // regardless of how much overrequest there is, in phase#1
  // all terms will tie on the sort criteria, and "Ax" should win the tiebreaker.
  //
  // When Ax is refined against c1, its 'debug' sort value will increase, but regardless
  // of the value of processEmpty, no other term should be returned in its place
  // (because if they are also correctly refined, then their 'debug' sort values will also increase
  // and Ax will still win the tie breaker -- and if they are not refined they shouldn't be returned)
  for (int overrequest = 0; overrequest < 5; overrequest++) {
    for (boolean pe : Arrays.asList(false, true)) {
      ModifiableSolrParams p = params("q", "*:*", "rows", "0", "json.facet",
          "{" +
          " top:{ type:terms, field:cat_s, limit:1, overrequest:" + overrequest + ", " +
          " refine:true, sort: 'debug asc', " +
          " facet:{ debug:'debug(numShards)', processEmpty:" + pe + " } } }");
      try {
        client.testJQ(p,
            "facets=={ count: " + id + "," +
            " top:{ buckets:[ " +
            " { val:Ax, count: 1, " +
            " debug:" + (pe ? 2 : 1) +
            " }" +
            " ] } }");
      } catch (AssertionError | RuntimeException e) {
        throw new AssertionError(p + " --> " + e.getMessage(), e);
      }
    }
  }
}
Example 19
Source File: TestJsonFacetRefinement.java From lucene-solr with Apache License 2.0

/** Helper method used by multiple tests to look at the same data in different ways */
private int initSomeDocsWhere1ShardHasOnlyParentFacetField() throws Exception {
  initServers();
  final Client client = servers.getClient(random().nextInt());
  client.queryDefaults().set("shards", servers.getShards(), "debugQuery", Boolean.toString(random().nextBoolean()));

  final List<SolrClient> clients = client.getClientProvider().all();
  assertTrue(clients.size() >= 2);
  final SolrClient c0 = clients.get(0);
  final SolrClient c1 = clients.get(1);

  client.deleteByQuery("*:*", null);
  int id = 0;

  // client 0 (shard1)
  // only terms pX & pY (with high counts) from the parent_s facet, no child_s values
  for (int i = 0; i < 10; i++) {
    c0.add(sdoc("id", id++, "parent_s", "pX"));
    for (int j = 0; j < 2; j++) {
      c0.add(sdoc("id", id++, "parent_s", "pY"));
    }
  }

  // client 1 (shard2)
  // some docs with pX & pY, but on this shard, pA & pB have higher counts
  // (but not as high as pX/pY on shard1)
  // all docs on this shard also have values in child_s
  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < 3; j++) {
      c1.add(sdoc("id", id++, "parent_s", "pA", "child_s", "c" + i));
      c1.add(sdoc("id", id++, "parent_s", "pB", "child_s", "c" + i));
    }
    c1.add(sdoc("id", id++, "parent_s", "pX", "child_s", "c" + i));
    c1.add(sdoc("id", id++, "parent_s", "pY", "child_s", "c" + i));
  }
  c1.add(sdoc("id", id++, "parent_s", "pX", "child_s", "c0"));
  c1.add(sdoc("id", id++, "parent_s", "pY", "child_s", "c1"));
  c1.add(sdoc("id", id++, "parent_s", "pY", "child_s", "c1"));

  client.commit();
  return id;
}
Example 20
Source File: DistributedFacetSimpleRefinementLongTailTest.java From lucene-solr with Apache License 2.0

public static void buildIndexes(final List<SolrClient> clients, final String statField) throws Exception {
  assertEquals("This indexing code assumes exactly 3 shards/clients", 3, clients.size());

  final AtomicInteger docNum = new AtomicInteger();
  final SolrClient shard0 = clients.get(0);
  final SolrClient shard1 = clients.get(1);
  final SolrClient shard2 = clients.get(2);

  // the 5 top foo_s terms have 100 docs each on every shard
  for (int i = 0; i < 100; i++) {
    for (int j = 0; j < 5; j++) {
      shard0.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "aaa" + j, statField, j * 13 - i));
      shard1.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "aaa" + j, statField, j * 3 + i));
      shard2.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "aaa" + j, statField, i * 7 + j));
    }
  }

  // 20 foo_s terms that come in "second" with 50 docs each
  // on both shard0 & shard1 ("bbb_")
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 20; j++) {
      shard0.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "bbb" + j, statField, 0));
      shard1.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "bbb" + j, statField, 1));
    }
    // distracting term appears only on shard2, 50 times
    shard2.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "junkA"));
  }
  // put "bbb0" on shard2 exactly once to sanity check refinement
  shard2.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "bbb0", statField, -2));

  // long 'tail' foo_s term appears in 45 docs on every shard
  // foo_s:tail is the only term with bar_s sub-pivot terms
  for (int i = 0; i < 45; i++) {
    // for the sub-pivot, shard0 & shard1 have 6 docs each for "tailB",
    // but the top 5 terms are ccc(0-4) -- 7 on each shard
    // (4 docs each have junk terms)
    String sub_term = (i < 35) ? "ccc" + (i % 5) : ((i < 41) ? "tailB" : "junkA");
    shard0.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "tail", "bar_s", sub_term, statField, i));
    shard1.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "tail", "bar_s", sub_term, statField, i));

    // shard2's top 5 sub-pivot terms are junk terms only it has, with 8 docs each,
    // plus 5 docs that use "tailB"
    // NOTE: none of these get statField ! !
    sub_term = (i < 40) ? "junkB" + (i % 5) : "tailB";
    shard2.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "tail", "bar_s", sub_term));
  }

  // really long tail of uncommon foo_s terms on shard2
  for (int i = 0; i < 30; i++) {
    // NOTE: using "Z" here so these sort before bbb0 when they tie for '1' instance each on shard2
    shard2.add(sdoc("id", docNum.incrementAndGet(), "foo_s", "ZZZ" + i));
  }
}