Java Code Examples for org.apache.solr.update.AddUpdateCommand#getSolrInputDocument()

The following examples show how to use org.apache.solr.update.AddUpdateCommand#getSolrInputDocument(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AddCoordinatesUpdateRequestProcessor.java    From apache-solr-essentials with Apache License 2.0 6 votes vote down vote up
/**
 * Enriches the incoming document with a "coordinates" field derived from its
 * "address" field, then forwards the command to the next processor.
 *
 * <p>The lookup is best-effort: when the address is missing or blank no lookup
 * is attempted, and any failure during the lookup is logged without aborting
 * the update. In every case the command is forwarded down the chain, so
 * documents without an address are still indexed.
 *
 * @param command the update command carrying the document to be indexed.
 * @throws IOException if a processor further down the chain fails.
 */
@Override
public void processAdd(final AddUpdateCommand command) throws IOException  {
	final SolrInputDocument document = command.getSolrInputDocument();
	final String address = (String) document.getFieldValue("address");
	if (address != null && address.trim().length() !=  0) {
		try {
			final String id = String.valueOf(document.getFieldValue("id"));
			final String coordinates = service.getCoordinates(id, address);
			if (coordinates != null && coordinates.trim().length() != 0) {
				document.addField("coordinates", coordinates);
			} else {
				LOGGER.error("Document " + id + "  with address \"" + address+" \" hasn't been translated (null address)");
			}
		
			// Pause between lookups (see sleep(), defined elsewhere in this class).
			sleep();
		} catch (final Exception exception) {
			// Deliberate best-effort: a failed lookup must not prevent indexing.
			LOGGER.error("Unable to get coordinates for "+ document, exception);
		}
	}

	// IMPORTANT: always forward the command. Previously this call lived inside the
	// address check, so documents without an address never reached the rest of the chain.
	super.processAdd(command);
}
 
Example 2
Source File: DocExpirationUpdateProcessorFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Adds an expiration value to the document, computed from a date-math "ttl"
 * expression, and then forwards the command down the chain.
 *
 * <p>The expression is taken from the document's ttl field when that field is
 * present; otherwise the configured default is used. When neither yields an
 * expression the document is forwarded untouched.
 *
 * @param cmd the add command whose document is being processed
 * @throws IOException if a downstream processor fails
 * @throws SolrException (BAD_REQUEST) if the ttl expression cannot be parsed
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
  final SolrInputDocument doc = cmd.getSolrInputDocument();

  final String ttlExpression;
  if (doc.containsKey(ttlField)) {
    ttlExpression = doc.getFieldValue(ttlField).toString();
  } else {
    ttlExpression = defaultTtl;
  }

  if (ttlExpression != null) {
    try {
      // TODO: consider accepting "1DAY" in addition to "+1DAY".
      // Testing for a leading "+" is not an option, since expressions such as
      // "/DAY+1YEAR" are legal. One possibility: on ParseException, retry
      // with "+" prepended.
      doc.addField(expireField, new DateMathParser().parseMath(ttlExpression));
    } catch (ParseException parseFailure) {
      throw new SolrException(BAD_REQUEST, "Can't parse ttl as date math: " + ttlExpression, parseFailure);
    }
  }

  super.processAdd(cmd);
}
 
Example 3
Source File: TemplateUpdateProcessorFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Adds one field to the document for every "field" parameter of the form
 * {@code <field-name>:<the-template-string>}.
 *
 * <p>Placeholders in the template are resolved against the document's current
 * field values; a placeholder whose field has no value resolves to the empty
 * string. Null or empty parameter values are ignored.
 *
 * @throws RuntimeException if a non-empty parameter value contains no ':' separator
 */
@Override
protected void process(AddUpdateCommand cmd, SolrQueryRequest req, SolrQueryResponse rsp) {
  final String[] fieldSpecs = getParams("field");
  final SolrInputDocument doc = cmd.getSolrInputDocument();
  if (fieldSpecs == null || fieldSpecs.length == 0) {
    return;
  }

  for (final String spec : fieldSpecs) {
    if (spec != null && !spec.isEmpty()) {
      final int separator = spec.indexOf(':');
      if (separator == -1) {
        throw new RuntimeException("'field' must be of the format <field-name>:<the-template-string>");
      }

      final String targetField = spec.substring(0, separator);
      final String templateText = spec.substring(separator + 1);
      doc.addField(targetField, replaceTokens(templateText, templateCache, placeholder -> {
        Object resolved = doc.getFieldValue(placeholder);
        return resolved != null ? resolved : "";
      }, BRACES_PLACEHOLDER_PATTERN));
    }
  }
}
 
Example 4
Source File: ClassificationUpdateProcessor.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Classifies the incoming document when it carries no value in the training
 * class field, adding every class returned by the classifier to the predicted
 * class field, and then forwards the command down the chain.
 *
 * @param cmd the update command in input containing the Document to classify
 * @throws IOException If there is a low-level I/O error
 */
@Override
public void processAdd(AddUpdateCommand cmd)
    throws IOException {
  final SolrInputDocument inputDoc = cmd.getSolrInputDocument();
  // Resolved up-front, before the training-class check, exactly as the classifier input.
  final Document luceneDoc = cmd.getLuceneDocument();

  if (inputDoc.getFieldValue(trainingClassField) == null) {
    final List<ClassificationResult<BytesRef>> predictions = classifier.getClasses(luceneDoc, maxOutputClasses);
    if (predictions != null) {
      for (ClassificationResult<BytesRef> prediction : predictions) {
        final String predictedClass = prediction.getAssignedClass().utf8ToString();
        inputDoc.addField(predictedClassField, predictedClass);
      }
    }
  }

  super.processAdd(cmd);
}
 
Example 5
Source File: RemoveTrailingUnderscoreProcessor.java    From apache-solr-essentials with Apache License 2.0 6 votes vote down vote up
/**
 * Hooks into the add-document operation and strips a single trailing
 * underscore from the value of each configured field of the incoming
 * {@link SolrInputDocument}.
 * 
 * @param command the update command.
 * @throws IOException in case of I/O failure.
 */
@Override
public void processAdd(final AddUpdateCommand command) throws IOException {
	// The document holding the data that is about to be indexed.
	final SolrInputDocument document = command.getSolrInputDocument();
	
	for (final String name : fields) {
		// For simplicity the field is assumed to be single-valued and of type String.
		final String current = (String) document.getFieldValue(name);
		
		// Nothing to do for missing values or values without a trailing underscore.
		if (current == null || !current.endsWith("_")) {
			continue;
		}
		
		final String trimmed = current.substring(0, current.length() - 1);
		document.setField(name, trimmed);
	}
	
	// IMPORTANT: hand the command over to the next processor in the chain.
	super.processAdd(command);
}
 
Example 6
Source File: URLClassifyProcessor.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * When the processor is enabled and the document contains the URL field,
 * normalizes that URL and stores values derived from it (length, levels,
 * top-level-page flag, landing-page flag and, when configured, domain and
 * canonical URL) on the document. A URL that cannot be normalized is logged
 * as a warning and the document is left as it is.
 *
 * <p>The command is always forwarded down the chain afterwards.
 *
 * @param command the add command whose document is inspected
 * @throws IOException if a downstream processor fails
 */
@Override
public void processAdd(AddUpdateCommand command) throws IOException {
  if (isEnabled()) {
    final SolrInputDocument doc = command.getSolrInputDocument();
    if (doc.containsKey(urlFieldname)) {
      final String rawUrl = (String) doc.getFieldValue(urlFieldname);
      try {
        final URL normalized = getNormalizedURL(rawUrl);

        doc.setField(lengthFieldname, length(normalized));
        doc.setField(levelsFieldname, levels(normalized));

        // The two flags are stored as 1/0 integers rather than booleans.
        final int topLevelFlag = isTopLevelPage(normalized) ? 1 : 0;
        doc.setField(toplevelpageFieldname, topLevelFlag);
        final int landingFlag = isLandingPage(normalized) ? 1 : 0;
        doc.setField(landingpageFieldname, landingFlag);

        // Domain and canonical URL are optional outputs.
        if (domainFieldname != null) {
          doc.setField(domainFieldname, normalized.getHost());
        }
        if (canonicalUrlFieldname != null) {
          doc.setField(canonicalUrlFieldname, getCanonicalUrl(normalized));
        }
        log.debug("{}", doc);
      } catch (MalformedURLException | URISyntaxException e) {
        log.warn("cannot get the normalized url for '{}' due to {}", rawUrl, e.getMessage());
      }
    }
  }
  super.processAdd(command);
}
 
Example 7
Source File: RegexpBoostProcessor.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Computes a boost for the document from the value of the input field and
 * stores it in the boost field.
 *
 * <p>The boost starts at 1.0 and is combined with the boost of every entry
 * whose pattern fully matches the input value. Documents that do not contain
 * the input field are left untouched.
 *
 * @param command the add command whose document is boosted
 */
public void processBoost(AddUpdateCommand command) {
  final SolrInputDocument doc = command.getSolrInputDocument();
  if (!doc.containsKey(inputFieldname)) {
    return;
  }

  final String input = (String) doc.getFieldValue(inputFieldname);
  double accumulated = 1.0f;
  for (BoostEntry entry : boostEntries) {
    if (!entry.getPattern().matcher(input).matches()) {
      continue;
    }
    if (log.isDebugEnabled()) {
      log.debug("Pattern match {} for {}", entry.getPattern().pattern(), input);
    }
    // Arithmetic intentionally kept in this exact form so results stay bit-identical.
    accumulated = (entry.getBoost() * 1000) * (accumulated * 1000) / 1000000;
  }
  doc.setField(boostFieldname, accumulated);

  if (log.isDebugEnabled()) {
    log.debug("Value {}, applied to field {}", accumulated, boostFieldname);
  }
}
 
Example 8
Source File: DocBasedVersionConstraintsProcessor.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the user-version constraint to an add command.
 *
 * <p>When {@code isNotLeader(cmd)} is true the command is forwarded unchanged.
 * Otherwise the user versions are read from the document and validated, and
 * the add is attempted in a retry loop: an update that is not "new enough" is
 * dropped silently, and an add that fails with a 409 (version conflict)
 * SolrException is retried.
 *
 * @param cmd the add command to check and forward
 * @throws IOException if a downstream processor fails
 * @throws SolrException rethrown when the downstream failure is not a 409 conflict
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
  // Fast path: no version checking in this case, just pass the command along.
  if (isNotLeader(cmd)) {
    super.processAdd(cmd);
    return;
  }

  final SolrInputDocument newDoc = cmd.getSolrInputDocument();
  Object[] newVersions = getUserVersionsFromDocument(newDoc);
  validateUserVersions(newVersions, versionFieldNames, "Doc does not have versionField: ");

  // Unbounded retry loop; it exits only via return or a rethrown exception.
  // i counts the attempts and is only used for logging.
  for (int i=0; ;i++) {
    logOverlyFailedRetries(i, cmd);

    if (!isVersionNewEnough(cmd.getIndexedId(), newVersions)) {
      // drop older update
      return;
    }

    try {
      // NOTE(review): oldSolrVersion is not assigned in this method; presumably
      // isVersionNewEnough() refreshes it on every iteration - confirm.
      cmd.setVersion(oldSolrVersion);  // use optimistic concurrency to ensure that the doc has not changed in the meantime
      super.processAdd(cmd);
      return;
    } catch (SolrException e) {
      if (e.code() == 409) {
        continue;  // if a version conflict, retry
      }
      throw e;  // rethrow
    }

  }
}
 
Example 9
Source File: SplitCompoundFieldProcessor.java    From apache-solr-essentials with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the compound field of the incoming document into separate "year"
 * and "language" fields, removes the compound field, and forwards the
 * command to the next processor.
 *
 * <p>The year is taken from characters 0-3 and the language from characters
 * 36-38 of the compound value; each is only extracted when the value is long
 * enough. Documents without the compound field are forwarded unchanged.
 *
 * @param command the update command.
 * @throws IOException in case of I/O failure.
 */
@Override
public void processAdd(final AddUpdateCommand command) throws IOException {
	final SolrInputDocument document = command.getSolrInputDocument();
	final String compoundValue = (String) document.getFieldValue(COMPOUND_FIELD_NAME);

	if (compoundValue != null) {
		final int length = compoundValue.length();
		
		// "year": the first four characters.
		if (length >= 4) {
			document.setField("year", compoundValue.substring(0, 4));
		}
		
		// "language": the three characters starting at offset 36.
		if (length >= 39) {
			document.setField("language", compoundValue.substring(36, 39));
		}
		
		// The compound field itself is dropped from the document.
		document.remove(COMPOUND_FIELD_NAME);
	}
	
	// IMPORTANT: hand the command over to the next processor in the chain.
	super.processAdd(command);
}
 
Example 10
Source File: FieldMutatingUpdateProcessor.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs <code>mutate</code> over every field accepted by the selector and then
 * forwards the command down the chain.
 *
 * <p>A <code>null</code> result from <code>mutate</code> removes the field; any
 * other result replaces it, provided the field name is unchanged. A
 * SolrException thrown by <code>mutate</code> is logged together with the
 * field name, wrapped as a BAD_REQUEST and re-thrown.
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
  final SolrInputDocument doc = cmd.getSolrInputDocument();

  // Iterate over a snapshot of the names, since the doc is modified inside the loop.
  final Collection<String> nameSnapshot = new ArrayList<>(doc.getFieldNames());

  for (final String name : nameSnapshot) {
    if (selector.shouldMutate(name)) {
      final SolrInputField original = doc.get(name);

      final SolrInputField mutated;
      try {
        mutated = mutate(original);
      } catch (SolrException e) {
        String msg = "Unable to mutate field '"+name+"': "+e.getMessage();
        SolrException.log(log, msg, e);
        throw new SolrException(BAD_REQUEST, msg, e);
      }

      if (mutated == null) {
        doc.remove(name);
        continue;
      }

      // A result carrying a different name could mean either "copy" or
      // "rename"; the semantics are unclear, so it is rejected for now.
      if (!name.equals(mutated.getName())) {
        throw new SolrException(SERVER_ERROR,
                                "mutate returned field with different name: "
                                + name + " => " + mutated.getName());
      }
      doc.put(mutated.getName(), mutated);
    }
  }
  super.processAdd(cmd);
}
 
Example 11
Source File: AbstractDefaultValueUpdateProcessorFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the default value to the configured field when the document does not
 * already contain that field, then forwards the command down the chain.
 *
 * @param cmd the add command whose document is inspected
 * @throws IOException if a downstream processor fails
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
  final SolrInputDocument target = cmd.getSolrInputDocument();

  final boolean alreadyPresent = target.containsKey(fieldName);
  if (!alreadyPresent) {
    // The default is only computed when it is actually needed.
    target.addField(fieldName, getDefaultValue());
  }

  super.processAdd(cmd);
}
 
Example 12
Source File: AtomicUpdateDocumentMerger.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the SolrInputDocument of the given AddUpdateCommand carries
 * atomic update instructions, i.e. whether at least one of its fields has a
 * value that is a Map but not a SolrDocumentBase.
 *
 * @param cmd the add command to inspect
 * @return true if any field value is such a Map, false otherwise
 */
public static boolean isAtomicUpdate(final AddUpdateCommand cmd) {
  return cmd.getSolrInputDocument().values().stream()
      .map(SolrInputField::getValue)
      .anyMatch(value -> value instanceof Map && !(value instanceof SolrDocumentBase));
}
 
Example 13
Source File: AtomicUpdateProcessorFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Converts plain field values into atomic update operations, driven by the
 * request parameters that start with ATOMIC_FIELD_PREFIX (the remainder of
 * the parameter name is the field, the parameter value is the operation).
 *
 * <p>Fields that are absent from the document, or whose value is already a
 * Map, are skipped. If at least one field was converted, the document's
 * version field is set (to -1 when no version is known) and the add is issued
 * through processAddWithRetry; otherwise the command is forwarded unchanged.
 *
 * @param cmd the add command to process
 * @throws IOException if a downstream processor fails
 * @throws SolrException (SERVER_ERROR) if a parameter names an operation not in VALID_OPS
 */
@SuppressWarnings("unchecked")
@Override
public void processAdd(AddUpdateCommand cmd)
    throws IOException {

  final SolrInputDocument orgdoc = cmd.getSolrInputDocument();
  boolean convertedAnyField = false;

  for (Iterator<String> names = req.getParams().getParameterNamesIterator(); names.hasNext(); ) {
    final String param = names.next();
    if (!param.startsWith(ATOMIC_FIELD_PREFIX)) {
      continue;
    }

    final String field = param.substring(ATOMIC_FIELD_PREFIX.length());
    final String operation = req.getParams().get(param);

    if (!VALID_OPS.contains(operation)) {
      throw new SolrException(SERVER_ERROR,
          "Unexpected param(s) for AtomicUpdateProcessor, invalid atomic op passed: '" +
              operation + "'");
    }

    // Only a field that is present and not already an atomic operation is converted;
    // anything else is skipped and the remaining parameters are still processed.
    final boolean hasPlainValue =
        orgdoc.get(field) != null && !(orgdoc.get(field).getValue() instanceof Map);
    if (hasPlainValue) {
      orgdoc.setField(field, singletonMap(operation, orgdoc.get(field).getValue()));
      convertedAnyField = true;
    }
  }

  if (!convertedAnyField) {
    // Nothing was converted: forward the document as it arrived.
    super.processAdd(cmd);
    return;
  }

  // Atomic update: set _version_ for optimistic concurrency.
  Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId());
  if (lastVersion == null) {
    // -1 asserts that the document must not exist.
    lastVersion = -1L;
  }
  orgdoc.setField(VERSION, lastVersion);
  processAddWithRetry(cmd, 1, cmd.getSolrInputDocument().deepCopy());
}
 
Example 14
Source File: LanguageProcessor.java    From document-management-software with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Publishes the value of the document's language field through
 * {@code FilteredAnalyzer.lang} and then forwards the command down the chain.
 *
 * @param cmd the add command whose document is inspected
 * @throws IOException if a downstream processor fails
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
	final Object rawLanguage = cmd.getSolrInputDocument().getFieldValue(HitField.LANGUAGE.getName());
	FilteredAnalyzer.lang.set((String) rawLanguage);
	super.processAdd(cmd);
}
 
Example 15
Source File: SignatureUpdateProcessorFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * When enabled, computes a signature over the document's signature fields and
 * stores it, hex-encoded, in the signature field.
 *
 * <p>If no signature fields are configured, all field names of the document
 * are used in sorted order. Partial (atomic) updates are rejected whenever
 * they would involve a signature field. With overwriteDupes set, the
 * command's update term is pointed at the computed signature. Finally the
 * command is passed to the next processor, if there is one.
 *
 * @param cmd the add command whose document is signed
 * @throws IOException if a downstream processor fails
 * @throws SolrException (SERVER_ERROR) for a partial update touching signature fields
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
  if (enabled) {
    final SolrInputDocument doc = cmd.getSolrInputDocument();
    final boolean partialUpdate = AtomicUpdateDocumentMerger.isAtomicUpdate(cmd);

    // Decide which fields feed the signature.
    final List<String> fieldsToSign;
    if (sigFields != null && sigFields.size() != 0) {
      fieldsToSign = sigFields;
    } else {
      if (partialUpdate) {
        throw new SolrException
            (ErrorCode.SERVER_ERROR,
                "Can't use SignatureUpdateProcessor with partial updates on signature fields");
      }
      // No explicit configuration: sign every field, in sorted name order.
      fieldsToSign = new ArrayList<>(doc.getFieldNames());
      Collections.sort(fieldsToSign);
    }

    final Signature sig = req.getCore().getResourceLoader().newInstance(signatureClass, Signature.class);
    sig.init(params);

    for (String fieldName : fieldsToSign) {
      final SolrInputField inputField = doc.getField(fieldName);
      if (inputField == null) {
        continue;
      }
      if (partialUpdate) {
        throw new SolrException
            (ErrorCode.SERVER_ERROR,
                "Can't use SignatureUpdateProcessor with partial update request " +
                    "containing signature field: " + fieldName);
      }

      // The field name goes in first, followed by each of its values.
      sig.add(fieldName);
      final Object value = inputField.getValue();
      if (value instanceof Collection) {
        for (Object element : (Collection<?>) value) {
          sig.add(String.valueOf(element));
        }
      } else {
        sig.add(String.valueOf(value));
      }
    }

    // Hex-encode the signature bytes, two digits per byte, high nibble first.
    final byte[] digest = sig.getSignature();
    final char[] hex = new char[digest.length * 2];
    int out = 0;
    for (final byte b : digest) {
      hex[out++] = StrUtils.HEX_DIGITS[(b >> 4) & 0xf];
      hex[out++] = StrUtils.HEX_DIGITS[b & 0xf];
    }
    final String sigString = new String(hex);
    doc.addField(signatureField, sigString);

    if (overwriteDupes) {
      cmd.updateTerm = new Term(signatureField, sigString);
    }
  }

  if (next != null) {
    next.processAdd(cmd);
  }
}
 
Example 16
Source File: DistributedUpdateProcessor.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves an atomic update into the document that will actually be written.
 *
 * <p>Returns false immediately when the command is not an atomic update.
 * Otherwise an in-place update is attempted first when in-place updatable
 * fields were computed; if that is not possible (or fails) the partial
 * document is merged with the previously stored document, and the merged
 * result is assigned to {@code cmd.solrDoc}.
 *
 * @param cmd the add command carrying the (partial) document
 * @param versionOnUpdate the version sent with the update; a value &gt; 0 makes a
 *        missing stored document a CONFLICT error
 * @return false if the command is not an atomic update, true otherwise
 * @throws IOException declared for the lookups and merges performed here
 * @throws SolrException CONFLICT or BAD_REQUEST as described inline
 */
boolean getUpdatedDocument(AddUpdateCommand cmd, long versionOnUpdate) throws IOException {
  if (!AtomicUpdateDocumentMerger.isAtomicUpdate(cmd)) return false;

  Set<String> inPlaceUpdatedFields = AtomicUpdateDocumentMerger.computeInPlaceUpdatableFields(cmd);
  if (inPlaceUpdatedFields.size() > 0) { // non-empty means this is suitable for in-place updates
    if (docMerger.doInPlaceUpdateMerge(cmd, inPlaceUpdatedFields)) {
      return true;
    } else {
      // in-place update failed, so fall through and re-try the same with a full atomic update
    }
  }
  
  // full (non-inplace) atomic update
  SolrInputDocument sdoc = cmd.getSolrInputDocument();
  BytesRef idBytes = cmd.getIndexedId();
  String idString = cmd.getPrintableId();
  // Look up the currently stored document (root together with its children) for this id.
  SolrInputDocument oldRootDocWithChildren = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), idBytes, RealTimeGetComponent.Resolution.ROOT_WITH_CHILDREN);

  if (oldRootDocWithChildren == null) {
    if (versionOnUpdate > 0) {
      // could just let the optimistic locking throw the error
      throw new SolrException(ErrorCode.CONFLICT, "Document not found for update.  id=" + idString);
    } else if (req.getParams().get(ShardParams._ROUTE_) != null) {
      // the specified document could not be found in this shard
      // and was explicitly routed using _route_
      throw new SolrException(ErrorCode.BAD_REQUEST,
          "Could not find document id=" + idString +
              ", perhaps the wrong \"_route_\" param was supplied");
    }
    // otherwise: no stored doc and no error; the merge below starts from an empty document
  } else {
    // the stored version field is removed before merging
    oldRootDocWithChildren.remove(CommonParams.VERSION_FIELD);
  }


  SolrInputDocument mergedDoc;
  if(idField == null || oldRootDocWithChildren == null) {
    // create a new doc by default if an old one wasn't found
    mergedDoc = docMerger.merge(sdoc, new SolrInputDocument());
  } else {
    // Safety check: don't allow an update to an existing doc that has children, unless we actually support this.
    if (req.getSchema().isUsableForChildDocs() // however, next line we see it doesn't support child docs
        && req.getSchema().supportsPartialUpdatesOfChildDocs() == false
        && req.getSearcher().count(new TermQuery(new Term(IndexSchema.ROOT_FIELD_NAME, idBytes))) > 1) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "This schema does not support partial updates to nested docs. See ref guide.");
    }

    // A root-field value that differs from this document's id means the update targets a child document.
    String oldRootDocRootFieldVal = (String) oldRootDocWithChildren.getFieldValue(IndexSchema.ROOT_FIELD_NAME);
    if(req.getSchema().savesChildDocRelations() && oldRootDocRootFieldVal != null &&
        !idString.equals(oldRootDocRootFieldVal)) {
      // this is an update where the updated doc is not the root document
      SolrInputDocument sdocWithChildren = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(),
          idBytes, RealTimeGetComponent.Resolution.DOC_WITH_CHILDREN);
      mergedDoc = docMerger.mergeChildDoc(sdoc, oldRootDocWithChildren, sdocWithChildren);
    } else {
      mergedDoc = docMerger.merge(sdoc, oldRootDocWithChildren);
    }
  }
  // Replace the command's document with the merged result.
  cmd.solrDoc = mergedDoc;
  return true;
}
 
Example 17
Source File: NestedUpdateProcessorFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Processes the children of the command's document, records the result in
 * {@code cmd.isNested}, and forwards the command down the chain.
 *
 * @param cmd the add command whose document is processed
 * @throws IOException if a downstream processor fails
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
  // The second argument is null for this top-level call, as in the original call site.
  cmd.isNested = processDocChildren(cmd.getSolrInputDocument(), null);
  super.processAdd(cmd);
}