Java Code Examples for org.apache.uima.cas.FSIterator#next()

The following examples show how to use org.apache.uima.cas.FSIterator#next(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ChineseNormalizer.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Uses the covered text of every {@code WordAnnotation} in the CAS as both its
 * lemma and its stem.
 *
 * @param cas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException wrapping any exception raised while the
 *         annotations are read or updated
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
	try {
		for (FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator(); words
				.hasNext();) {
			WordAnnotation word = (WordAnnotation) words.next();
			// The surface form is used unchanged for both features.
			String surface = word.getCoveredText();
			word.setLemma(surface);
			word.setStem(surface);
		}
	} catch (Exception failure) {
		throw new AnalysisEngineProcessException(failure);
	}
}
 
Example 2
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Post-processing step: removes the timex expressions that were previously
 * flagged as invalid, i.e. whose timex value equals "REMOVE".
 * 
 * @param jcas
 *            CAS whose Timex3 annotations are filtered
 */
public void removeInvalids(JCas jcas) {
	// Two passes: the invalid timexes are collected first and removed afterwards,
	// because they are not removed from the index while its iterator is in use.
	HashSet<Timex3> invalidTimexes = new HashSet<Timex3>();
	for (FSIterator timexes = jcas.getAnnotationIndex(Timex3.type).iterator(); timexes.hasNext();) {
		Timex3 candidate = (Timex3) timexes.next();
		if (candidate.getTimexValue().equals("REMOVE")) {
			invalidTimexes.add(candidate);
		}
	}

	// Second pass: drop each collected timex, keep the counter in sync and log it.
	for (Timex3 invalid : invalidTimexes) {
		invalid.removeFromIndexes();
		this.timex_counter--;
		String detail = invalid.getTimexId() + " REMOVING PHASE: " + "found by:" + invalid.getFoundByRule()
				+ " text:" + invalid.getCoveredText() + " value:" + invalid.getTimexValue();
		Logger.printDetail(detail);
	}
}
 
Example 3
Source File: MateLemmaFixer.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Normalizes the lemma of every {@code WordAnnotation} in the CAS.
 * <ul>
 * <li>A missing lemma, or the placeholder lemma "CD", is replaced by the
 * lower-cased covered text.</li>
 * <li>Any other lemma is lower-cased; if the result equals the stem followed by
 * "s", the lemma is replaced by the lower-cased covered text with one trailing
 * "s" removed.</li>
 * </ul>
 * All lower-casing uses the locale of the configured language, so the result does
 * not depend on the JVM default locale.
 *
 * @param jCas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	FSIterator<Annotation> it = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
	while (it.hasNext()) {
		WordAnnotation word = (WordAnnotation) it.next();
		String lemma = word.getLemma();
		if (lemma == null || lemma.equals("CD")) { // "CD": or TermSuiteConstants.CARD_MATE
			word.setLemma(word.getCoveredText().toLowerCase(language.getLocale()));
		} else {
			// Use the language locale here as well, consistently with the other branches.
			String loweredLemma = lemma.toLowerCase(language.getLocale());
			word.setLemma(loweredLemma);
			if (loweredLemma.equals(word.getStem() + "s")) {
				word.setLemma(word.getCoveredText().toLowerCase(language.getLocale()).replaceAll("s$", ""));
			}
		}
	}
}
 
Example 4
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Identify the part of speech (POS) of a MatchResult by looking up the token of
 * the sentence that starts at the given offset.
 * 
 * @param tokBegin
 *            begin offset of the token to look up
 * @param tokEnd
 *            not used by this method
 * @param s
 *            sentence whose tokens are searched
 * @param jcas
 *            CAS providing the token index
 * @return the POS of the token starting at {@code tokBegin}, or the empty string
 *         if there is no such token or its POS is null
 */
public String getPosFromMatchResult(int tokBegin, int tokEnd, Sentence s, JCas jcas) {
	// Scan all tokens of the sentence; when several tokens share the same begin
	// offset, the one visited last is used.
	Token tokenToCheck = null;
	for (FSIterator tokens = jcas.getAnnotationIndex(Token.type).subiterator(s); tokens.hasNext();) {
		Token candidate = (Token) tokens.next();
		if (candidate.getBegin() == tokBegin) {
			tokenToCheck = candidate;
		}
	}

	if (tokenToCheck == null) {
		return "";
	}
	String pos = tokenToCheck.getPos();
	return pos == null ? "" : pos;
}
 
Example 5
Source File: CasUtil.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the one and only indexed feature structure of the given type.
 * 
 * @param cas
 *          the CAS to search.
 * @param type
 *          the UIMA type to look for.
 * @return the single indexed instance of the given type.
 * @throws IllegalArgumentException
 *           if the CAS holds no instance, or more than one instance, of the given type.
 */
public static FeatureStructure selectSingleFS(CAS cas, Type type) {
  FSIterator<FeatureStructure> candidates = cas.getIndexRepository().getAllIndexedFS(type);

  if (candidates.hasNext()) {
    FeatureStructure only = candidates.next();
    if (!candidates.hasNext()) {
      return only;
    }
    throw new IllegalArgumentException("CAS contains more than one [" + type.getName() + "]");
  }

  throw new IllegalArgumentException("CAS does not contain any [" + type.getName() + "]");
}
 
Example 6
Source File: CasStatCounter.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Updates the running statistics with one more document: increments the document
 * counter, adds the document size when source document information is present,
 * and counts every annotation by the short name of its type. When periodic stats
 * are enabled, the statistics are traced to file every {@code docPeriod} documents.
 *
 * @param aJCas the CAS of the document being processed
 * @throws AnalysisEngineProcessException if tracing to file fails with an I/O error
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
	this.docIt++;

	Optional<SourceDocumentInformation> sdi = JCasUtils.getSourceDocumentAnnotation(aJCas);
	if (sdi.isPresent()) {
		this.cumulatedFileSize += sdi.get().getDocumentSize();
	}

	// One counter per annotation type short name.
	for (FSIterator<Annotation> annotations = aJCas.getAnnotationIndex().iterator(); annotations.hasNext();) {
		String typeName = annotations.next().getType().getShortName();
		MutableInt count = counters.get(typeName);
		if (count != null) {
			count.increment();
		} else {
			counters.put(typeName, new MutableInt(1));
		}
	}

	if (!periodicStatEnabled || this.docIt % this.docPeriod != 0) {
		return;
	}
	try {
		traceToFile();
	} catch (IOException ioFailure) {
		throw new AnalysisEngineProcessException(ioFailure);
	}
}
 
Example 7
Source File: RemoveLowConfidenceEntities.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Removes from the JCas indexes every {@code Entity} whose confidence is below
 * {@code confidenceThreshold}. When {@code ignoreZeroConfidence} is set, entities
 * whose confidence is not greater than zero are kept.
 *
 * @param aJCas the CAS whose entities are filtered
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
  List<Entity> lowConfidence = new ArrayList<>();

  for (FSIterator<Annotation> entities = aJCas.getAnnotationIndex(Entity.type).iterator();
      entities.hasNext(); ) {
    Entity entity = (Entity) entities.next();

    // Keep entities that reach the threshold.
    if (!(entity.getConfidence() < confidenceThreshold)) {
      continue;
    }
    // Optionally keep entities without a positive confidence.
    if (ignoreZeroConfidence && !(entity.getConfidence() > 0.0)) {
      continue;
    }

    lowConfidence.add(entity);
    getMonitor()
        .debug(
            "Low confidence entity found (ID: {}) - this entity will be removed",
            entity.getInternalId());
  }

  removeFromJCasIndex(lowConfidence);
}
 
Example 8
Source File: CasExporter.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the export file name from the URI of the first
 * {@code SourceDocumentInformation} annotation: the file name of the URI with its
 * last extension (if any) replaced by the given one.
 *
 * @param cas       the CAS holding the source document information
 * @param extension the extension to append, without the leading dot
 * @return the export file name, or {@code null} if the CAS has no
 *         {@code SourceDocumentInformation} annotation
 */
protected String getExportFilePath(JCas cas, String extension) {
	FSIterator<Annotation> sdiIterator = cas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
	if (!sdiIterator.hasNext()) {
		return null;
	}

	SourceDocumentInformation sdi = (SourceDocumentInformation) sdiIterator.next();
	String fileName = new File(sdi.getUri()).getName();
	int lastDot = fileName.lastIndexOf('.');
	String baseName = (lastDot == -1) ? fileName : fileName.substring(0, lastDot);
	return baseName + "." + extension;
}
 
Example 9
Source File: TokenBuilderTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Builds tokens for a text containing hyphens and commas and checks that 17
 * {@code Token} annotations end up in the index.
 */
@Test
public void test4() {
  String text = "a b-c de--fg h,i,j,k";
  tokenBuilder.buildTokens(jCas, text, "a b - c d e - - f g h , i , j , k");

  // Walk the whole token index, counting one per element.
  int tokenCount = 0;
  for (FSIterator<Annotation> tokens = jCas.getAnnotationIndex(Token.type).iterator();
          tokens.hasNext(); tokens.next()) {
    tokenCount++;
  }

  assertEquals(17, tokenCount);
}
 
Example 10
Source File: JsonCasSerializer.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Writes all {@code TermOccAnnotation}s of the CAS as a JSON array, one object
 * per annotation holding its pattern, spotting rule name, term key, words and
 * offsets.
 *
 * @param jg   the generator the JSON is written to
 * @param jCas the CAS providing the term occurrence annotations
 * @throws IOException if the generator fails to write
 */
private static void writeTermOccAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
    jg.writeStartArray();
    for (FSIterator<Annotation> occurrences = jCas.getAnnotationIndex(TermOccAnnotation.type).iterator();
            occurrences.hasNext();) {
        TermOccAnnotation occurrence = (TermOccAnnotation) occurrences.next();

        jg.writeStartObject();
        writeStringFSArrayField(jg, F_PATTERN, occurrence.getPattern());
        writeStringField(jg, F_SPOTTING_RULE_NAME, occurrence.getSpottingRuleName());
        writeStringField(jg, F_TERM_KEY, occurrence.getTermKey());
        writeIntFSArrayField(jg, F_WORDS, occurrence.getWords());
        writeOffsets(jg, occurrence);
        jg.writeEndObject();
    }
    jg.writeEndArray();
}
 
Example 11
Source File: JCasTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes a {@code Token} and iterates the "all" index restricted to the Token
 * type, assigning each element to a {@code Token} variable. Any exception is
 * handed to {@code JUnitExtension.handleException}.
 */
public void testIteratorGetsJCasType() throws Exception {
	try {
		new Token(jcas).addToIndexes();

		FSIterator<Token> tokens = jcas.getJFSIndexRepository().<Token>getIndex("all", Token.type).iterator();
		while (tokens.hasNext()) {
			// The assignment requires the element to really be a Token instance.
			Token current = tokens.next();
			current.addToIndexes(); // something to do to keep Java from optimizing this away.
		}
	} catch (Exception failure) {
		JUnitExtension.handleException(failure);
	}
}
 
Example 12
Source File: JCasUtils.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the word annotations of the CAS to standard output in rows of up to 20
 * words: one line with the covered texts, one line with the matching tags, both
 * formatted by {@code center}. Full rows are followed by an empty line.
 *
 * @param jcas the CAS whose {@code WordAnnotation}s are printed
 */
public static void showSdiWithCategory2(JCas jcas) {
	StringBuilder wordsRow = new StringBuilder();
	StringBuilder tagsRow = new StringBuilder();
	int wordsInRow = 0;

	for (FSIterator<Annotation> words = jcas.getAnnotationIndex(WordAnnotation.type).iterator(); words.hasNext();) {
		wordsInRow++;
		WordAnnotation word = (WordAnnotation) words.next();

		String[] cells = center(word.getCoveredText(), word.getTag());
		wordsRow.append(cells[0]).append(" ");
		tagsRow.append(cells[1]).append(" ");

		if (wordsInRow == 20) {
			// Row is full: flush it and start a new one.
			System.out.println(wordsRow.toString());
			System.out.println(tagsRow.toString());
			System.out.println();

			wordsRow.setLength(0);
			tagsRow.setLength(0);
			wordsInRow = 0;
		}
	}

	// Flush the last, partially filled row.
	if (wordsInRow > 0) {
		System.out.println(wordsRow.toString());
		System.out.println(tagsRow.toString());
	}
}
 
Example 13
Source File: JCasUtils.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the {@code num}-th (1-based) {@code TermOccAnnotation} of the CAS to
 * standard output. Nothing is printed when the index holds fewer than
 * {@code num} annotations.
 *
 * @param jcas the CAS providing the term occurrence annotations
 * @param num  1-based position of the annotation to print
 */
public static void showTermFreq(JCas jcas, int num) {
	FSIterator<Annotation> occurrences = jcas.getAnnotationIndex(TermOccAnnotation.type).iterator();
	for (int position = 1; occurrences.hasNext(); position++) {
		TermOccAnnotation occurrence = (TermOccAnnotation) occurrences.next();
		if (position == num) {
			System.out.println("TermOccAnnotation n°" + num + ": " + occurrence);
			return;
		}
	}
}
 
Example 14
Source File: IndexRepositoryTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that feature structures of a non-annotation type can be added to the
 * index repository and are all returned by {@code getAllIndexedFS}: first with
 * one instance, then with two.
 */
public void testDefaultBagIndex() throws Exception {
  // create an instance of a non-annotation type
  Type tokenTypeType = this.typeSystem.getType(CASTestSetup.TOKEN_TYPE_TYPE);
  FeatureStructure tokenTypeFs1 = this.cas.createFS(tokenTypeType);
  assertFalse(tokenTypeFs1 instanceof AnnotationFS);

  // add to indexes
  this.indexRep.addFS(tokenTypeFs1);

  // now try to retrieve: exactly one instance, the one just added
  FSIterator<FeatureStructure> iter = this.indexRep.getAllIndexedFS(tokenTypeType);
  assertTrue(iter.hasNext());
  assertEquals(tokenTypeFs1, iter.next());
  assertFalse(iter.hasNext());

  // add a second instance
  FeatureStructure tokenTypeFs2 = this.cas.createFS(tokenTypeType);
  assertFalse(tokenTypeFs2 instanceof AnnotationFS);
  this.indexRep.addFS(tokenTypeFs2);

  // now there should be exactly two instances in the index
  FSIterator<FeatureStructure> iter2 = this.indexRep.getAllIndexedFS(tokenTypeType);
  assertTrue(iter2.hasNext());
  iter2.next();
  assertTrue(iter2.hasNext());
  iter2.next();
  // Check the fresh iterator: the first one was obtained before the second add and
  // says nothing about the current content of the index.
  assertFalse(iter2.hasNext());
}
 
Example 15
Source File: PrintMissingTest.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the type name of every annotation in the CAS to standard output and
 * accumulates, per annotation, its covered text and pretty-printed form in a
 * local buffer. The buffer is not used after the loop.
 *
 * @param jCas the CAS whose annotations are visited
 * @throws AnalysisEngineProcessException declared for callers; not thrown directly here
 */
public void process_old(JCas jCas) throws AnalysisEngineProcessException {
    StringBuffer dump = new StringBuffer();
    for (FSIterator<Annotation> annotations = jCas.getAnnotationIndex().iterator(); annotations.hasNext();) {
        Annotation annotation = annotations.next();
        System.out.println(annotation.getType().getName());

        dump.append(annotation.getCoveredText()).append('\n');
        annotation.prettyPrint(2, 2, dump, false);
        dump.append('\n');
    }
}
 
Example 16
Source File: SimpleTextMerger.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Merges one incoming segment CAS into the pending merged CAS: appends its
 * document text to the buffer, copies the configured annotation types with their
 * offsets shifted by the length of the text merged so far, and, when the segment
 * is flagged as the last one, finalizes the merged CAS and marks it ready for
 * output.
 *
 * @param aJCas the incoming segment
 * @throws AnalysisEngineProcessException if the segment carries no
 *         {@code SourceDocumentInformation} annotation
 */
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // lazily obtain the CAS that receives the merged content
  if (mMergedCas == null) {
    mMergedCas = getEmptyJCas();
  }

  // remember where this segment starts in the merged text, then append it
  int offsetShift = mDocBuf.length();
  mDocBuf.append(aJCas.getDocumentText());

  // copy the requested annotation types; the set guards against copying an
  // annotation twice when the requested types overlap
  CasCopier copier = new CasCopier(aJCas.getCas(), mMergedCas.getCas());
  Set alreadyCopied = new HashSet();
  for (String typeName : mAnnotationTypesToCopy) {
    Type type = mMergedCas.getTypeSystem().getType(typeName);
    Iterator annotations = aJCas.getCas().getAnnotationIndex(type).iterator();
    while (annotations.hasNext()) {
      FeatureStructure original = (FeatureStructure) annotations.next();
      if (alreadyCopied.contains(original)) {
        continue;
      }
      Annotation copy = (Annotation) copier.copyFs(original);
      // shift begin and end to their position in the merged text
      copy.setBegin(copy.getBegin() + offsetShift);
      copy.setEnd(copy.getEnd() + offsetShift);
      mMergedCas.addFsToIndexes(copy);
      alreadyCopied.add(original);
    }
  }

  // the SourceDocumentInformation FS carries the source URI of the document and
  // tells whether the incoming CAS is the last segment
  FSIterator sdiIterator = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (!sdiIterator.hasNext()) {
    throw new AnalysisEngineProcessException(MESSAGE_DIGEST, MISSING_SOURCE_DOCUMENT_INFO,
            new Object[0]);
  }
  SourceDocumentInformation sourceDocInfo = (SourceDocumentInformation) sdiIterator.next();
  if (!sourceDocInfo.getLastSegment()) {
    return;
  }

  // last segment: finalize the merged CAS
  mMergedCas.setDocumentText(mDocBuf.toString());

  SourceDocumentInformation mergedInfo = new SourceDocumentInformation(mMergedCas);
  mergedInfo.setUri(sourceDocInfo.getUri());
  mergedInfo.setOffsetInSource(0);
  mergedInfo.setLastSegment(true);
  mergedInfo.addToIndexes();

  // start a fresh buffer for the next document and signal that output is available
  mDocBuf = new StringBuffer();
  mReadyToOutput = true;
}
 
Example 17
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Checks whether a match inside a sentence lines up with token boundaries.
 * <p>
 * The offsets of {@code r} are relative to the covered text of {@code s}; they
 * are shifted by {@code s.getBegin()} before being compared with token offsets.
 * The method returns true when
 * <ul>
 * <li>the match is as long as the whole sentence, or</li>
 * <li>the match is directly preceded and directly followed by a single space,
 * or</li>
 * <li>while iterating the tokens of the sentence, both the begin and the end of
 * the match have been accepted: each either coincides with a token begin/end, or
 * is adjacent to one of the symbols ".", "/", "–", "-".</li>
 * </ul>
 * 
 * @param r
 *            MatchResult
 * @param s
 *            respective Sentence
 * @param jcas
 *            current CAS object
 * @return whether or not the MatchResult is a clean one
 */
public static Boolean checkTokenBoundaries(MatchResult r, Sentence s, JCas jcas) {
	// Set once the begin / end of the match has been accepted; never reset.
	Boolean beginOK = false;
	Boolean endOK = false;

	// whole expression is marked as a sentence
	if ((r.end() - r.start()) == (s.getEnd() - s.getBegin())) {
		return true;
	}

	// The match is accepted right away when a space directly precedes it and a
	// space directly follows it; token boundaries are only checked otherwise.
	// The range guards keep the subSequence calls inside the sentence text.
	if ((r.start() > 0) && ((s.getCoveredText().subSequence(r.start() - 1, r.start()).equals(" ")))
			&& ((r.end() < s.getCoveredText().length())
					&& ((s.getCoveredText().subSequence(r.end(), r.end() + 1).equals(" "))))) {
		return true;
	}

	// other token boundaries than white-spaces
	else {
		FSIterator iterToken = jcas.getAnnotationIndex(Token.type).subiterator(s);
		while (iterToken.hasNext()) {
			Token t = (Token) iterToken.next();

			// Check begin
			if ((r.start() + s.getBegin()) == t.getBegin()) {
				beginOK = true;
			}
			// Tokenizer does not split number from some symbols (".", "/",
			// "-", "–"),
			// e.g., "...12 August-24 August..."
			// Note: this test does not depend on t, but it only takes effect when
			// the sentence has at least one token.
			else if ((r.start() > 0) && ((s.getCoveredText().subSequence(r.start() - 1, r.start()).equals("."))
					|| (s.getCoveredText().subSequence(r.start() - 1, r.start()).equals("/"))
					|| (s.getCoveredText().subSequence(r.start() - 1, r.start()).equals("–"))
					|| (s.getCoveredText().subSequence(r.start() - 1, r.start()).equals("-")))) {
				beginOK = true;
			}

			// Check end
			if ((r.end() + s.getBegin()) == t.getEnd()) {
				endOK = true;
			}
			// Tokenizer does not split number from some symbols (".", "/",
			// "-", "–"),
			// e.g., "... in 1990. New Sentence ..."
			// Note: like the begin test above, this does not depend on t.
			else if ((r.end() < s.getCoveredText().length())
					&& ((s.getCoveredText().subSequence(r.end(), r.end() + 1).equals("."))
							|| (s.getCoveredText().subSequence(r.end(), r.end() + 1).equals("/"))
							|| (s.getCoveredText().subSequence(r.end(), r.end() + 1).equals("–"))
							|| (s.getCoveredText().subSequence(r.end(), r.end() + 1).equals("-")))) {
				endOK = true;
			}

			// Begin and end may be accepted by different tokens.
			if (beginOK && endOK)
				return true;
		}
	}
	// No tokens in the sentence, or begin/end never both accepted.
	return false;
}
 
Example 18
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Under-specified values are disambiguated here. Only Timexes of types "date"
 * and "time" can have an under-specified value; in addition, the emptyValue
 * attribute of any collected timex is disambiguated when it is set.
 * <p>
 * Collected are all DATE and TIME timexes, plus DURATION timexes that carry a
 * non-empty emptyValue. Each collected timex is removed from the indexes,
 * updated with its new value and re-added.
 * 
 * @param jcas
 *            CAS whose Timex3 annotations are disambiguated in place
 */
public void specifyAmbiguousValues(JCas jcas) {
	// build up a list with all relevant TIMEX expressions
	List<Timex3> linearDates = new ArrayList<Timex3>();
	FSIterator iterTimex = jcas.getAnnotationIndex(Timex3.type).iterator();

	while (iterTimex.hasNext()) {
		Timex3 timex = (Timex3) iterTimex.next();
		String type = timex.getTimexType();
		if (type.equals("DATE") || type.equals("TIME")) {
			linearDates.add(timex);
		} else if (type.equals("DURATION")) {
			// The emptyValue may be null (the loop below checks for it too), so
			// guard against null here instead of dereferencing it blindly.
			String emptyValue = timex.getEmptyValue();
			if (emptyValue != null && !emptyValue.equals("")) {
				linearDates.add(timex);
			}
		}
	}

	//////////////////////////////////////////////
	// go through list of collected timexes     //
	//////////////////////////////////////////////
	for (int i = 0; i < linearDates.size(); i++) {
		Timex3 t_i = linearDates.get(i);
		String value_i = t_i.getTimexValue();

		String valueNew = value_i;
		// handle the value attribute only if we have a TIME or DATE
		if (t_i.getTimexType().equals("TIME") || t_i.getTimexType().equals("DATE")) {
			valueNew = specifyAmbiguousValuesString(value_i, t_i, i, linearDates, jcas);
		}

		// handle the emptyValue attribute for any type
		if (t_i.getEmptyValue() != null && t_i.getEmptyValue().length() > 0) {
			String emptyValueNew = specifyAmbiguousValuesString(t_i.getEmptyValue(), t_i, i, linearDates, jcas);
			t_i.setEmptyValue(emptyValueNew);
		}

		// The timex is taken out of the indexes before its value is changed and
		// re-added afterwards; it stays the same object, so the list already
		// refers to the updated timex.
		t_i.removeFromIndexes();
		Logger.printDetail(t_i.getTimexId() + " DISAMBIGUATION PHASE: foundBy:" + t_i.getFoundByRule() + " text:"
				+ t_i.getCoveredText() + " value:" + t_i.getTimexValue() + " NEW value:" + valueNew);

		t_i.setTimexValue(valueNew);
		t_i.addToIndexes();
	}
}
 
Example 19
Source File: XmiCasDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips a CAS with two views through XMI serialization. A feature
 * structure of the TOP type is indexed in both views; after deserialization
 * (done twice, with a reset in between) the document texts of both views and the
 * number of indexed TOP instances in the initial view are checked. Any exception
 * is handed to {@code JUnitExtension.handleException}.
 */
public void testMultipleSofas() throws Exception {
  try {
    CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
            new FsIndexDescription[0]);
    // initial view and a second view, each with its own document text
    cas.setDocumentText("This is a test");
    CAS otherView = cas.createView("OtherSofa");
    otherView.setDocumentText("This is only a test");

    // An instance of TOP is used because an annotation cannot be added to a view
    // other than the one it was created in:
    // https://issues.apache.org/jira/browse/UIMA-4099
    Type topType = cas.getTypeSystem().getTopType();
    FeatureStructure aTOP = cas.createFS(topType);
    cas.getIndexRepository().addFS(aTOP);
    otherView.getIndexRepository().addFS(aTOP);

    // each view must hold exactly two indexed feature structures
    FSIterator<FeatureStructure> it = cas.getIndexRepository().getAllIndexedFS(topType);
    FSIterator<FeatureStructure> it2 = otherView.getIndexRepository().getAllIndexedFS(topType);
    it.next();
    it.next();
    it2.next();
    it2.next();
    assertFalse(it.hasNext());
    assertFalse(it2.hasNext());

    // serialize to an XMI string
    StringWriter sw = new StringWriter();
    XMLSerializer xmlSer = new XMLSerializer(sw, false);
    new XmiCasSerializer(cas.getTypeSystem()).serialize(cas, xmlSer.getContentHandler());
    String xml = sw.toString();

    // deserialize into another CAS (repeat twice to check it still works after reset)
    CAS newCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
            new FsIndexDescription[0]);
    for (int round = 0; round < 2; round++) {
      ContentHandler xmiHandler = new XmiCasDeserializer(newCas.getTypeSystem())
              .getXmiCasHandler(newCas);
      XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
      xmlReader.setContentHandler(xmiHandler);
      xmlReader.parse(new InputSource(new StringReader(xml)));

      // check sofas
      assertEquals("This is a test", newCas.getDocumentText());
      CAS newOtherView = newCas.getView("OtherSofa");
      assertEquals("This is only a test", newOtherView.getDocumentText());

      // check that the feature structure is still indexed in both views; only the
      // initial view is asserted to be exhausted after two elements
      FSIterator<FeatureStructure> newIt = newCas.getIndexRepository().getAllIndexedFS(topType);
      FSIterator<FeatureStructure> newIt2 = newOtherView.getIndexRepository()
              .getAllIndexedFS(topType);
      newIt.next();
      newIt.next();
      newIt2.next();
      newIt2.next();
      assertFalse(newIt.hasNext());

      newCas.reset();
    }
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
 
Example 20
Source File: MongoUpdateWriter.java    From bluima with Apache License 2.0 4 votes vote down vote up
/**
 * Collects the annotations of the CAS into per-type DB lists and writes them to
 * the collection with a single "$set" upsert keyed by the document id.
 * <p>
 * Only annotation types with a mapping are written. When
 * {@code updateAllAnnotations} is set, unmapped types are reported with a
 * warning; otherwise the type must also be listed in
 * {@code updateAnnotationsSet}. Any failure is logged together with the source
 * of the document (when it can be determined) and not rethrown.
 *
 * @param jCas the CAS whose annotations are written
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    try {
        Map<String, BasicDBList> dbLists = new HashMap<String, BasicDBList>();

        for (FSIterator<Annotation> annotations = jCas.getAnnotationIndex().iterator(); annotations
                .hasNext();) {
            Annotation annotation = annotations.next();
            String typeName = annotation.getType().getName();
            boolean hasMapping = ALL_MAPPINGS_KEYS.contains(typeName);

            if (hasMapping && (updateAllAnnotations || updateAnnotationsSet.contains(typeName))) {
                processAnnotaion(annotation, dbLists, typeName);
            } else if (updateAllAnnotations) {
                // only reached for unmapped types when everything should be written
                LOG.warn("no mapping for {}, could not write annotation", typeName);
            }
        }

        // insert all dbLists
        BasicDBObject updateQuery = new BasicDBObject(ID, getHeaderIntDocId(jCas) + "");
        BasicDBObject updateCommands = new BasicDBObject();
        updateCommands.put("$set", dbLists);
        coll.update(updateQuery, updateCommands, true, false);

    } catch (Throwable t) {
        // e.g. with "DBObject of size  is over Max BSON size"
        String sourceFile = "unknown";
        try {
            sourceFile = JCasUtil.selectSingle(jCas, Header.class).getSource();
        } catch (Throwable ignored) {
            // best effort only: keep "unknown" when the header cannot be read
        }
        LOG.error("inserting doc " + sourceFile + StringUtils.print(t), t);
    }
}