Java Code Examples for org.apache.uima.cas.FSIterator#hasNext()

The following examples show how to use org.apache.uima.cas.FSIterator#hasNext(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MateLemmaFixer.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Normalizes the lemma of every {@link WordAnnotation} indexed in the CAS.
 *
 * <ul>
 * <li>A missing lemma, or a lemma equal to {@code "CD"}, is replaced by the
 * lower-cased covered text (using the configured language locale).</li>
 * <li>Any other lemma is lower-cased; if the result equals the stem followed
 * by {@code "s"}, the lemma becomes the lower-cased covered text with one
 * trailing {@code "s"} stripped.</li>
 * </ul>
 *
 * @param jCas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	for (FSIterator<Annotation> words = jCas.getAnnotationIndex(WordAnnotation.type).iterator(); words.hasNext();) {
		WordAnnotation current = (WordAnnotation) words.next();
		String lemma = current.getLemma();

		// "CD": or TermSuiteConstants.CARD_MATE
		if (lemma == null || lemma.equals("CD")) {
			current.setLemma(current.getCoveredText().toLowerCase(language.getLocale()));
			continue;
		}

		current.setLemma(lemma.toLowerCase());
		// Re-read the lemma that was just stored before comparing it to the stem.
		if (current.getLemma().equals((current.getStem() + "s"))) {
			current.setLemma(current.getCoveredText().toLowerCase(language.getLocale()).replaceAll("s$", ""));
		}
	}
}
 
Example 2
Source File: TreeTaggerLemmaFixer.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Normalizes the lemma of every {@link WordAnnotation} indexed in the CAS and
 * calls {@code fixPlural()} once per annotation.
 *
 * <p>A missing lemma, or a lemma equal to {@code TermSuiteConstants.CARD_TAG},
 * is replaced by the lower-cased covered text (language locale); any other
 * lemma is simply lower-cased.
 *
 * @param jCas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	for (FSIterator<Annotation> words = jCas.getAnnotationIndex(WordAnnotation.type).iterator(); words.hasNext();) {
		WordAnnotation current = (WordAnnotation) words.next();
		String lemma = current.getLemma();

		boolean useSurfaceForm = lemma == null || lemma.equals(TermSuiteConstants.CARD_TAG);
		String normalized = useSurfaceForm
				? current.getCoveredText().toLowerCase(language.getLocale())
				: lemma.toLowerCase();
		current.setLemma(normalized);

		fixPlural();
	}
}
 
Example 3
Source File: StringRegexFilter.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Removes from the CAS indexes every {@link WordAnnotation} whose covered text
 * fully matches at least one of the configured {@code PATTERNS}, and adds the
 * number of removed annotations to {@code totalFiltered}.
 *
 * <p>Matching annotations are first collected and only removed once the
 * iteration is over, so the index is never modified while it is being
 * traversed.
 *
 * @param cas the CAS to filter
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
	List<WordAnnotation> rem = Lists.newArrayList();
	FSIterator<Annotation> it = cas.getAnnotationIndex(WordAnnotation.type).iterator();
	while (it.hasNext()) {
		WordAnnotation word = (WordAnnotation) it.next();
		String text = word.getCoveredText();
		for (Pattern p : PATTERNS) {
			if (p.matcher(text).matches()) {
				rem.add(word);
				// Stop at the first matching pattern: a word matching several
				// patterns must be counted and removed only once.
				break;
			}
		}
	}

	this.totalFiltered += rem.size();

	for (WordAnnotation wa : rem) {
		wa.removeFromIndexes(cas);
	}
}
 
Example 4
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Tells whether the document creation time (DCT) stored in a jcas is usable.
 * A jcas without any DCT annotation is reported as valid, because the fallback
 * calculation handles that case. Otherwise only the first DCT annotation is
 * inspected: its value must be non-null and look like either {@code 20041224}
 * or {@code 2004-12-24} (any single separator character, any suffix).
 * 
 * @param jcas
 *            the jcas to inspect
 * @return Whether or not the given jcas contains a valid DCT
 */
private Boolean isValidDCT(JCas jcas) {
	FSIterator dctIter = jcas.getAnnotationIndex(Dct.type).iterator();

	// No DCT at all: nothing to validate.
	if (!dctIter.hasNext()) {
		return true;
	}

	String dctVal = ((Dct) dctIter.next()).getValue();
	if (dctVal == null) {
		return false;
	}

	// Something like 20041224 ...
	boolean compactDate = dctVal.matches("\\d{8}");
	// ... or something like 2004-12-24
	return compactDate || dctVal.matches("\\d{4}.\\d{2}.\\d{2}.*");
}
 
Example 5
Source File: CasComparer.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that two CASes are equal view by view.
 *
 * <p>The default-sofa views are compared first, then every sofa of {@code c1}
 * is compared with the view of the same name in {@code c2}. Finally the number
 * of sofas in both CASes must be the same.
 *
 * @param c1 the first CAS
 * @param c2 the second CAS
 */
public void assertEqualsInner(CAS c1, CAS c2) {
  alreadyCompared.clear();

  // Initial views are compared explicitly: they may exist without a SofaFS.
  assertEqualViewsInner(c1.getView(CAS.NAME_DEFAULT_SOFA), c2.getView(CAS.NAME_DEFAULT_SOFA));

  // Walk the sofas of c1 and compare each view with its namesake in c2.
  int sofaCountInC1 = 0;
  for (FSIterator<Sofa> sofas = c1.getSofaIterator(); sofas.hasNext(); sofaCountInC1++) {
    SofaFS sofa = sofas.next();
    CAS view1 = c1.getView(sofa);
    assertEqualViewsInner(view1, c2.getView(view1.getViewName()));
  }

  // Only count the sofas of c2; their views were already compared above.
  int sofaCountInC2 = 0;
  for (FSIterator<Sofa> sofas = c2.getSofaIterator(); sofas.hasNext(); sofas.moveToNext()) {
    sofaCountInC2++;
  }

  Assert.assertTrue(sofaCountInC1 == sofaCountInC2);
}
 
Example 6
Source File: CasUtil.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the one and only indexed feature structure of the given type.
 * 
 * @param cas
 *          the CAS to search.
 * @param type
 *          the UIMA type to look up.
 * @return the single indexed instance of the given type.
 * @throws IllegalArgumentException
 *           if the CAS contains no instance, or more than one instance, of the given type.
 */
public static FeatureStructure selectSingleFS(CAS cas, Type type) {
  FSIterator<FeatureStructure> candidates = cas.getIndexRepository().getAllIndexedFS(type);

  if (candidates.hasNext()) {
    FeatureStructure only = candidates.next();
    if (!candidates.hasNext()) {
      return only;
    }
    throw new IllegalArgumentException("CAS contains more than one [" + type.getName() + "]");
  }

  throw new IllegalArgumentException("CAS does not contain any [" + type.getName() + "]");
}
 
Example 7
Source File: JsonCasSerializer.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a JSON array holding one object per {@link WordAnnotation} indexed in
 * the CAS. Each object carries the annotation's string features followed by
 * its offsets.
 *
 * @param jg   the generator receiving the JSON output
 * @param jCas the CAS whose word annotations are serialized
 * @throws IOException if the generator fails to write
 */
private static void writeWordAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
    jg.writeStartArray();
    for (FSIterator<Annotation> words = jCas.getAnnotationIndex(WordAnnotation.type).iterator(); words.hasNext();) {
        WordAnnotation word = (WordAnnotation) words.next();

        jg.writeStartObject();
        // Field order is part of the output format; keep it stable.
        writeStringField(jg, F_CATEGORY, word.getCategory());
        writeStringField(jg, F_LEMMA, word.getLemma());
        writeStringField(jg, F_STEM, word.getStem());
        writeStringField(jg, F_TAG, word.getTag());
        writeStringField(jg, F_SUB_CATEGORY, word.getSubCategory());
        writeStringField(jg, F_REGEX_LABEL, word.getRegexLabel());
        writeStringField(jg, F_NUMBER, word.getNumber());
        writeStringField(jg, F_GENDER, word.getGender());
        writeStringField(jg, F_CASE, word.getCase());
        writeStringField(jg, F_MOOD, word.getMood());
        writeStringField(jg, F_TENSE, word.getTense());
        writeStringField(jg, F_PERSON, word.getPerson());
        writeStringField(jg, F_DEGREE, word.getDegree());
        writeStringField(jg, F_FORMATION, word.getFormation());
        writeStringField(jg, F_LABELS, word.getLabels());
        writeOffsets(jg, word);
        jg.writeEndObject();
    }
    jg.writeEndArray();
}
 
Example 8
Source File: ChineseNormalizer.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Sets both the lemma and the stem of every {@link WordAnnotation} in the CAS
 * to the annotation's covered text.
 *
 * @param cas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException wrapping any exception raised while
 *         iterating or updating the annotations
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {

	try {
		for (FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator(); words.hasNext();) {
			WordAnnotation word = (WordAnnotation) words.next();
			String surface = word.getCoveredText();
			word.setLemma(surface);
			word.setStem(surface);
		}
	} catch (Exception e) {
		throw new AnalysisEngineProcessException(e);
	}
}
 
Example 9
Source File: JsonCasSerializer.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a JSON array holding one object per {@link FixedExpression} indexed
 * in the CAS; each object only contains the annotation's offsets.
 *
 * @param jg   the generator receiving the JSON output
 * @param jCas the CAS whose fixed expressions are serialized
 * @throws IOException if the generator fails to write
 */
private static void writeFixedExpressions(JsonGenerator jg, JCas jCas) throws IOException {
    jg.writeStartArray();
    for (FSIterator<Annotation> expressions = jCas.getAnnotationIndex(FixedExpression.type).iterator(); expressions.hasNext();) {
        FixedExpression expression = (FixedExpression) expressions.next();
        jg.writeStartObject();
        writeOffsets(jg, expression);
        jg.writeEndObject();
    }
    jg.writeEndArray();
}
 
Example 10
Source File: JCasUtils.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the first {@link SourceDocumentInformation} annotation of the CAS.
 *
 * @param jCas the CAS to search
 * @return the first such annotation, or an empty {@code Optional} when the CAS
 *         contains none
 */
public static Optional<SourceDocumentInformation> getSourceDocumentAnnotation(JCas jCas) {
	FSIterator<Annotation> candidates = jCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
	if (!candidates.hasNext()) {
		return Optional.empty();
	}
	SourceDocumentInformation first = (SourceDocumentInformation) candidates.next();
	return Optional.of(first);
}
 
Example 11
Source File: PrintMissingTest.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the type name of every annotation in the CAS to standard output.
 *
 * <p>The covered text and a pretty-printed form of each annotation are also
 * accumulated in a local buffer, which is not used after the loop.
 *
 * @param jCas the CAS whose annotations are listed
 * @throws AnalysisEngineProcessException declared in the signature
 */
public void process_old(JCas jCas) throws AnalysisEngineProcessException {
    StringBuffer dump = new StringBuffer();
    for (FSIterator<Annotation> all = jCas.getAnnotationIndex().iterator(); all.hasNext();) {
        Annotation annotation = all.next();
        String typeName = annotation.getType().getName();
        System.out.println(typeName);

        dump.append(annotation.getCoveredText()).append('\n');
        annotation.prettyPrint(2, 2, dump, false);
        dump.append('\n');
    }
}
 
Example 12
Source File: JCasUtils.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the word annotations of a CAS to standard output as two aligned
 * lines per group: one with the covered texts, one with the tags.
 *
 * <p>Words are printed in groups of 20, each full group followed by an empty
 * line. A trailing incomplete group is printed without the empty line.
 *
 * @param jcas the CAS whose word annotations are displayed
 */
public static void showSdiWithCategory2(JCas jcas) {
	StringBuilder wordsLine = new StringBuilder();
	StringBuilder catsLine = new StringBuilder();
	int pending = 0;

	for (FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator(); it.hasNext();) {
		pending++;
		WordAnnotation word = (WordAnnotation) it.next();

		// center(...) yields the text cell at index 0 and the tag cell at index 1.
		String[] cells = center(word.getCoveredText(), word.getTag());
		wordsLine.append(cells[0]).append(" ");
		catsLine.append(cells[1]).append(" ");

		if (pending == 20) {
			System.out.println(wordsLine.toString());
			System.out.println(catsLine.toString());
			System.out.println();

			// Start a new group.
			wordsLine.setLength(0);
			catsLine.setLength(0);
			pending = 0;
		}
	}

	if (pending > 0) {
		System.out.println(wordsLine.toString());
		System.out.println(catsLine.toString());
	}
}
 
Example 13
Source File: LatvianTildeTagger.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Passes every {@link WordAnnotation} of the CAS, together with its
 * lower-cased tag, to {@code setCategory}.
 *
 * @param cas the CAS whose word annotations are processed
 * @throws AnalysisEngineProcessException wrapping any exception raised while
 *         iterating or updating the annotations
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
	try {
		for (FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator(); words.hasNext();) {
			WordAnnotation word = (WordAnnotation) words.next();
			this.setCategory(word, word.getTag().toLowerCase());
		}
	} catch (Exception e) {
		throw new AnalysisEngineProcessException(e);
	}
}
 
Example 14
Source File: UimaCopying.java    From biomedicus with Apache License 2.0 5 votes vote down vote up
/**
 * Copies every indexed feature structure of the named type from one view to
 * another.
 *
 * <p>All matching feature structures of the source view are enqueued on a
 * {@code FeatureStructureCopyingQueue}, which is then run.
 *
 * @param typeName        fully qualified name of the type to copy, resolved
 *                        against the source view's type system
 * @param sourceView      the view to read feature structures from
 * @param destinationView the view to copy feature structures into
 */
public static void copyFeatureStructuresOfType(String typeName, CAS sourceView,
    CAS destinationView) {
  FeatureStructureCopyingQueue featureStructureCopyingQueue = new FeatureStructureCopyingQueue(
      sourceView,
      destinationView);
  FSIterator<FeatureStructure> iterator = sourceView.getIndexRepository()
      .getAllIndexedFS(sourceView.getTypeSystem().getType(typeName));
  while (iterator.hasNext()) {
    // next() returns the current element AND advances; get() would leave the
    // iterator in place, so hasNext() would stay true and the loop would
    // never terminate.
    FeatureStructure featureStructure = iterator.next();
    featureStructureCopyingQueue.enqueue(featureStructure);
  }
  featureStructureCopyingQueue.run();
}
 
Example 15
Source File: TableAnnotationDivider.java    From biomedicus with Apache License 2.0 5 votes vote down vote up
/**
 * Splits an annotation at the dividers it contains.
 *
 * <p>For each divider returned by the sub-iterator over {@code annotation}, a
 * new annotation of {@code typeToCreate} is created and indexed. It spans from
 * the previous cut point (initially the annotation's begin) to the divider's
 * begin.
 *
 * @param annotation the annotation to divide
 * @throws NullPointerException if {@code typeToCreate} or {@code dividers} is
 *         not set
 */
private void divideAnnotation(AnnotationFS annotation) {
  Objects.requireNonNull(typeToCreate);
  Objects.requireNonNull(dividers);

  int segmentStart = annotation.getBegin();
  for (FSIterator<AnnotationFS> dividerIt = dividers.subiterator(annotation); dividerIt.hasNext();) {
    int segmentEnd = dividerIt.next().getBegin();
    AnnotationFS segment = cas.createAnnotation(typeToCreate, segmentStart, segmentEnd);
    cas.addFsToIndexes(segment);
    // The next segment starts where this one ended.
    segmentStart = segmentEnd;
  }
}
 
Example 16
Source File: IteratorTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that an iterator, rewound to its first element, yields exactly
 * {@code nbr} feature structures and that each of them has exactly the given
 * type.
 *
 * @param x    the iterator to check; it is repositioned by this method
 * @param nbr  the expected number of elements
 * @param type the type every element is expected to have
 */
private void verifyHaveSubset(FSIterator<?> x, int nbr, Type type) {
  int seen = 0;
  for (x.moveToFirst(); x.hasNext(); x.moveToNext()) {
    seen++;
    assertEquals(type, x.get().getType());
  }
  assertEquals(nbr, seen);
}
 
Example 17
Source File: Annotator4.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the part-of-speech of every {@link Token} in the CAS to {@code "NN"}.
 *
 * @param jCas the CAS whose tokens are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  for (FSIterator<Annotation> it = jCas.getAnnotationIndex(Token.type).iterator(); it.hasNext();) {
    ((Token) it.next()).setPos("NN");
  }
}
 
Example 18
Source File: ImporterService.java    From termsuite-core with Apache License 2.0 4 votes vote down vote up
/**
 * Imports every {@link TermOccAnnotation} of a CAS into the terminology.
 *
 * <p>For each occurrence, the term with the occurrence's grouping key is looked
 * up, or created from the occurrence's words and pattern when unknown. Its
 * frequency is then incremented and the occurrence is recorded in the
 * occurrence store under the document URI. Finally, the numbers of term
 * occurrences and word annotations in the CAS are added to the terminology
 * counters and the occurrence store is flushed.
 *
 * @param jCas the CAS to import
 */
public void importToTerminology(JCas jCas) {
	Optional<SourceDocumentInformation> sourceInfo = JCasUtils.getSourceDocumentAnnotation(jCas);
	String currentFileURI;
	if (sourceInfo.isPresent()) {
		currentFileURI = sourceInfo.get().getUri();
	} else {
		currentFileURI = "(no source uri given)";
	}

	for (FSIterator<Annotation> occIt = jCas.getAnnotationIndex(TermOccAnnotation.type).iterator(); occIt.hasNext();) {
		TermOccAnnotation occurrence = (TermOccAnnotation) occIt.next();
		String groupingKey = TermSuiteUtils.getGroupingKey(occurrence);

		TermService term;
		if (!terminoService.containsTerm(groupingKey)) {
			// Unknown term: resolve (or create) each of its words first.
			Word[] words = new Word[occurrence.getWords().size()];
			for (int i = 0; i < occurrence.getWords().size(); i++) {
				WordAnnotation wordAnno = occurrence.getWords(i);
				String lemma = wordAnno.getLemma();
				words[i] = this.terminoService.containsWord(lemma)
						? this.terminoService.getWord(lemma)
						: createOrGetWord(lemma, wordAnno.getStem());
			}

			term = createOrGetTerm(occurrence.getPattern().toStringArray(), words);
			term.setSpottingRule(occurrence.getSpottingRuleName());
		} else {
			term = terminoService.getTerm(groupingKey);
		}

		term.incrementFrequency(1);
		occurrenceStore.addOccurrence(
				term.getTerm(),
				currentFileURI,
				occurrence.getBegin(),
				occurrence.getEnd(),
				occurrence.getCoveredText());
	}

	// Update the global counters with fresh iterators over the same indexes.
	int spottedTerms = Iterators.size(jCas.getAnnotationIndex(TermOccAnnotation.type).iterator());
	terminoService.incrementSpottedTermsNum(spottedTerms);
	int wordAnnotations = Iterators.size(jCas.getAnnotationIndex(WordAnnotation.type).iterator());
	terminoService.incrementWordAnnotationNum(wordAnnotations);
	occurrenceStore.flush();
}
 
Example 19
Source File: SimpleTextMerger.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Merges one incoming CAS segment into the CAS being assembled.
 *
 * <p>The segment's document text is appended to the text buffer, and every
 * annotation of the configured types is copied into the merged CAS with its
 * begin/end shifted by the length of the text accumulated before this segment.
 * When the segment's {@link SourceDocumentInformation} marks it as the last
 * one, the merged CAS receives the accumulated text and a new source document
 * information, the buffer is reset and the output is flagged as ready.
 *
 * @param aJCas the incoming segment
 * @throws AnalysisEngineProcessException if the segment carries no
 *         {@link SourceDocumentInformation}
 */
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // lazily obtain the CAS that accumulates the merged result
  if (mMergedCas == null) {
    mMergedCas = getEmptyJCas();
  }

  // remember where this segment starts in the merged text, then append it
  String segmentText = aJCas.getDocumentText();
  final int segmentOffset = mDocBuf.length();
  mDocBuf.append(segmentText);

  // copy the annotations of every configured type
  CasCopier copier = new CasCopier(aJCas.getCas(), mMergedCas.getCas());
  // guards against copying an annotation twice when the configured types overlap
  Set alreadyCopied = new HashSet();
  for (String typeName : mAnnotationTypesToCopy) {
    Type type = mMergedCas.getTypeSystem().getType(typeName);
    FSIndex index = aJCas.getCas().getAnnotationIndex(type);
    for (Iterator iter = index.iterator(); iter.hasNext();) {
      FeatureStructure fs = (FeatureStructure) iter.next();
      if (alreadyCopied.contains(fs)) {
        continue;
      }
      Annotation copy = (Annotation) copier.copyFs(fs);
      // shift the offsets so they are relative to the merged document text
      copy.setBegin(copy.getBegin() + segmentOffset);
      copy.setEnd(copy.getEnd() + segmentOffset);
      mMergedCas.addFsToIndexes(copy);
      alreadyCopied.add(fs);
    }
  }

  // the SourceDocumentInformation FS gives the source URI of the document and
  // tells whether the incoming CAS is the last segment
  FSIterator sdiIter = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (!sdiIter.hasNext()) {
    throw new AnalysisEngineProcessException(MESSAGE_DIGEST, MISSING_SOURCE_DOCUMENT_INFO,
            new Object[0]);
  }
  SourceDocumentInformation sourceDocInfo = (SourceDocumentInformation) sdiIter.next();
  if (!sourceDocInfo.getLastSegment()) {
    // more segments to come: keep accumulating
    return;
  }

  // last segment: finalize the merged CAS
  mMergedCas.setDocumentText(mDocBuf.toString());

  SourceDocumentInformation destSDI = new SourceDocumentInformation(mMergedCas);
  destSDI.setUri(sourceDocInfo.getUri());
  destSDI.setOffsetInSource(0);
  destSDI.setLastSegment(true);
  destSDI.addToIndexes();

  mDocBuf = new StringBuffer();
  mReadyToOutput = true;
}
 
Example 20
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Postprocessing: Check dates starting with "0" which were extracted without
 * explicit "AD" hints if it is likely that they refer to the respective date BC
 * <p>
 * All DATE and TIME timexes of the jcas are collected in index order. Every
 * timex but the first whose value starts with "0" and whose rule name does not
 * contain "-BCADhint" is compared against the timexes collected before it,
 * nearest first. If a suitable preceding value starting with "BC" is found, the
 * value is prefixed with "BC" and the timex is re-indexed.
 * 
 * @param jcas
 *            the jcas whose Timex3 annotations are inspected and possibly
 *            updated in place
 */
public void disambiguateHistoricDates(JCas jcas) {

	// build up a list with all found TIMEX expressions
	List<Timex3> linearDates = new ArrayList<Timex3>();
	FSIterator iterTimex = jcas.getAnnotationIndex(Timex3.type).iterator();

	// Create List of all Timexes of types "date" and "time"
	while (iterTimex.hasNext()) {
		Timex3 timex = (Timex3) iterTimex.next();
		if (timex.getTimexType().equals("DATE") || timex.getTimexType().equals("TIME")) {
			linearDates.add(timex);
		}
	}

	//////////////////////////////////////////////
	// go through list of Date and Time timexes //
	//////////////////////////////////////////////
	// Starts at index 1: every candidate is compared with earlier list entries,
	// and the first entry has none.
	for (int i = 1; i < linearDates.size(); i++) {
		Timex3 t_i = (Timex3) linearDates.get(i);
		String value_i = t_i.getTimexValue();
		String newValue = value_i;
		Boolean change = false;
		// Expressions found by a rule carrying an explicit BC/AD hint are left alone.
		if (!(t_i.getFoundByRule().contains("-BCADhint"))) {
			if (value_i.startsWith("0")) {
				// offset: distance back to the earlier timex being looked at.
				// counter: incremented for each earlier timex whose value starts with a
				// digit; the backward scan stops once it reaches 5 or the scan would
				// reach index 0.
				Integer offset = 1, counter = 1;
				do {
					// Only earlier values starting with "BC" can trigger a change, and
					// (for i > 1) only as long as no change has been decided yet.
					if ((i == 1 || (i > 1 && !change))
							&& linearDates.get(i - offset).getTimexValue().startsWith("BC")) {
						if (value_i.length() > 1) {
							// The earlier value must start with "BC" followed by the same two
							// leading characters as value_i, or by that two-digit number plus one.
							if ((linearDates.get(i - offset).getTimexValue()
									.startsWith("BC" + value_i.substring(0, 2)))
									|| (linearDates.get(i - offset).getTimexValue().startsWith("BC" + String
											.format("%02d", (Integer.parseInt(value_i.substring(0, 2)) + 1))))) {
								// When both values share the "00" or the "01" prefix, the first
								// three digits are compared as well.
								if (((value_i.startsWith("00"))
										&& (linearDates.get(i - offset).getTimexValue().startsWith("BC00")))
										|| ((value_i.startsWith("01")) && (linearDates.get(i - offset)
												.getTimexValue().startsWith("BC01")))) {
									if ((value_i.length() > 2)
											&& (linearDates.get(i - offset).getTimexValue().length() > 4)) {
										// Change only if the three leading digits of value_i are not
										// greater than the three digits following "BC" in the earlier value.
										if (Integer.parseInt(value_i.substring(0, 3)) <= Integer.parseInt(
												linearDates.get(i - offset).getTimexValue().substring(2, 5))) {
											newValue = "BC" + value_i;
											change = true;
											Logger.printDetail("DisambiguateHistoricDates: " + value_i + " to "
													+ newValue + ". Expression " + t_i.getCoveredText() + " due to "
													+ linearDates.get(i - offset).getCoveredText());
										}
									}
								} else {
									newValue = "BC" + value_i;
									change = true;
									Logger.printDetail("DisambiguateHistoricDates: " + value_i + " to " + newValue
											+ ". Expression " + t_i.getCoveredText() + " due to "
											+ linearDates.get(i - offset).getCoveredText());
								}
							}
						}
					}

					// Count earlier DATE/TIME timexes whose value starts with a digit
					// (i.e. not with "BC").
					if ((linearDates.get(i - offset).getTimexType().equals("TIME")
							|| linearDates.get(i - offset).getTimexType().equals("DATE"))
							&& (linearDates.get(i - offset).getTimexValue().matches("^\\d.*"))) {
						counter++;
					}
				} while (counter < 5 && ++offset < i);
			}
		}
		// Apply the new value: the timex is taken out of the indexes, updated, and
		// added back; the list entry is refreshed so later iterations see the
		// "BC"-prefixed value.
		if (!(newValue.equals(value_i))) {
			t_i.removeFromIndexes();
			Logger.printDetail("DisambiguateHistoricDates: value changed to BC");

			t_i.setTimexValue(newValue);
			t_i.addToIndexes();
			linearDates.set(i, t_i);
		}
	}
}