org.apache.uima.cas.FSIterator#hasNext

Source File: MateLemmaFixer.java From termsuite-core with Apache License 2.0

6 votes

/**
 * Normalizes the lemma of every {@code WordAnnotation} indexed in the CAS.
 * <ul>
 * <li>If the lemma is missing, or equals {@code "CD"} (or TermSuiteConstants.CARD_MATE;
 * presumably the tagger's cardinal tag), the lemma becomes the lower-cased covered text.</li>
 * <li>Otherwise the lemma is lower-cased; if the result equals the stem followed by
 * {@code "s"}, the lemma is replaced by the lower-cased covered text with one trailing
 * {@code "s"} removed.</li>
 * </ul>
 * All case conversions use the locale of the configured language, so the result does not
 * depend on the JVM default locale.
 *
 * @param jCas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	FSIterator<Annotation> it = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
	while (it.hasNext()) {
		WordAnnotation word = (WordAnnotation) it.next();
		String lemma = word.getLemma();
		if (lemma == null || lemma.equals("CD")) { // or TermSuiteConstants.CARD_MATE
			word.setLemma(word.getCoveredText().toLowerCase(language.getLocale()));
		} else {
			// Same locale as the covered-text branches, for consistent casing rules.
			String lowered = lemma.toLowerCase(language.getLocale());
			word.setLemma(lowered);
			if (lowered.equals(word.getStem() + "s")) {
				word.setLemma(word.getCoveredText().toLowerCase(language.getLocale()).replaceAll("s$", ""));
			}
		}
	}
}

Source File: TreeTaggerLemmaFixer.java From termsuite-core with Apache License 2.0

6 votes

/**
 * Normalizes the lemma of every {@code WordAnnotation} indexed in the CAS, then calls
 * {@code fixPlural()} once per annotation.
 * <p>
 * A missing lemma, or a lemma equal to {@code TermSuiteConstants.CARD_TAG}, is replaced by
 * the lower-cased covered text; any other lemma is lower-cased. All case conversions use the
 * locale of the configured language, so the result does not depend on the JVM default locale.
 *
 * @param jCas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	FSIterator<Annotation> it = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
	while (it.hasNext()) {
		WordAnnotation word = (WordAnnotation) it.next();
		String lemma = word.getLemma();
		if (lemma == null || lemma.equals(TermSuiteConstants.CARD_TAG)) {
			word.setLemma(word.getCoveredText().toLowerCase(language.getLocale()));
		} else {
			// Same locale as the covered-text branch, for consistent casing rules.
			word.setLemma(lemma.toLowerCase(language.getLocale()));
		}

		fixPlural();
	}
}

Source File: StringRegexFilter.java From termsuite-core with Apache License 2.0

6 votes

/**
 * Removes from the CAS indexes every {@code WordAnnotation} whose covered text fully
 * matches at least one of {@code PATTERNS}, and adds the number of removed annotations to
 * {@code totalFiltered}.
 * <p>
 * Each annotation is collected at most once, even when several patterns match it, so the
 * counter reflects distinct annotations and each one is removed exactly once. Removal is
 * deferred until iteration is finished so the index is not modified while it is traversed.
 *
 * @param cas the CAS to filter
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
	List<WordAnnotation> rem = Lists.newArrayList();
	FSIterator<Annotation> it = cas.getAnnotationIndex(WordAnnotation.type).iterator();
	while (it.hasNext()) {
		WordAnnotation word = (WordAnnotation) it.next();
		String text = word.getCoveredText();
		for (Pattern p : PATTERNS) {
			if (p.matcher(text).matches()) {
				rem.add(word);
				// One match is enough; continuing would record the same word again.
				break;
			}
		}
	}

	this.totalFiltered += rem.size();

	for (WordAnnotation wa : rem) {
		wa.removeFromIndexes(cas);
	}
}

Source File: HeidelTimeOpenNLP.java From newsleak with GNU Affero General Public License v3.0

6 votes

/**
 * Tells whether the DCT annotation of a jcas carries a usable value. When the jcas holds
 * no DCT annotation at all, {@code true} is returned because the fallback calculation
 * handles that situation.
 * 
 * @param jcas
 *            the jcas whose first DCT annotation is inspected
 * @return {@code true} if there is no DCT or its value looks like {@code 20041224} or
 *         starts like {@code 2004-12-24}; {@code false} if the value is null or has
 *         another shape
 */
private Boolean isValidDCT(JCas jcas) {
	FSIterator dctIter = jcas.getAnnotationIndex(Dct.type).iterator();

	// No DCT present: nothing to validate.
	if (!dctIter.hasNext()) {
		return true;
	}

	String dctVal = ((Dct) dctIter.next()).getValue();
	if (dctVal == null) {
		return false;
	}

	// Eight digits (20041224), or year/month/day with arbitrary single-character
	// separators followed by anything (2004-12-24...).
	return dctVal.matches("\\d{8}") || dctVal.matches("\\d{4}.\\d{2}.\\d{2}.*");
}

Source File: CasComparer.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Compares two CASes view by view: first the default-named views, then every view of
 * {@code c1} reachable through its sofa iterator against the same-named view of
 * {@code c2}, and finally asserts that both CASes have the same number of sofas.
 *
 * @param c1 the first CAS
 * @param c2 the second CAS
 */
public void assertEqualsInner(CAS c1, CAS c2) {
  alreadyCompared.clear();

  // Compare the initial views explicitly; they may exist without a SofaFS.
  assertEqualViewsInner(c1.getView(CAS.NAME_DEFAULT_SOFA), c2.getView(CAS.NAME_DEFAULT_SOFA));

  // Walk the sofas of c1 (this skips an initial view that has no sofa FS).
  int sofaCount1 = 0;
  for (FSIterator<Sofa> sofas1 = c1.getSofaIterator(); sofas1.hasNext(); sofaCount1++) {
    SofaFS sofa = sofas1.next();
    CAS view1 = c1.getView(sofa);
    CAS view2 = c2.getView(view1.getViewName());
    assertEqualViewsInner(view1, view2);
  }

  // Only count the sofas of c2.
  int sofaCount2 = 0;
  for (FSIterator<Sofa> sofas2 = c2.getSofaIterator(); sofas2.hasNext(); sofas2.moveToNext()) {
    sofaCount2++;
  }

  Assert.assertTrue(sofaCount1 == sofaCount2);
}

Source File: CasUtil.java From uima-uimafit with Apache License 2.0

6 votes

/**
 * Returns the one and only indexed feature structure of the given type.
 * 
 * @param cas
 *          the CAS to look into.
 * @param type
 *          the UIMA type to look for.
 * @return the single indexed instance of the given type.
 * @throws IllegalArgumentException
 *           if the CAS contains no instance, or more than one instance, of the given type.
 */
public static FeatureStructure selectSingleFS(CAS cas, Type type) {
  FSIterator<FeatureStructure> candidates = cas.getIndexRepository().getAllIndexedFS(type);

  if (candidates.hasNext()) {
    FeatureStructure only = candidates.next();
    if (!candidates.hasNext()) {
      return only;
    }
    throw new IllegalArgumentException("CAS contains more than one [" + type.getName() + "]");
  }

  throw new IllegalArgumentException("CAS does not contain any [" + type.getName() + "]");
}

Source File: JsonCasSerializer.java From termsuite-core with Apache License 2.0

6 votes

/**
 * Writes a JSON array holding one object per {@code WordAnnotation} indexed in the CAS.
 * Each object carries the annotation's string features followed by its offsets.
 *
 * @param jg the generator to write to
 * @param jCas the CAS providing the word annotations
 * @throws IOException if the generator fails to write
 */
private static void writeWordAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
    jg.writeStartArray();
    for (FSIterator<Annotation> words = jCas.getAnnotationIndex(WordAnnotation.type).iterator(); words.hasNext(); ) {
        WordAnnotation word = (WordAnnotation) words.next();
        jg.writeStartObject();
        // Field order is part of the produced output; keep it as is.
        writeStringField(jg, F_CATEGORY, word.getCategory());
        writeStringField(jg, F_LEMMA, word.getLemma());
        writeStringField(jg, F_STEM, word.getStem());
        writeStringField(jg, F_TAG, word.getTag());
        writeStringField(jg, F_SUB_CATEGORY, word.getSubCategory());
        writeStringField(jg, F_REGEX_LABEL, word.getRegexLabel());
        writeStringField(jg, F_NUMBER, word.getNumber());
        writeStringField(jg, F_GENDER, word.getGender());
        writeStringField(jg, F_CASE, word.getCase());
        writeStringField(jg, F_MOOD, word.getMood());
        writeStringField(jg, F_TENSE, word.getTense());
        writeStringField(jg, F_PERSON, word.getPerson());
        writeStringField(jg, F_DEGREE, word.getDegree());
        writeStringField(jg, F_FORMATION, word.getFormation());
        writeStringField(jg, F_LABELS, word.getLabels());
        writeOffsets(jg, word);
        jg.writeEndObject();
    }
    jg.writeEndArray();
}

Source File: ChineseNormalizer.java From termsuite-core with Apache License 2.0

6 votes

/**
 * Sets both the lemma and the stem of every {@code WordAnnotation} to its covered text.
 * Any exception raised while doing so is rethrown wrapped in an
 * {@code AnalysisEngineProcessException}.
 *
 * @param cas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException wrapping any failure during processing
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
	try {
		FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator();
		while (words.hasNext()) {
			WordAnnotation word = (WordAnnotation) words.next();
			String surface = word.getCoveredText();
			word.setLemma(surface);
			word.setStem(surface);
		}
	} catch (Exception e) {
		throw new AnalysisEngineProcessException(e);
	}
}

Source File: JsonCasSerializer.java From termsuite-core with Apache License 2.0

5 votes

/**
 * Writes a JSON array holding one object per {@code FixedExpression} indexed in the CAS;
 * each object contains only the annotation's offsets.
 *
 * @param jg the generator to write to
 * @param jCas the CAS providing the fixed expressions
 * @throws IOException if the generator fails to write
 */
private static void writeFixedExpressions(JsonGenerator jg, JCas jCas) throws IOException {
    jg.writeStartArray();
    for (FSIterator<Annotation> expressions = jCas.getAnnotationIndex(FixedExpression.type).iterator(); expressions.hasNext(); ) {
        FixedExpression expression = (FixedExpression) expressions.next();
        jg.writeStartObject();
        writeOffsets(jg, expression);
        jg.writeEndObject();
    }
    jg.writeEndArray();
}

Source File: JCasUtils.java From termsuite-core with Apache License 2.0

5 votes

/**
 * Looks up the first {@code SourceDocumentInformation} annotation indexed in the CAS.
 *
 * @param jCas the CAS to search
 * @return the first source document annotation, or an empty optional when there is none
 */
public static Optional<SourceDocumentInformation> getSourceDocumentAnnotation(JCas jCas) {
	FSIterator<Annotation> sdiIt = jCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
	if (!sdiIt.hasNext()) {
		return Optional.empty();
	}
	return Optional.of((SourceDocumentInformation) sdiIt.next());
}

Source File: PrintMissingTest.java From bluima with Apache License 2.0

5 votes

/**
 * Prints the type name of every annotation in the CAS to standard output. The covered
 * text and pretty-printed form of each annotation are accumulated in a local buffer that
 * is not used afterwards.
 *
 * @param jCas the CAS whose annotations are listed
 * @throws AnalysisEngineProcessException declared for callers; not thrown explicitly here
 */
public void process_old(JCas jCas) throws AnalysisEngineProcessException {
    StringBuffer sb = new StringBuffer();
    for (FSIterator<Annotation> annotations = jCas.getAnnotationIndex().iterator(); annotations.hasNext(); ) {
        Annotation current = annotations.next();
        System.out.println(current.getType().getName());
        sb.append(current.getCoveredText()).append('\n');
        current.prettyPrint(2, 2, sb, false);
        sb.append('\n');
    }
}

Source File: JCasUtils.java From termsuite-core with Apache License 2.0

5 votes

/**
 * Prints the word annotations of the CAS to standard output as two aligned lines, one
 * with the covered texts and one with the tags, in groups of 20 words. Every full group
 * is followed by a blank line; a final incomplete group is printed without one.
 *
 * @param jcas the CAS whose word annotations are displayed
 */
public static void showSdiWithCategory2(JCas jcas) {
	StringBuilder wordsLine = new StringBuilder();
	StringBuilder catsLine = new StringBuilder();
	int pending = 0;
	FSIterator<Annotation> words = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
	while (words.hasNext()) {
		WordAnnotation word = (WordAnnotation) words.next();

		// center(...) yields the text in [0] and the tag in [1].
		String[] cells = center(word.getCoveredText(), word.getTag());
		wordsLine.append(cells[0]).append(" ");
		catsLine.append(cells[1]).append(" ");
		pending++;

		if (pending < 20) {
			continue;
		}

		// A full group of 20: flush it and start over.
		System.out.println(wordsLine.toString());
		System.out.println(catsLine.toString());
		System.out.println();
		wordsLine.setLength(0);
		catsLine.setLength(0);
		pending = 0;
	}

	// Flush the last, incomplete group if any.
	if (pending > 0) {
		System.out.println(wordsLine.toString());
		System.out.println(catsLine.toString());
	}
}

Source File: LatvianTildeTagger.java From termsuite-core with Apache License 2.0

5 votes

/**
 * Passes every {@code WordAnnotation} of the CAS, together with its lower-cased tag, to
 * {@code setCategory}. Any exception raised while doing so is rethrown wrapped in an
 * {@code AnalysisEngineProcessException}.
 *
 * @param cas the CAS whose word annotations are processed
 * @throws AnalysisEngineProcessException wrapping any failure during processing
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
	try {
		FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator();
		while (words.hasNext()) {
			WordAnnotation word = (WordAnnotation) words.next();
			this.setCategory(word, word.getTag().toLowerCase());
		}
	} catch (Exception e) {
		throw new AnalysisEngineProcessException(e);
	}
}

Source File: UimaCopying.java From biomedicus with Apache License 2.0

5 votes

/**
 * Enqueues every indexed feature structure of the named type from the source view into a
 * {@code FeatureStructureCopyingQueue} targeting the destination view, then runs the queue.
 *
 * @param typeName name of the type to look up in the source view's type system
 * @param sourceView view whose indexed feature structures are read
 * @param destinationView view handed to the copying queue as the destination
 */
public static void copyFeatureStructuresOfType(String typeName, CAS sourceView,
    CAS destinationView) {
  FeatureStructureCopyingQueue featureStructureCopyingQueue = new FeatureStructureCopyingQueue(
      sourceView,
      destinationView);
  FSIterator<FeatureStructure> iterator = sourceView.getIndexRepository()
      .getAllIndexedFS(sourceView.getTypeSystem().getType(typeName));
  while (iterator.hasNext()) {
    // next() returns the current element AND advances; get() alone never advances, which
    // made this loop enqueue the first element forever.
    FeatureStructure featureStructure = iterator.next();
    featureStructureCopyingQueue.enqueue(featureStructure);
  }
  featureStructureCopyingQueue.run();
}

Source File: TableAnnotationDivider.java From biomedicus with Apache License 2.0

5 votes

/**
 * Splits the span of an annotation at each divider found by the dividers' subiterator.
 * For every divider, an annotation of {@code typeToCreate} is created and indexed from
 * the previous split point (initially the annotation's begin) to the divider's begin.
 *
 * @param annotation the annotation to divide
 * @throws NullPointerException if {@code typeToCreate} or {@code dividers} is null
 */
private void divideAnnotation(AnnotationFS annotation) {
  Objects.requireNonNull(typeToCreate);
  Objects.requireNonNull(dividers);

  FSIterator<AnnotationFS> dividerIt = dividers.subiterator(annotation);
  int segmentStart = annotation.getBegin();
  while (dividerIt.hasNext()) {
    AnnotationFS divider = dividerIt.next();
    int segmentEnd = divider.getBegin();
    AnnotationFS segment = cas.createAnnotation(typeToCreate, segmentStart, segmentEnd);
    cas.addFsToIndexes(segment);
    // The next segment starts where this one ended.
    segmentStart = segmentEnd;
  }
}

Source File: IteratorTest.java From uima-uimaj with Apache License 2.0

5 votes

/**
 * Walks the iterator from its first element and asserts that every element has exactly
 * the given type and that the number of elements equals {@code nbr}.
 *
 * @param x the iterator to check; it is repositioned to its first element
 * @param nbr the expected number of elements
 * @param type the type every element is expected to have
 */
private void verifyHaveSubset(FSIterator<?> x, int nbr, Type type) {
  int seen = 0;
  for (x.moveToFirst(); x.hasNext(); x.moveToNext()) {
    seen++;
    assertEquals(type, x.get().getType());
  }
  assertEquals(nbr, seen);
}

Source File: Annotator4.java From uima-uimafit with Apache License 2.0

5 votes

/**
 * Sets the part of speech of every {@code Token} in the CAS to {@code "NN"}.
 *
 * @param jCas the CAS whose tokens are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  for (FSIterator<Annotation> it = jCas.getAnnotationIndex(Token.type).iterator(); it.hasNext(); ) {
    ((Token) it.next()).setPos("NN");
  }
}

Source File: ImporterService.java From termsuite-core with Apache License 2.0

4 votes

/**
 * Imports the term occurrences of a CAS into the terminology.
 * <p>
 * For each {@code TermOccAnnotation}, the term is looked up by grouping key or created
 * from the occurrence's words, pattern and spotting rule. Its frequency is then
 * incremented and the occurrence is added to the occurrence store under the document URI.
 * Finally the counters of spotted terms and word annotations are increased by the sizes
 * of the corresponding indexes, and the occurrence store is flushed.
 *
 * @param jCas the CAS to import
 */
public void importToTerminology(JCas jCas) {
	Optional<SourceDocumentInformation> sdi = JCasUtils.getSourceDocumentAnnotation(jCas);
	String currentFileURI;
	if (sdi.isPresent()) {
		currentFileURI = sdi.get().getUri();
	} else {
		currentFileURI = "(no source uri given)";
	}

	FSIterator<Annotation> occurrences = jCas.getAnnotationIndex(TermOccAnnotation.type).iterator();
	while (occurrences.hasNext()) {
		TermOccAnnotation toa = (TermOccAnnotation) occurrences.next();
		String gKey = TermSuiteUtils.getGroupingKey(toa);

		TermService term;
		if (!terminoService.containsTerm(gKey)) {
			// Unknown term: resolve (or create) each of its words, then the term itself.
			int nbWords = toa.getWords().size();
			Word[] words = new Word[nbWords];
			for (int i = 0; i < nbWords; i++) {
				WordAnnotation wa = toa.getWords(i);
				String lemma = wa.getLemma();
				if (!this.terminoService.containsWord(lemma)) {
					words[i] = createOrGetWord(lemma, wa.getStem());
				} else {
					words[i] = this.terminoService.getWord(lemma);
				}
			}

			term = createOrGetTerm(toa.getPattern().toStringArray(), words);
			term.setSpottingRule(toa.getSpottingRuleName());
		} else {
			term = terminoService.getTerm(gKey);
		}

		term.incrementFrequency(1);
		occurrenceStore.addOccurrence(
				term.getTerm(),
				currentFileURI,
				toa.getBegin(),
				toa.getEnd(),
				toa.getCoveredText());
	}

	// Update the global counters from fresh iterators over the two indexes.
	FSIterator<Annotation> termIt = jCas.getAnnotationIndex(TermOccAnnotation.type).iterator();
	terminoService.incrementSpottedTermsNum(Iterators.size(termIt));
	FSIterator<Annotation> wordIt = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
	terminoService.incrementWordAnnotationNum(Iterators.size(wordIt));
	occurrenceStore.flush();
}

Source File: SimpleTextMerger.java From uima-uimaj with Apache License 2.0

4 votes

/**
 * Merges one incoming CAS segment into the pending merged CAS.
 * <p>
 * The segment's document text is appended to the text buffer, and annotations of the
 * configured types are copied with their offsets shifted by the length the buffer had
 * before the append. When the segment's {@code SourceDocumentInformation} marks it as the
 * last segment, the merged CAS receives the buffered text and its own source document
 * information, the buffer is reset and the merged CAS is flagged as ready for output.
 *
 * @param aJCas the incoming segment
 * @throws AnalysisEngineProcessException if the segment has no
 *           {@code SourceDocumentInformation} annotation
 */
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // Lazily obtain the CAS that accumulates the merged document.
  if (mMergedCas == null) {
    mMergedCas = getEmptyJCas();
  }

  // Append this segment's text, remembering where it starts in the merged text.
  String segmentText = aJCas.getDocumentText();
  int prevDocLen = mDocBuf.length();
  mDocBuf.append(segmentText);

  // Copy the configured annotation types. The set guards against copying an annotation
  // twice when it is reachable through more than one of the (possibly overlapping) types.
  CasCopier copier = new CasCopier(aJCas.getCas(), mMergedCas.getCas());
  Set copiedIndexedFs = new HashSet();
  for (int typeIdx = 0; typeIdx < mAnnotationTypesToCopy.length; typeIdx++) {
    Type type = mMergedCas.getTypeSystem().getType(mAnnotationTypesToCopy[typeIdx]);
    FSIndex index = aJCas.getCas().getAnnotationIndex(type);
    for (Iterator iter = index.iterator(); iter.hasNext(); ) {
      FeatureStructure fs = (FeatureStructure) iter.next();
      if (copiedIndexedFs.contains(fs)) {
        continue;
      }
      Annotation copyOfFs = (Annotation) copier.copyFs(fs);
      // Shift the offsets so they are relative to the merged text.
      copyOfFs.setBegin(copyOfFs.getBegin() + prevDocLen);
      copyOfFs.setEnd(copyOfFs.getEnd() + prevDocLen);
      mMergedCas.addFsToIndexes(copyOfFs);
      copiedIndexedFs.add(fs);
    }
  }

  // The SourceDocumentInformation FS gives the source URI of the document and tells
  // whether the incoming CAS is the last segment.
  FSIterator it = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (!it.hasNext()) {
    throw new AnalysisEngineProcessException(MESSAGE_DIGEST, MISSING_SOURCE_DOCUMENT_INFO,
            new Object[0]);
  }
  SourceDocumentInformation sourceDocInfo = (SourceDocumentInformation) it.next();
  if (!sourceDocInfo.getLastSegment()) {
    return;
  }

  // Last segment: finish the merged CAS so it can be output.
  mMergedCas.setDocumentText(mDocBuf.toString());

  SourceDocumentInformation destSDI = new SourceDocumentInformation(mMergedCas);
  destSDI.setUri(sourceDocInfo.getUri());
  destSDI.setOffsetInSource(0);
  destSDI.setLastSegment(true);
  destSDI.addToIndexes();

  mDocBuf = new StringBuffer();
  mReadyToOutput = true;
}

Source File: HeidelTimeOpenNLP.java From newsleak with GNU Affero General Public License v3.0

4 votes

/**
 * Postprocessing: Check dates starting with "0" which were extracted without
 * explicit "AD" hints if it is likely that they refer to the respective date BC.
 * <p>
 * All Timex3 annotations of type "DATE" or "TIME" are collected in index order. For each
 * of them (except the first) whose value starts with "0" and whose rule name does not
 * contain "-BCADhint", preceding entries of the list are inspected. If a preceding entry
 * has a "BC" value in the same or the following century-like two-digit prefix, the
 * current value is prefixed with "BC" and the annotation is re-indexed.
 * 
 * @param jcas
 *            the jcas whose Timex3 annotations are inspected and possibly updated in place
 */
public void disambiguateHistoricDates(JCas jcas) {

	// build up a list with all found TIMEX expressions
	List<Timex3> linearDates = new ArrayList<Timex3>();
	FSIterator iterTimex = jcas.getAnnotationIndex(Timex3.type).iterator();

	// Create List of all Timexes of types "date" and "time"
	while (iterTimex.hasNext()) {
		Timex3 timex = (Timex3) iterTimex.next();
		if (timex.getTimexType().equals("DATE") || timex.getTimexType().equals("TIME")) {
			linearDates.add(timex);
		}
	}

	//////////////////////////////////////////////
	// go through list of Date and Time timexes //
	//////////////////////////////////////////////
	// Starts at 1: the look-back below always needs at least one preceding entry.
	for (int i = 1; i < linearDates.size(); i++) {
		Timex3 t_i = (Timex3) linearDates.get(i);
		String value_i = t_i.getTimexValue();
		String newValue = value_i;
		Boolean change = false;
		// Expressions found by a rule whose name contains "-BCADhint" are left untouched.
		if (!(t_i.getFoundByRule().contains("-BCADhint"))) {
			if (value_i.startsWith("0")) {
				// offset: distance to the preceding entry being inspected.
				// counter: 1 + number of inspected entries whose value starts with a digit.
				Integer offset = 1, counter = 1;
				do {
					// Since i >= 1 here, this condition reduces to (i == 1 || !change):
					// once a change was decided for i > 1, no further entry can trigger one.
					if ((i == 1 || (i > 1 && !change))
							&& linearDates.get(i - offset).getTimexValue().startsWith("BC")) {
						if (value_i.length() > 1) {
							// The preceding BC value must start with the same two leading
							// characters as value_i, or with those two digits plus one.
							// NOTE(review): parseInt assumes the first two characters are
							// digits - confirm values starting with "0" always satisfy this.
							if ((linearDates.get(i - offset).getTimexValue()
									.startsWith("BC" + value_i.substring(0, 2)))
									|| (linearDates.get(i - offset).getTimexValue().startsWith("BC" + String
											.format("%02d", (Integer.parseInt(value_i.substring(0, 2)) + 1))))) {
								// Both values in "00"/"BC00" or both in "01"/"BC01": additionally
								// compare the first three digits before deciding.
								if (((value_i.startsWith("00"))
										&& (linearDates.get(i - offset).getTimexValue().startsWith("BC00")))
										|| ((value_i.startsWith("01")) && (linearDates.get(i - offset)
												.getTimexValue().startsWith("BC01")))) {
									if ((value_i.length() > 2)
											&& (linearDates.get(i - offset).getTimexValue().length() > 4)) {
										// Change only if the current three-digit prefix is not
										// greater than the three digits following "BC".
										if (Integer.parseInt(value_i.substring(0, 3)) <= Integer.parseInt(
												linearDates.get(i - offset).getTimexValue().substring(2, 5))) {
											newValue = "BC" + value_i;
											change = true;
											Logger.printDetail("DisambiguateHistoricDates: " + value_i + " to "
													+ newValue + ". Expression " + t_i.getCoveredText() + " due to "
													+ linearDates.get(i - offset).getCoveredText());
										}
									}
								} else {
									newValue = "BC" + value_i;
									change = true;
									Logger.printDetail("DisambiguateHistoricDates: " + value_i + " to " + newValue
											+ ". Expression " + t_i.getCoveredText() + " due to "
											+ linearDates.get(i - offset).getCoveredText());
								}
							}
						}
					}

					// Count inspected entries whose value starts with a digit (i.e. not "BC...").
					if ((linearDates.get(i - offset).getTimexType().equals("TIME")
							|| linearDates.get(i - offset).getTimexType().equals("DATE"))
							&& (linearDates.get(i - offset).getTimexValue().matches("^\\d.*"))) {
						counter++;
					}
					// Stop once counter reaches 5 or the incremented offset reaches i.
					// NOTE(review): because of "++offset < i", the entry at index 0 is only
					// inspected when i == 1 - confirm this is intended.
				} while (counter < 5 && ++offset < i);
			}
		}
		// Apply the new value; the annotation is removed from the indexes before the
		// update and added back afterwards.
		if (!(newValue.equals(value_i))) {
			t_i.removeFromIndexes();
			Logger.printDetail("DisambiguateHistoricDates: value changed to BC");

			t_i.setTimexValue(newValue);
			t_i.addToIndexes();
			linearDates.set(i, t_i);
		}
	}
}

Java Code Examples for org.apache.uima.cas.FSIterator#hasNext()