Java Code Examples for org.apache.uima.cas.FSIterator#hasNext()
The following examples show how to use
org.apache.uima.cas.FSIterator#hasNext().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MateLemmaFixer.java From termsuite-core with Apache License 2.0 | 6 votes |
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { FSIterator<Annotation> it = jCas.getAnnotationIndex(WordAnnotation.type).iterator(); WordAnnotation word; while(it.hasNext()) { word = (WordAnnotation) it.next(); if (word.getLemma() == null) word.setLemma(word.getCoveredText().toLowerCase(language.getLocale())); else if (word.getLemma().equals("CD")) //ou TermSuiteConstants.CARD_MATE word.setLemma(word.getCoveredText().toLowerCase(language.getLocale())); else { word.setLemma(word.getLemma().toLowerCase()); if (word.getLemma().equals((word.getStem()+"s"))){ word.setLemma(word.getCoveredText().toLowerCase(language.getLocale()).replaceAll("s$", "")); } } } }
Example 2
Source File: TreeTaggerLemmaFixer.java From termsuite-core with Apache License 2.0 | 6 votes |
/**
 * Normalizes the lemma of every {@code WordAnnotation} indexed in the CAS.
 *
 * A missing lemma, or a lemma equal to {@code TermSuiteConstants.CARD_TAG}, is replaced by the
 * lower-cased covered text; any other lemma is lower-cased. {@code fixPlural()} is invoked once
 * per annotation, after its lemma has been updated.
 *
 * All lower-casing uses {@code language.getLocale()} so the result does not depend on the JVM
 * default locale.
 *
 * @param jCas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    FSIterator<Annotation> it = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
    while (it.hasNext()) {
        WordAnnotation word = (WordAnnotation) it.next();
        String lemma = word.getLemma();
        if (lemma == null || lemma.equals(TermSuiteConstants.CARD_TAG)) {
            word.setLemma(word.getCoveredText().toLowerCase(language.getLocale()));
        } else {
            // Use the language locale here too, like the branch above.
            word.setLemma(lemma.toLowerCase(language.getLocale()));
        }
        fixPlural();
    }
}
Example 3
Source File: StringRegexFilter.java From termsuite-core with Apache License 2.0 | 6 votes |
/**
 * Removes from the CAS indexes every {@code WordAnnotation} whose covered text fully matches
 * at least one of {@code PATTERNS}, and adds the number of removed annotations to
 * {@code totalFiltered}.
 *
 * Matching annotations are collected first and removed afterwards, so the index is not modified
 * while it is being iterated. Each annotation is collected at most once, even when several
 * patterns match it, so the counter reflects the number of annotations actually filtered.
 *
 * @param cas the CAS to filter
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
    List<WordAnnotation> rem = Lists.newArrayList();
    FSIterator<Annotation> it = cas.getAnnotationIndex(WordAnnotation.type).iterator();
    while (it.hasNext()) {
        WordAnnotation word = (WordAnnotation) it.next();
        String text = word.getCoveredText();
        for (Pattern p : PATTERNS) {
            if (p.matcher(text).matches()) {
                rem.add(word);
                // One match is enough; continuing would register the same word again.
                break;
            }
        }
    }
    this.totalFiltered += rem.size();
    for (WordAnnotation wa : rem) {
        wa.removeFromIndexes(cas);
    }
}
Example 4
Source File: HeidelTimeOpenNLP.java From newsleak with GNU Affero General Public License v3.0 | 6 votes |
/** * Check whether or not a jcas object has a correct DCT value. If there is no * DCT present, we canonically return true since fallback calculation takes care * of that scenario. * * @param jcas * @return Whether or not the given jcas contains a valid DCT */ private Boolean isValidDCT(JCas jcas) { FSIterator dctIter = jcas.getAnnotationIndex(Dct.type).iterator(); if (!dctIter.hasNext()) { return true; } else { Dct dct = (Dct) dctIter.next(); String dctVal = dct.getValue(); if (dctVal == null) return false; if (dctVal.matches("\\d{8}") // Something like 20041224 || dctVal.matches("\\d{4}.\\d{2}.\\d{2}.*")) { // Something // like // 2004-12-24 return true; } else { return false; } } }
Example 5
Source File: CasComparer.java From uima-uimaj with Apache License 2.0 | 6 votes |
public void assertEqualsInner(CAS c1, CAS c2) { alreadyCompared.clear(); // this code handles initial views with no SofaFS CAS initialView1 = c1.getView(CAS.NAME_DEFAULT_SOFA); CAS initialView2 = c2.getView(CAS.NAME_DEFAULT_SOFA); assertEqualViewsInner(initialView1, initialView2); // this code skips the initial view, if it doesn't have a sofa FS FSIterator<Sofa> sofaIter = c1.getSofaIterator(); int c1Sofas = 0; while (sofaIter.hasNext()) { SofaFS sofa = sofaIter.next(); CAS tcas1 = c1.getView(sofa); CAS tcas2 = c2.getView(tcas1.getViewName()); assertEqualViewsInner(tcas1, tcas2); c1Sofas++; } sofaIter = c2.getSofaIterator(); int c2Sofas = 0; while (sofaIter.hasNext()) { c2Sofas++; sofaIter.moveToNext(); } Assert.assertTrue(c1Sofas == c2Sofas); }
Example 6
Source File: CasUtil.java From uima-uimafit with Apache License 2.0 | 6 votes |
/**
 * Returns the one and only indexed feature structure of the given type.
 *
 * @param cas
 *          a CAS containing the feature structure.
 * @param type
 *          a UIMA type.
 * @return the single instance of the given type.
 * @throws IllegalArgumentException
 *           if the CAS holds no instance, or more than one instance, of the given type.
 */
public static FeatureStructure selectSingleFS(CAS cas, Type type) {
    FSIterator<FeatureStructure> candidates = cas.getIndexRepository().getAllIndexedFS(type);

    if (candidates.hasNext()) {
        FeatureStructure only = candidates.next();
        if (!candidates.hasNext()) {
            return only;
        }
        throw new IllegalArgumentException("CAS contains more than one [" + type.getName() + "]");
    }

    throw new IllegalArgumentException("CAS does not contain any [" + type.getName() + "]");
}
Example 7
Source File: JsonCasSerializer.java From termsuite-core with Apache License 2.0 | 6 votes |
/**
 * Writes all {@code WordAnnotation}s of the CAS as a JSON array, one object per annotation.
 *
 * Each object receives the annotation's string features in a fixed order, followed by its
 * offsets.
 *
 * @param jg the generator to write to
 * @param jCas the CAS providing the word annotations
 * @throws IOException if the generator fails to write
 */
private static void writeWordAnnotations(JsonGenerator jg, JCas jCas) throws IOException {
    jg.writeStartArray();
    for (FSIterator<Annotation> words = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
            words.hasNext(); ) {
        WordAnnotation word = (WordAnnotation) words.next();
        jg.writeStartObject();
        writeStringField(jg, F_CATEGORY, word.getCategory());
        writeStringField(jg, F_LEMMA, word.getLemma());
        writeStringField(jg, F_STEM, word.getStem());
        writeStringField(jg, F_TAG, word.getTag());
        writeStringField(jg, F_SUB_CATEGORY, word.getSubCategory());
        writeStringField(jg, F_REGEX_LABEL, word.getRegexLabel());
        writeStringField(jg, F_NUMBER, word.getNumber());
        writeStringField(jg, F_GENDER, word.getGender());
        writeStringField(jg, F_CASE, word.getCase());
        writeStringField(jg, F_MOOD, word.getMood());
        writeStringField(jg, F_TENSE, word.getTense());
        writeStringField(jg, F_PERSON, word.getPerson());
        writeStringField(jg, F_DEGREE, word.getDegree());
        writeStringField(jg, F_FORMATION, word.getFormation());
        writeStringField(jg, F_LABELS, word.getLabels());
        writeOffsets(jg, word);
        jg.writeEndObject();
    }
    jg.writeEndArray();
}
Example 8
Source File: ChineseNormalizer.java From termsuite-core with Apache License 2.0 | 6 votes |
/**
 * Sets both the lemma and the stem of every {@code WordAnnotation} to its covered text.
 *
 * @param cas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException wrapping any exception raised while processing
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
    try {
        AnnotationIndex<Annotation> wordIndex = cas.getAnnotationIndex(WordAnnotation.type);
        for (FSIterator<Annotation> words = wordIndex.iterator(); words.hasNext(); ) {
            WordAnnotation word = (WordAnnotation) words.next();
            String surfaceForm = word.getCoveredText();
            word.setLemma(surfaceForm);
            word.setStem(surfaceForm);
        }
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
Example 9
Source File: JsonCasSerializer.java From termsuite-core with Apache License 2.0 | 5 votes |
/**
 * Writes all {@code FixedExpression} annotations of the CAS as a JSON array; each element is
 * an object holding only the annotation's offsets.
 *
 * @param jg the generator to write to
 * @param jCas the CAS providing the fixed expressions
 * @throws IOException if the generator fails to write
 */
private static void writeFixedExpressions(JsonGenerator jg, JCas jCas) throws IOException {
    jg.writeStartArray();
    for (FSIterator<Annotation> expressions =
            jCas.getAnnotationIndex(FixedExpression.type).iterator(); expressions.hasNext(); ) {
        FixedExpression expression = (FixedExpression) expressions.next();
        jg.writeStartObject();
        writeOffsets(jg, expression);
        jg.writeEndObject();
    }
    jg.writeEndArray();
}
Example 10
Source File: JCasUtils.java From termsuite-core with Apache License 2.0 | 5 votes |
/**
 * Looks up the first {@code SourceDocumentInformation} annotation indexed in the CAS.
 *
 * @param jCas the CAS to search
 * @return the first source document annotation, or an empty {@code Optional} when none is indexed
 */
public static Optional<SourceDocumentInformation> getSourceDocumentAnnotation(JCas jCas) {
    FSIterator<Annotation> candidates =
            jCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (!candidates.hasNext()) {
        return Optional.empty();
    }
    SourceDocumentInformation first = (SourceDocumentInformation) candidates.next();
    return Optional.of(first);
}
Example 11
Source File: PrintMissingTest.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Prints the type name of every annotation in the default annotation index to standard output.
 *
 * The covered text and pretty-printed form of each annotation are also accumulated in a local
 * buffer, which this method does not use afterwards.
 *
 * @param jCas the CAS whose annotations are listed
 * @throws AnalysisEngineProcessException declared in the signature
 */
public void process_old(JCas jCas) throws AnalysisEngineProcessException {
    FSIterator<Annotation> annotations = jCas.getAnnotationIndex().iterator();
    StringBuffer dump = new StringBuffer();
    for (; annotations.hasNext(); ) {
        Annotation current = annotations.next();
        System.out.println(current.getType().getName());
        dump.append(current.getCoveredText()).append('\n');
        current.prettyPrint(2, 2, dump, false);
        dump.append('\n');
    }
}
Example 12
Source File: JCasUtils.java From termsuite-core with Apache License 2.0 | 5 votes |
/**
 * Prints the word annotations of the CAS to standard output as two aligned rows: the covered
 * texts on one line and the corresponding tags on the next.
 *
 * Rows are flushed every 20 words, followed by an empty line; a final partial row is flushed
 * without a trailing empty line.
 *
 * @param jcas the CAS whose word annotations are displayed
 */
public static void showSdiWithCategory2(JCas jcas) {
    StringBuilder wordsRow = new StringBuilder();
    StringBuilder tagsRow = new StringBuilder();
    int pending = 0;

    for (FSIterator<Annotation> words = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
            words.hasNext(); ) {
        pending++;
        WordAnnotation word = (WordAnnotation) words.next();
        String[] cells = center(word.getCoveredText(), word.getTag());
        wordsRow.append(cells[0]).append(" ");
        tagsRow.append(cells[1]).append(" ");

        if (pending != 20) {
            continue;
        }
        // A full row of 20 words: print it and start over.
        System.out.println(wordsRow.toString());
        System.out.println(tagsRow.toString());
        System.out.println();
        wordsRow.setLength(0);
        tagsRow.setLength(0);
        pending = 0;
    }

    if (pending > 0) {
        System.out.println(wordsRow.toString());
        System.out.println(tagsRow.toString());
    }
}
Example 13
Source File: LatvianTildeTagger.java From termsuite-core with Apache License 2.0 | 5 votes |
/**
 * Derives the category of every {@code WordAnnotation} from its lower-cased tag by delegating
 * to {@code setCategory}.
 *
 * @param cas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException wrapping any exception raised while processing
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
    try {
        AnnotationIndex<Annotation> wordIndex = cas.getAnnotationIndex(WordAnnotation.type);
        for (FSIterator<Annotation> words = wordIndex.iterator(); words.hasNext(); ) {
            WordAnnotation word = (WordAnnotation) words.next();
            String lowerCasedTag = word.getTag().toLowerCase();
            this.setCategory(word, lowerCasedTag);
        }
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
Example 14
Source File: UimaCopying.java From biomedicus with Apache License 2.0 | 5 votes |
/**
 * Copies every indexed feature structure of the named type from one view to another.
 *
 * All matching feature structures of {@code sourceView} are enqueued on a
 * {@code FeatureStructureCopyingQueue}, which is then run.
 *
 * @param typeName fully qualified name of the type, resolved in the source view's type system
 * @param sourceView the view to read feature structures from
 * @param destinationView the view passed to the copying queue as the copy target
 */
public static void copyFeatureStructuresOfType(String typeName, CAS sourceView,
        CAS destinationView) {
    FeatureStructureCopyingQueue featureStructureCopyingQueue = new FeatureStructureCopyingQueue(
            sourceView, destinationView);
    FSIterator<FeatureStructure> iterator = sourceView.getIndexRepository()
            .getAllIndexedFS(sourceView.getTypeSystem().getType(typeName));
    while (iterator.hasNext()) {
        // next() returns the current element AND advances; get() alone never moves the
        // iterator, which made this loop spin forever on a non-empty index.
        FeatureStructure featureStructure = iterator.next();
        featureStructureCopyingQueue.enqueue(featureStructure);
    }
    featureStructureCopyingQueue.run();
}
Example 15
Source File: TableAnnotationDivider.java From biomedicus with Apache License 2.0 | 5 votes |
/**
 * Splits the span of the given annotation at the begin offset of each divider found by the
 * divider index's subiterator over that annotation.
 *
 * For every divider, an annotation of {@code typeToCreate} is created from the current start
 * offset up to the divider's begin and added to the indexes; the divider's begin then becomes
 * the next start offset.
 *
 * @param annotation the annotation to divide
 * @throws NullPointerException if {@code typeToCreate} or {@code dividers} has not been set
 */
private void divideAnnotation(AnnotationFS annotation) {
    Objects.requireNonNull(typeToCreate);
    Objects.requireNonNull(dividers);

    FSIterator<AnnotationFS> dividerIt = dividers.subiterator(annotation);
    int segmentStart = annotation.getBegin();
    for (; dividerIt.hasNext(); ) {
        int segmentEnd = dividerIt.next().getBegin();
        AnnotationFS segment = cas.createAnnotation(typeToCreate, segmentStart, segmentEnd);
        cas.addFsToIndexes(segment);
        segmentStart = segmentEnd;
    }
}
Example 16
Source File: IteratorTest.java From uima-uimaj with Apache License 2.0 | 5 votes |
/**
 * Walks the iterator from its first element and asserts that every element has exactly the
 * given type and that the total number of elements equals {@code nbr}.
 *
 * @param x the iterator to verify; it is repositioned to its first element
 * @param nbr the expected number of elements
 * @param type the expected type of every element
 */
private void verifyHaveSubset(FSIterator<?> x, int nbr, Type type) {
    int seen = 0;
    for (x.moveToFirst(); x.hasNext(); x.moveToNext()) {
        seen++;
        assertEquals(type, x.get().getType());
    }
    assertEquals(nbr, seen);
}
Example 17
Source File: Annotator4.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Assigns the part-of-speech value {@code "NN"} to every {@code Token} in the CAS.
 *
 * @param jCas the CAS whose tokens are updated in place
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    for (FSIterator<Annotation> it = jCas.getAnnotationIndex(Token.type).iterator();
            it.hasNext(); ) {
        Token current = (Token) it.next();
        current.setPos("NN");
    }
}
Example 18
Source File: ImporterService.java From termsuite-core with Apache License 2.0 | 4 votes |
/**
 * Imports the term occurrences of one analysed document into the terminology.
 *
 * For every {@code TermOccAnnotation} the matching term is looked up by grouping key, or
 * created from the occurrence's words and pattern when it does not exist yet (in which case
 * its spotting rule is also set). The term's frequency is incremented and the occurrence is
 * recorded in the occurrence store under the document URI. Finally the spotted-term and
 * word-annotation counters are increased by the sizes of the respective indexes, and the
 * occurrence store is flushed.
 *
 * @param jCas the analysed document
 */
public void importToTerminology(JCas jCas) {
    Optional<SourceDocumentInformation> sourceInfo = JCasUtils.getSourceDocumentAnnotation(jCas);
    String currentFileURI;
    if (sourceInfo.isPresent()) {
        currentFileURI = sourceInfo.get().getUri();
    } else {
        currentFileURI = "(no source uri given)";
    }

    for (FSIterator<Annotation> occurrences =
            jCas.getAnnotationIndex(TermOccAnnotation.type).iterator(); occurrences.hasNext(); ) {
        TermOccAnnotation toa = (TermOccAnnotation) occurrences.next();
        String groupingKey = TermSuiteUtils.getGroupingKey(toa);

        TermService term;
        if (!terminoService.containsTerm(groupingKey)) {
            // Unknown term: resolve (or create) each of its words, then create the term.
            Word[] words = new Word[toa.getWords().size()];
            for (int i = 0; i < toa.getWords().size(); i++) {
                WordAnnotation wa = toa.getWords(i);
                if (this.terminoService.containsWord(wa.getLemma())) {
                    words[i] = this.terminoService.getWord(wa.getLemma());
                } else {
                    words[i] = createOrGetWord(wa.getLemma(), wa.getStem());
                }
            }
            term = createOrGetTerm(toa.getPattern().toStringArray(), words);
            term.setSpottingRule(toa.getSpottingRuleName());
        } else {
            term = terminoService.getTerm(groupingKey);
        }

        term.incrementFrequency(1);
        occurrenceStore.addOccurrence(
                term.getTerm(),
                currentFileURI,
                toa.getBegin(),
                toa.getEnd(),
                toa.getCoveredText());
    }

    FSIterator<Annotation> termIt = jCas.getAnnotationIndex(TermOccAnnotation.type).iterator();
    int spottedTerms = Iterators.size(termIt);
    terminoService.incrementSpottedTermsNum(spottedTerms);

    FSIterator<Annotation> wordIt = jCas.getAnnotationIndex(WordAnnotation.type).iterator();
    int wordAnnotations = Iterators.size(wordIt);
    terminoService.incrementWordAnnotationNum(wordAnnotations);

    occurrenceStore.flush();
}
Example 19
Source File: SimpleTextMerger.java From uima-uimaj with Apache License 2.0 | 4 votes |
public void process(JCas aJCas) throws AnalysisEngineProcessException { // procure a new CAS if we don't have one already if (mMergedCas == null) { mMergedCas = getEmptyJCas(); } // append document text String docText = aJCas.getDocumentText(); int prevDocLen = mDocBuf.length(); mDocBuf.append(docText); // copy specified annotation types CasCopier copier = new CasCopier(aJCas.getCas(), mMergedCas.getCas()); Set copiedIndexedFs = new HashSet(); // needed in case one annotation is in two indexes (could // happen if specified annotation types overlap) for (int i = 0; i < mAnnotationTypesToCopy.length; i++) { Type type = mMergedCas.getTypeSystem().getType(mAnnotationTypesToCopy[i]); FSIndex index = aJCas.getCas().getAnnotationIndex(type); Iterator iter = index.iterator(); while (iter.hasNext()) { FeatureStructure fs = (FeatureStructure) iter.next(); if (!copiedIndexedFs.contains(fs)) { Annotation copyOfFs = (Annotation) copier.copyFs(fs); // update begin and end copyOfFs.setBegin(copyOfFs.getBegin() + prevDocLen); copyOfFs.setEnd(copyOfFs.getEnd() + prevDocLen); mMergedCas.addFsToIndexes(copyOfFs); copiedIndexedFs.add(fs); } } } // get the SourceDocumentInformation FS, which indicates the sourceURI of the document // and whether the incoming CAS is the last segment FSIterator it = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator(); if (!it.hasNext()) { throw new AnalysisEngineProcessException(MESSAGE_DIGEST, MISSING_SOURCE_DOCUMENT_INFO, new Object[0]); } SourceDocumentInformation sourceDocInfo = (SourceDocumentInformation) it.next(); if (sourceDocInfo.getLastSegment()) { // time to produce an output CAS // set the document text mMergedCas.setDocumentText(mDocBuf.toString()); // add source document info to destination CAS SourceDocumentInformation destSDI = new SourceDocumentInformation(mMergedCas); destSDI.setUri(sourceDocInfo.getUri()); destSDI.setOffsetInSource(0); destSDI.setLastSegment(true); destSDI.addToIndexes(); mDocBuf = new StringBuffer(); 
mReadyToOutput = true; } }
Example 20
Source File: HeidelTimeOpenNLP.java From newsleak with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Postprocessing: Check dates starting with "0" which were extracted without
 * explicit "AD" hints if it is likely that they refer to the respective date BC.
 *
 * All {@code Timex3} annotations of type "DATE" or "TIME" are collected in index order. For
 * each collected expression except the first, whose value starts with "0" and whose rule name
 * does not contain "-BCADhint", the preceding expressions are scanned backwards. If a
 * preceding value starts with "BC" and is compatible with the current value (see the inline
 * comments), the current value is prefixed with "BC" and the annotation is re-indexed.
 *
 * @param jcas the CAS whose Timex3 annotations are inspected and possibly updated in place
 */
public void disambiguateHistoricDates(JCas jcas) {
    // build up a list with all found TIMEX expressions
    List<Timex3> linearDates = new ArrayList<Timex3>();
    FSIterator iterTimex = jcas.getAnnotationIndex(Timex3.type).iterator();

    // Create List of all Timexes of types "date" and "time"
    while (iterTimex.hasNext()) {
        Timex3 timex = (Timex3) iterTimex.next();
        if (timex.getTimexType().equals("DATE") || timex.getTimexType().equals("TIME")) {
            linearDates.add(timex);
        }
    }

    //////////////////////////////////////////////
    // go through list of Date and Time timexes //
    //////////////////////////////////////////////
    // Starts at index 1: the first collected expression has no predecessor to compare with.
    for (int i = 1; i < linearDates.size(); i++) {
        Timex3 t_i = (Timex3) linearDates.get(i);
        String value_i = t_i.getTimexValue();
        String newValue = value_i;
        Boolean change = false;
        // Expressions found by a rule carrying an explicit BC/AD hint are left untouched.
        if (!(t_i.getFoundByRule().contains("-BCADhint"))) {
            if (value_i.startsWith("0")) {
                // offset: how far back the predecessor under inspection lies.
                // counter: starts at 1 and is incremented for every inspected predecessor
                // whose value starts with a digit; the scan stops once it reaches 5.
                Integer offset = 1, counter = 1;
                do {
                    // Only consider a "BC" predecessor while no change has been decided yet.
                    if ((i == 1 || (i > 1 && !change)) && linearDates.get(i - offset).getTimexValue().startsWith("BC")) {
                        if (value_i.length() > 1) {
                            // The predecessor must share the first two digits of the current
                            // value, or have those two digits plus one.
                            if ((linearDates.get(i - offset).getTimexValue()
                                    .startsWith("BC" + value_i.substring(0, 2)))
                                    || (linearDates.get(i - offset).getTimexValue().startsWith("BC" + String
                                            .format("%02d", (Integer.parseInt(value_i.substring(0, 2)) + 1))))) {
                                // For values in the "00"/"01" range with a predecessor in the
                                // same range, additionally compare the first three digits.
                                if (((value_i.startsWith("00"))
                                        && (linearDates.get(i - offset).getTimexValue().startsWith("BC00")))
                                        || ((value_i.startsWith("01")) && (linearDates.get(i - offset)
                                                .getTimexValue().startsWith("BC01")))) {
                                    if ((value_i.length() > 2)
                                            && (linearDates.get(i - offset).getTimexValue().length() > 4)) {
                                        // Change only if the current three-digit prefix is not
                                        // larger than the predecessor's (digits after "BC").
                                        if (Integer.parseInt(value_i.substring(0, 3)) <= Integer.parseInt(
                                                linearDates.get(i - offset).getTimexValue().substring(2, 5))) {
                                            newValue = "BC" + value_i;
                                            change = true;
                                            Logger.printDetail("DisambiguateHistoricDates: " + value_i + " to " +
                                                    newValue + ". Expression " + t_i.getCoveredText() + " due to "
                                                    + linearDates.get(i - offset).getCoveredText());
                                        }
                                    }
                                } else {
                                    newValue = "BC" + value_i;
                                    change = true;
                                    Logger.printDetail("DisambiguateHistoricDates: " + value_i + " to " + newValue
                                            + ". Expression " + t_i.getCoveredText() + " due to "
                                            + linearDates.get(i - offset).getCoveredText());
                                }
                            }
                        }
                    }
                    // Count predecessors whose value starts with a digit (i.e. not "BC...").
                    if ((linearDates.get(i - offset).getTimexType().equals("TIME")
                            || linearDates.get(i - offset).getTimexType().equals("DATE"))
                            && (linearDates.get(i - offset).getTimexValue().matches("^\\d.*"))) {
                        counter++;
                    }
                    // NOTE(review): "++offset < i" stops at offset == i - 1, so for i > 1 the
                    // element at index 0 is never inspected — confirm this is intended.
                } while (counter < 5 && ++offset < i);
            }
        }
        // Apply the change: the annotation is removed from the indexes, updated, and re-added.
        if (!(newValue.equals(value_i))) {
            t_i.removeFromIndexes();
            Logger.printDetail("DisambiguateHistoricDates: value changed to BC");
            t_i.setTimexValue(newValue);
            t_i.addToIndexes();
            linearDates.set(i, t_i);
        }
    }
}