Java Code Examples for org.apache.uima.fit.util.JCasUtil#exists()
The following examples show how to use
org.apache.uima.fit.util.JCasUtil#exists().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LocalFeaturesTcAnnotator.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { if (!JCasUtil.exists(jcas, JCasId.class)) { JCasId id = new JCasId(jcas); id.setId(jcasId++); id.addToIndexes(); } switch (featureMode) { case Constants.FM_DOCUMENT: processDocument(jcas); break; case Constants.FM_PAIR: // same as document processDocument(jcas); break; case Constants.FM_SEQUENCE: processSequence(jcas); break; case Constants.FM_UNIT: processUnit(jcas); break; } }
Example 2
Source File: LocalFeaturesTcAnnotator.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
private void processDocument(JCas jcas) throws AnalysisEngineProcessException { if (!JCasUtil.exists(jcas, TextClassificationTarget.class)) { TextClassificationTarget target = new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length()); target.addToIndexes(); } // we need an outcome annotation to be present if (!JCasUtil.exists(jcas, TextClassificationOutcome.class)) { TextClassificationOutcome outcome = new TextClassificationOutcome(jcas); outcome.setOutcome(""); outcome.addToIndexes(); } // create new UIMA annotator in order to separate the parameter spaces // this annotator will get initialized with its own set of parameters loaded from the model try { engine.process(jcas); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } }
Example 3
Source File: SparkUimaUtils.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
public static void createSequenceFile(Object[] params, String uri) throws URISyntaxException, IOException, UIMAException, NoSuchMethodException, MissingSettingException, ClassNotFoundException { Configuration conf = new Configuration(); Path path = new Path(uri); Writer writer = SequenceFile.createWriter( conf, Writer.file(path), Writer.keyClass(Text.class), Writer.valueClass(SCAS.class)); int count = 0; CollectionReaderDescription readerDescription = Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.NYT, params); for (JCas jCas : SimplePipelineCasPoolIterator.iteratePipeline(20, readerDescription)) { if(JCasUtil.exists(jCas, DocumentMetaData.class)) { ++count; // Get the ID. DocumentMetaData dmd = JCasUtil.selectSingle(jCas, DocumentMetaData.class); String docId = "NULL"; if (dmd != null) { docId = dmd.getDocumentId(); } else { throw new IOException("No Document ID for xml: " + jCas.getView("xml").getDocumentText()); } Text docIdText = new Text(docId); SCAS scas = new SCAS(jCas.getCas()); writer.append(docIdText, scas); } jCas.release(); } logger.info("Wrote " + count + " documents to " + uri); IOUtils.closeStream(writer); }
Example 4
Source File: DKPro2Tcf.java From inception with Apache License 2.0 | 5 votes |
public void writePosTags(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) { if (!JCasUtil.exists(aJCas, POS.class)) { // Do nothing if there are no part-of-speech tags in the CAS log.debug("Layer [{}]: empty", TextCorpusLayerTag.POSTAGS.getXmlName()); return; } // Tokens layer must already exist TokensLayer tokensLayer = aTextCorpus.getTokensLayer(); // create POS tag annotation layer String posTagSet = "STTS"; for (TagsetDescription tagSet : select(aJCas, TagsetDescription.class)) { if (tagSet.getLayer().equals(POS.class.getName())) { posTagSet = tagSet.getName(); break; } } PosTagsLayer posLayer = aTextCorpus.createPosTagsLayer(posTagSet); log.debug("Layer [{}]: created", TextCorpusLayerTag.POSTAGS.getXmlName()); int j = 0; for (Token coveredToken : select(aJCas, Token.class)) { POS pos = coveredToken.getPos(); if (pos != null && posLayer != null ) { String posValue = coveredToken.getPos().getPosValue(); posLayer.addTag(posValue, tokensLayer.getToken(j)); } j++; } }
Example 5
Source File: DKPro2Tcf.java From inception with Apache License 2.0 | 5 votes |
public void writeLemmas(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) { if (!JCasUtil.exists(aJCas, Lemma.class)) { // Do nothing if there are no lemmas in the CAS log.debug("Layer [{}]: empty", TextCorpusLayerTag.LEMMAS.getXmlName()); return; } // Tokens layer must already exist TokensLayer tokensLayer = aTextCorpus.getTokensLayer(); // create lemma annotation layer LemmasLayer lemmasLayer = aTextCorpus.createLemmasLayer(); log.debug("Layer [{}]: created", TextCorpusLayerTag.LEMMAS.getXmlName()); int j = 0; for (Token coveredToken : select(aJCas, Token.class)) { Lemma lemma = coveredToken.getLemma(); if (lemma != null && lemmasLayer != null) { String lemmaValue = coveredToken.getLemma().getValue(); lemmasLayer.addLemma(lemmaValue, tokensLayer.getToken(j)); } j++; } }
Example 6
Source File: DKPro2Tcf.java From inception with Apache License 2.0 | 5 votes |
public void writeOrthograph(JCas aJCas, TextCorpus aTextCorpus) { if (!JCasUtil.exists(aJCas, SofaChangeAnnotation.class)) { // Do nothing if there are no SofaChangeAnnotation layer // (Which is equivalent to Orthography layer in TCF) in the CAS log.debug("Layer [{}]: empty", TextCorpusLayerTag.ORTHOGRAPHY.getXmlName()); return; } // Tokens layer must already exist TokensLayer tokensLayer = aTextCorpus.getTokensLayer(); // create orthographyLayer annotation layer OrthographyLayer orthographyLayer = aTextCorpus.createOrthographyLayer(); log.debug("Layer [{}]: created", TextCorpusLayerTag.ORTHOGRAPHY.getXmlName()); int j = 0; for (Token token : select(aJCas, Token.class)) { List<SofaChangeAnnotation> scas = selectCovered(aJCas, SofaChangeAnnotation.class, token.getBegin(), token.getEnd()); if (scas.size() > 0 && orthographyLayer != null) { SofaChangeAnnotation change = scas.get(0); orthographyLayer.addCorrection(scas.get(0).getValue(), tokensLayer.getToken(j), Optional.ofNullable(change.getOperation()).map(CorrectionOperation::valueOf) .orElse(null)); } j++; } }
Example 7
Source File: DKPro2Tcf.java From inception with Apache License 2.0 | 5 votes |
public void writeNamedEntity(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) { if (!JCasUtil.exists(aJCas, NamedEntity.class)) { // Do nothing if there are no named entities in the CAS log.debug("Layer [{}]: empty", TextCorpusLayerTag.NAMED_ENTITIES.getXmlName()); return; } String tagSetName = "BART"; for (TagsetDescription tagSet : select(aJCas, TagsetDescription.class)) { if (tagSet.getLayer().equals(NamedEntity.class.getName())) { tagSetName = tagSet.getName(); break; } } NamedEntitiesLayer namedEntitiesLayer = aTextCorpus.createNamedEntitiesLayer(tagSetName); log.debug("Layer [{}]: created", TextCorpusLayerTag.NAMED_ENTITIES.getXmlName()); for (NamedEntity namedEntity : select(aJCas, NamedEntity.class)) { List<Token> tokensInCas = selectCovered(aJCas, Token.class, namedEntity.getBegin(), namedEntity.getEnd()); List<eu.clarin.weblicht.wlfxb.tc.api.Token> tokensInTcf = new ArrayList<>(); for (Token token : tokensInCas) { tokensInTcf.add(aTokensBeginPositionMap.get(token.getBegin())); } namedEntitiesLayer.addEntity(namedEntity.getValue(), tokensInTcf); } }
Example 8
Source File: BlueCasUtil.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Adds a {@code Header} carrying the given document id to the CAS.
 *
 * @param jCas  the CAS to add the header to
 * @param docId the document id, stored in its decimal string form
 * @return the same CAS that was passed in
 * @throws IllegalArgumentException if the CAS already contains a {@code Header}
 */
public static JCas setDocId(JCas jCas, int docId) {
    if (JCasUtil.exists(jCas, Header.class)) {
        // Say what went wrong instead of throwing a message-less exception.
        throw new IllegalArgumentException(
                "CAS already contains a Header; refusing to set docId " + docId);
    }
    Header header = new Header(jCas);
    header.setDocId(String.valueOf(docId));
    header.addToIndexes();
    return jCas;
}
Example 9
Source File: DKPro2Tcf.java From inception with Apache License 2.0 | 4 votes |
public void writeDependency(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) { if (!JCasUtil.exists(aJCas, Dependency.class)) { // Do nothing if there are no dependencies in the CAS log.debug("Layer [{}]: empty", TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName()); return; } DependencyParsingLayer dependencyParsingLayer = null; String tagSetName = "tiger"; for (TagsetDescription tagSet : select(aJCas, TagsetDescription.class)) { if (tagSet.getLayer().equals(Dependency.class.getName())) { tagSetName = tagSet.getName(); break; } } Optional<Dependency> hasNonBasic = select(aJCas, Dependency.class).stream() .filter(dep -> dep.getFlavor() != null && !DependencyFlavor.BASIC.equals(dep.getFlavor())) .findAny(); dependencyParsingLayer = aTextCorpus.createDependencyParsingLayer(tagSetName, hasNonBasic.isPresent(), true); log.debug("Layer [{}]: created", TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName()); for (Sentence s : select(aJCas, Sentence.class)) { List<eu.clarin.weblicht.wlfxb.tc.api.Dependency> deps = new ArrayList<>(); for (Dependency d : selectCovered(Dependency.class, s)) { eu.clarin.weblicht.wlfxb.tc.api.Dependency dependency = dependencyParsingLayer .createDependency(d.getDependencyType(), aTokensBeginPositionMap.get(d.getDependent().getBegin()), aTokensBeginPositionMap.get(d.getGovernor().getBegin())); deps.add(dependency); } if (deps.size() > 0) { dependencyParsingLayer.addParse(deps); } } }