Java Code Examples for org.apache.uima.jcas.JCas#setDocumentText()
The following examples show how to use
org.apache.uima.jcas.JCas#setDocumentText().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Annotator2.java From uima-uimafit with Apache License 2.0 | 6 votes |
/**
 * Writes character-sorted, trimmed copies of two document texts into two views.
 *
 * <p>The text of the default sofa view is sorted into {@code ViewNames.SORTED_VIEW}, and the
 * text of {@code ViewNames.PARENTHESES_VIEW} is sorted into
 * {@code ViewNames.SORTED_PARENTHESES_VIEW}.
 *
 * @param jCas the CAS being processed
 * @throws AnalysisEngineProcessException wrapping any {@link CASException} raised while
 *         accessing views
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    try {
        JCas sortedView = ViewCreatorAnnotator.createViewSafely(jCas, ViewNames.SORTED_VIEW);
        // From here on work against the default view.
        jCas = jCas.getView(CAS.NAME_DEFAULT_SOFA);
        sortedView.setDocumentText(sortCharacters(jCas.getDocumentText()));

        JCas sortedParenthesesView =
                ViewCreatorAnnotator.createViewSafely(jCas, ViewNames.SORTED_PARENTHESES_VIEW);
        JCas parenthesesView = jCas.getView(ViewNames.PARENTHESES_VIEW);
        sortedParenthesesView.setDocumentText(sortCharacters(parenthesesView.getDocumentText()));
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }
}

/** Returns the characters of {@code text} in sorted order, with surrounding whitespace trimmed. */
private static String sortCharacters(String text) {
    char[] chars = text.toCharArray();
    Arrays.sort(chars);
    return new String(chars).trim();
}
Example 2
Source File: ConsumerUtilsTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Checks that the external id of an entity set is repeatable, matches a pinned hash for the
 * two-person set, and differs when the set contents differ.
 */
@Test
public void testEntityExternalId() throws UIMAException, BaleenException {
    JCas jCas = JCasSingleton.getJCasInstance();
    jCas.setDocumentText("Hello World");

    Person janeDoe = new Person(jCas);
    janeDoe.setGender("female");
    janeDoe.setValue("Jane Doe");
    janeDoe.addToIndexes(jCas);

    Person jDoe = new Person(jCas);
    jDoe.setGender("female");
    jDoe.setValue("J. Doe");
    jDoe.addToIndexes(jCas);

    // Same input twice must yield the same id.
    String firstId = ConsumerUtils.getExternalId(ImmutableSet.of(janeDoe, jDoe));
    String secondId = ConsumerUtils.getExternalId(ImmutableSet.of(janeDoe, jDoe));
    assertEquals(firstId, secondId);

    // Pinned expected value for this pair of entities.
    assertEquals(
            "d3c514ea1fb3367430959255917ee4de12468004897d683d60114b475d37264a",
            ConsumerUtils.getExternalId(ImmutableSet.of(janeDoe, jDoe)));

    // A different set of entities must yield a different id.
    String singleId = ConsumerUtils.getExternalId(ImmutableSet.of(janeDoe));
    String pairId = ConsumerUtils.getExternalId(ImmutableSet.of(janeDoe, jDoe));
    assertNotEquals(singleId, pairId);
}
Example 3
Source File: WebAnnoTsv3WriterTestBase.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Writes a document whose two sentences ("one" and "two") are directly adjacent, with no
 * whitespace between them, and compares the output against the expected reference.
 */
@Test
public void testTwoSentencesWithNoSpaceInBetween() throws Exception {
    TypeSystemDescription builtInTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription testTypes =
            TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/type/webannoTestTypes.xml");
    TypeSystemDescription combinedTypes =
            CasCreationUtils.mergeTypeSystems(asList(builtInTypes, testTypes));

    JCas jcas = JCasFactory.createJCas(combinedTypes);
    DocumentMetaData.create(jcas).setDocumentId("doc");
    jcas.setDocumentText("onetwo");

    // First token/sentence: "one"
    new Token(jcas, 0, 3).addToIndexes();
    new Sentence(jcas, 0, 3).addToIndexes();

    // Second token/sentence: "two", starting exactly where the first ends
    new Token(jcas, 3, 6).addToIndexes();
    new Sentence(jcas, 3, 6).addToIndexes();

    writeAndAssertEquals(jcas);
}
Example 4
Source File: DocumentConverterTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Verifies that converting a two-sentence document produces two Baleen sentence annotations
 * with offsets 0-15 and 16-36.
 */
@Test
public void canConvertSentence() throws UIMAException {
    JCas jCas = JCasFactory.createJCas();
    jCas.setDocumentText("This is a test. This is another test.");

    // Second sentence of the mocked document, with explicit token offsets.
    String[] tokens = new String[] {"This", "is", "another", "test", "."};
    int[] tokenBegins = new int[] {16, 21, 24, 31, 35};
    int[] tokenEnds = new int[] {20, 23, 30, 34, 36};
    Sentence secondSentence = new Sentence(tokens, tokenBegins, tokenEnds, tokens);
    when(document.sentences()).thenReturn(new Sentence[] {sentence, secondSentence});

    new DocumentConverter(jCas, document).convert();

    Collection<uk.gov.dstl.baleen.types.language.Sentence> converted =
            JCasUtil.select(jCas, uk.gov.dstl.baleen.types.language.Sentence.class);
    assertEquals(2, converted.size());

    Iterator<uk.gov.dstl.baleen.types.language.Sentence> it = converted.iterator();

    uk.gov.dstl.baleen.types.language.Sentence first = it.next();
    assertEquals(0, first.getBegin());
    assertEquals(15, first.getEnd());

    uk.gov.dstl.baleen.types.language.Sentence second = it.next();
    assertEquals(16, second.getBegin());
    assertEquals(36, second.getEnd());
}
Example 5
Source File: PubmedArchiveCollectionReader2.java From bluima with Apache License 2.0 | 6 votes |
public void getNext(JCas jcas) throws IOException, CollectionException { MedlineCitation article = articlesIt.next(); // text Abstract abstrct = article.getArticle().getAbstract(); if (abstrct != null) jcas.setDocumentText(abstrct.getAbstractText()); // add metadata String title = article.getArticle().getArticleTitle().getvalue(); Header header = new Header(jcas); header.setDocId(article.getPMID().getvalue().toString()); header.setTitle(title); // header.setSource(nextArticle.file); header.setComponentId(PubmedArchiveCollectionReader2.class.getName()); header.addToIndexes(); DateCreated dateCreated = article.getDateCreated(); //FIXME use dateCompleted Date pubDateA = new Date(jcas); pubDateA.setDay(parseInt(dateCreated.getDay().getvalue())); pubDateA.setMonth(parseInt(dateCreated.getMonth().getvalue())); pubDateA.setYear(parseInt(dateCreated.getYear().getvalue())); pubDateA.addToIndexes(); }
Example 6
Source File: JCasDeserialiser.java From baleen with Apache License 2.0 | 6 votes |
/** * Deserialise the given JSON map by populating the given JCas. * * @param jCas to populate * @param input to deserialise * @throws IOException if there is an error while deserialising. */ public void deseralize(final JCas jCas, final Map<String, Object> input) { // Read top level jCas.setDocumentText((String) input.getOrDefault(JsonJCas.DOCUMENT_TEXT, "")); jCas.setDocumentLanguage((String) input.getOrDefault(JsonJCas.DOCUMENT_LANGUAGE, "")); // Read Document annotations final DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas); final Map<String, Object> daNode = (Map<String, Object>) input.get(JsonJCas.DOCUMENT_ANNOTATION); processDocumentAnnotation(jCas, documentAnnotation, daNode); final List<Map<String, Object>> annotationsNode = (List<Map<String, Object>>) input.get(JsonJCas.ANNOTATIONS); final List<ReferencedFeatures> featuresToDereference = processAnnotations(jCas, annotationsNode); // Here we need to do hydrate the references final Map<Long, BaleenAnnotation> annotationIndex = buildAnnotationIndex(jCas); featuresToDereference.forEach(r -> r.rehydrate(jCas, annotationIndex)); }
Example 7
Source File: RecordStructureManagerTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Prepares the shared JCas with the test text and annotations, then builds the
 * {@code RecordStructureManager} under test from the resulting structure hierarchy.
 */
@Before
public void setUp() throws Exception {
    JCas cas = JCasSingleton.getJCasInstance();
    cas.setDocumentText(TEXT);
    addAnnotations(cas);

    recordStructureManager =
            new RecordStructureManager(
                    StructureHierarchy.build(cas, StructureUtil.getStructureClasses()));
}
Example 8
Source File: ComparableEntitySpanTest.java From baleen with Apache License 2.0 | 5 votes |
@Test public void test() throws UIMAException { JCas jCas = JCasSingleton.getJCasInstance(); jCas.setDocumentText("Hello world"); final Entity e = new Entity(jCas, 0, 5); final ComparableEntitySpan span = new ComparableEntitySpan(e, 0, 5); assertEquals(0, span.getBegin()); assertEquals(5, span.getEnd()); assertSame(e, span.getEntity()); assertSame(e.getClass(), span.getClazz()); assertEquals("Hello", span.getValue()); e.setValue("Howdy"); assertEquals("Howdy", span.getValue()); final ComparableEntitySpan span2 = new ComparableEntitySpan(e, 0, 5); final ComparableEntitySpan span3 = new ComparableEntitySpan(e, 0, 6); final ComparableEntitySpan span4 = new ComparableEntitySpan(e, 1, 5); final ComparableEntitySpan span5 = new ComparableEntitySpan(new Person(jCas), 1, 5); assertEquals(span, span2); assertEquals(span.hashCode(), span2.hashCode()); assertNotEquals(span, span3); assertNotEquals(span.hashCode(), span3.hashCode()); assertNotEquals(span, span5); assertNotEquals(span.hashCode(), span5.hashCode()); assertNotEquals(span, span4); assertNotEquals(span.hashCode(), span4.hashCode()); assertEquals(span, span); assertNotEquals(span, null); assertNotEquals(span, "Hello"); // Check doesn't error span.toString(); }
Example 9
Source File: Annotator1.java From uima-uimafit with Apache License 2.0 | 5 votes |
/**
 * Fills {@code ViewNames.PARENTHESES_VIEW} with the default view's text in which every run
 * of lowercase vowels is wrapped in parentheses.
 *
 * @param jCas the CAS being processed
 * @throws AnalysisEngineProcessException wrapping any {@link CASException} raised while
 *         accessing views
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    try {
        JCas targetView =
                ViewCreatorAnnotator.createViewSafely(jCas, ViewNames.PARENTHESES_VIEW);
        jCas = jCas.getView(CAS.NAME_DEFAULT_SOFA);
        String sourceText = jCas.getDocumentText();
        targetView.setDocumentText(sourceText.replaceAll("[aeiou]+", "($0)"));
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Example 10
Source File: OffsetTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code OffsetUtil.getText} returns the covered text for an empty offset, a
 * leading word, an inner span and the whole document.
 */
@Test
public void testGetOffsetText() throws UIMAException {
    JCas cas = JCasFactory.createJCas();
    cas.setDocumentText("This is a test.");

    Offset empty = new Offset(0, 0);
    assertEquals("", OffsetUtil.getText(cas, empty));

    Offset firstWord = new Offset(0, 4);
    assertEquals("This", OffsetUtil.getText(cas, firstWord));

    Offset middle = new Offset(4, 10);
    assertEquals(" is a ", OffsetUtil.getText(cas, middle));

    Offset whole = new Offset(0, 15);
    assertEquals("This is a test.", OffsetUtil.getText(cas, whole));
}
Example 11
Source File: ViewCopier.java From biomedicus with Apache License 2.0 | 5 votes |
/**
 * Copies the document text and every indexed feature structure from {@code source} into
 * {@code target}.
 *
 * @param source the view to read from
 * @param target the view to write to
 */
@Override
public void migrate(JCas source, JCas target) {
    target.setDocumentText(source.getDocumentText());

    FeatureStructureCopyingQueue copyQueue =
            new FeatureStructureCopyingQueue(source.getCas(), target.getCas());

    // Enqueue everything reachable from the index under the TOP type, then run the copy.
    for (FSIterator<FeatureStructure> indexed =
                    source.getIndexRepository().getAllIndexedFS(source.getCasType(TOP.type));
            indexed.hasNext(); ) {
        copyQueue.enqueue(indexed.next());
    }

    copyQueue.run();
}
Example 12
Source File: MistAnalysisEngine.java From ctakes-docker with Apache License 2.0 | 5 votes |
/**
 * Runs the decoder over the document text and stores the result in the de-identification
 * view, with every match of {@code xmlPatt} replaced by {@code [type]}, where the type is the
 * first capture group of the match.
 *
 * @param jCas the CAS being processed
 * @throws AnalysisEngineProcessException wrapping any exception raised during processing
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    try {
        JCas deidView = CasUtil.getView(jCas.getCas(), DEID_VIEW_NAME, true).getJCas();
        copyDocIdToView(jCas, deidView);

        // NOTE(review): as written, both replace() calls map a character to itself and are
        // no-ops. Presumably the original escaped/unescaped "<" and ">" as XML entities and
        // the entities were lost in transit -- confirm against the upstream source.
        String rawText = jCas.getDocumentText().replace("<","<").replace(">",">");
        String text = forceXmlSerializable(rawText);
        String decoderOut = decoder.decodeString(text);

        // Replace one match at a time, re-scanning the updated string after each replacement.
        for (Matcher m = xmlPatt.matcher(decoderOut);
                m.find();
                m = xmlPatt.matcher(decoderOut)) {
            String matchType = m.group(1);
            String before = decoderOut.substring(0, m.start());
            String after = decoderOut.substring(m.end());
            decoderOut = before + "[" + matchType + "]" + after;
        }

        deidView.setDocumentText(decoderOut);
    } catch (Exception e) {
        System.err.println("Error trying to run mist!");
        throw new AnalysisEngineProcessException(e);
    }
}
Example 13
Source File: TearlineContentExtractor.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Parses the stream with Tika and sets the document text to the content that precedes the
 * first tearline match (or the full content when there is no match), after boilerplate
 * removal and trimming. All metadata entries reported by the parser are then added to the CAS.
 *
 * <p>A {@code SAXException} or {@code TikaException} is logged as a warning and not rethrown.
 *
 * @param stream the content to parse
 * @param source the source name, used in the warning message
 * @param jCas the CAS to populate
 */
@Override
public void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException {
    super.doProcessStream(stream, source, jCas);

    try {
        BodyContentHandler textHandler = new BodyContentHandler(Integer.MAX_VALUE);
        Metadata metadata = new Metadata();
        ParseContext context = new ParseContext();

        new AutoDetectParser().parse(stream, textHandler, metadata, context);

        String fullContent = textHandler.toString();
        Matcher tearline = tearlinePattern.matcher(fullContent);

        // Keep only what comes before the tearline, when there is one.
        String relevantContent =
                tearline.find() ? fullContent.substring(0, tearline.start()) : fullContent;
        jCas.setDocumentText(removeBoilerplate(relevantContent).trim());

        for (String name : metadata.names()) {
            addMetadata(jCas, name, metadata.get(name));
        }
    } catch (SAXException | TikaException e) {
        getMonitor().warn("Couldn't parse metadata from '{}'", source, e);
    }
}
Example 14
Source File: Tcf2DKPro.java From inception with Apache License 2.0 | 5 votes |
/** * This method builds texts from the {@link eu.clarin.weblicht.wlfxb.tc.api.Token} annotation * layer. The getText Method of {@link TextCorpusStreamed} is not used as some tokens, such as * special characters represented differently than in the original text. * <p> * If the CAS already contains a document text, it is kept. * <p> * If the CAS already contains a document language, it is kept. * * @param aJCas * the JCas. * @param aCorpusData * the TCF document. */ public void convertText(JCas aJCas, TextCorpus aCorpusData) { if (aJCas.getDocumentText() == null) { StringBuilder text = new StringBuilder(); for (int i = 0; i < aCorpusData.getTokensLayer().size(); i++) { eu.clarin.weblicht.wlfxb.tc.api.Token token = aCorpusData.getTokensLayer() .getToken(i); if (token.getStart() != null && token.getEnd() != null) { // Assuming all of the tokens have offset information... while (text.length() < token.getStart()) { text.append(" "); } } else { // Assuming none of the tokens has offset information... if (i > 0) { text.append(" "); } } text.append(token.getString()); } aJCas.setDocumentText(text.toString()); } aJCas.setDocumentLanguage(aCorpusData.getLanguage()); }
Example 15
Source File: RemoveDanglingRelationsRepairTest.java From webanno with Apache License 2.0 | 5 votes |
@Test public void test() throws Exception { JCas jcas = JCasFactory.createJCas(); jcas.setDocumentText("This is a test."); Token span1 = new Token(jcas, 0, 4); span1.addToIndexes(); Token span2 = new Token(jcas, 6, 8); Dependency dep = new Dependency(jcas, 0, 8); dep.setGovernor(span1); dep.setDependent(span2); dep.addToIndexes(); List<LogMessage> messages = new ArrayList<>(); CasDoctor cd = new CasDoctor(RemoveDanglingRelationsRepair.class, AllFeatureStructuresIndexedCheck.class); // A project is not required for this check boolean result = cd.analyze(null, jcas.getCas(), messages); // A project is not required for this repair cd.repair(null, jcas.getCas(), messages); assertFalse(result); messages.forEach(System.out::println); }
Example 16
Source File: SimpleTextSegmenter.java From uima-uimaj with Apache License 2.0 | 5 votes |
public AbstractCas next() throws AnalysisEngineProcessException { int breakAt = mPos + mSegmentSize; if (breakAt > mDoc.length()) breakAt = mDoc.length(); // search for the next newline character. Note: this example segmenter implementation // assumes that the document contains many newlines. In the worst case, if this segmenter // is runon a document with no newlines, it will produce only one segment containing the // entire document text. A better implementation might specify a maximum segment size as // well as a minimum. while (breakAt < mDoc.length() && mDoc.charAt(breakAt - 1) != '\n') breakAt++; JCas jcas = getEmptyJCas(); try { jcas.setDocumentText(mDoc.substring(mPos, breakAt)); // if original CAS had SourceDocumentInformation, also add SourceDocumentInformatio // to each segment if (mDocUri != null) { SourceDocumentInformation sdi = new SourceDocumentInformation(jcas); sdi.setUri(mDocUri); sdi.setOffsetInSource(mPos); sdi.setDocumentSize(breakAt - mPos); sdi.addToIndexes(); if (breakAt == mDoc.length()) { sdi.setLastSegment(true); } } mPos = breakAt; return jcas; } catch (Exception e) { jcas.release(); throw new AnalysisEngineProcessException(e); } }
Example 17
Source File: MboxReader.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Process body of message as plain text.
 *
 * <p>Reads the whole body, trims it and sets it as the document text. The reader obtained
 * from the body is closed once it has been consumed; {@code CharStreams.toString} does not
 * close it itself.
 *
 * @param jCas the CAS to populate
 * @param textBody the message body to read
 * @throws IOException if the body cannot be read
 */
private void processTextBody(JCas jCas, TextBody textBody) throws IOException {
    final String text;
    try (java.io.Reader reader = textBody.getReader()) {
        text = CharStreams.toString(reader);
    }
    jCas.setDocumentText(text.trim());
}
Example 18
Source File: DummyCollectionReader.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Removes the first entry of {@code documents} and uses it as the document text of the
 * given CAS.
 *
 * @param jCas the CAS to populate
 */
@Override
public void doGetNext(JCas jCas) throws IOException, CollectionException {
    String nextDocument = documents.remove(0);
    jCas.setDocumentText(nextDocument);
}
Example 19
Source File: ConstraintsGeneratorTest.java From webanno with Apache License 2.0 | 4 votes |
@Test public void testSimplePath() throws Exception { ConstraintsGrammar parser = new ConstraintsGrammar(new FileInputStream( "src/test/resources/rules/10.rules")); Parse p = parser.Parse(); ParsedConstraints constraints = p.accept(new ParserVisitor()); JCas jcas = JCasFactory.createJCas(); jcas.setDocumentText("The sun."); // Add token annotations Token t_the = new Token(jcas, 0, 3); t_the.addToIndexes(); Token t_sun = new Token(jcas, 0, 3); t_sun.addToIndexes(); // Add POS annotations and link them to the tokens POS p_the = new POS(jcas, t_the.getBegin(), t_the.getEnd()); p_the.setPosValue("DET"); p_the.addToIndexes(); t_the.setPos(p_the); POS p_sun = new POS(jcas, t_sun.getBegin(), t_sun.getEnd()); p_sun.setPosValue("NN"); p_sun.addToIndexes(); t_sun.setPos(p_sun); // Add dependency annotations Dependency dep_the_sun = new Dependency(jcas); dep_the_sun.setGovernor(t_sun); dep_the_sun.setDependent(t_the); dep_the_sun.setDependencyType("det"); dep_the_sun.setBegin(dep_the_sun.getGovernor().getBegin()); dep_the_sun.setEnd(dep_the_sun.getGovernor().getEnd()); dep_the_sun.addToIndexes(); Evaluator constraintsEvaluator = new ValuesGenerator(); List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues( dep_the_sun, "DependencyType", constraints); List<PossibleValue> expectedOutput = new LinkedList<>(); expectedOutput.add(new PossibleValue("det", false)); assertEquals(expectedOutput, possibleValues); }
Example 20
Source File: ProperNounInformationCollectorTest.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Builds a two-sentence document with word tokens and three co-referent {@code Person}
 * mentions, and checks that the collector returns a single entity information entry with
 * the expected reference target, mentions and sentences.
 */
@Test
public void testCanCollectInformation() throws UIMAException {
    JCas jCas = JCasFactory.createJCas();
    jCas.setDocumentText(
            "Sir John Major was Prime Minister of the United Kingdom. Major became Prime Minister after Thatcher resigned.");

    List<Sentence> sentences = Annotations.createSentences(jCas);

    addWordToken(jCas, 4, 8, "NNP");
    addWordToken(jCas, 9, 14, "NNP");
    addWordToken(jCas, 19, 33, "NN");
    addWordToken(jCas, 59, 64, "NNP");

    Person sirJohnMajor = Annotations.createPerson(jCas, 0, 14, "Sir John Major");
    Person primeMinister = Annotations.createPerson(jCas, 19, 33, "Prime Minister");
    Person major = Annotations.createPerson(jCas, 59, 64, "Major");
    ReferenceTarget target =
            Annotations.createReferenceTarget(jCas, sirJohnMajor, primeMinister, major);

    ProperNounInformationCollector collector = new ProperNounInformationCollector();
    Set<EntityInformation<Person>> collected =
            collector.getEntityInformation(jCas, Person.class);

    assertEquals(1, collected.size());

    EntityInformation<Person> information = collected.iterator().next();
    assertEquals(target, information.getReferenceTarget());
    assertTrue(
            CollectionUtils.isEqualCollection(
                    ImmutableSet.of(sirJohnMajor, major), information.getMentions()));
    assertTrue(CollectionUtils.isEqualCollection(sentences, information.getSentences()));
}

/** Creates a word token with the given offsets and part of speech and adds it to the indexes. */
private static void addWordToken(JCas jCas, int begin, int end, String partOfSpeech) {
    WordToken token = new WordToken(jCas);
    token.setBegin(begin);
    token.setEnd(end);
    token.setPartOfSpeech(partOfSpeech);
    token.addToIndexes(jCas);
}