org.apache.uima.fit.factory.AnalysisEngineFactory Java Examples
The following examples show how to use
org.apache.uima.fit.factory.AnalysisEngineFactory.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Html5Test.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Runs the Html5 consumer over a document whose Person annotation spans a blank line and
 * checks that the written HTML file exists and contains the span with its line breaks.
 */
@Test
public void testLineBreak() throws UIMAException, IOException {
    AnalysisEngine html5Consumer =
        AnalysisEngineFactory.createEngine(
            Html5.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            Html5.PARAM_OUTPUT_FOLDER,
            outputFolder.getPath());

    DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
    docAnnotation.setSourceUri("multiline.txt");
    jCas.setDocumentText("His name was James\n\nBond.");

    // Offsets 13-24 cover "James\n\nBond" in the document text above.
    Person person = new Person(jCas, 13, 24);
    person.addToIndexes();

    html5Consumer.process(jCas);

    File htmlFile = new File(outputFolder, "multiline.txt.html");
    assertTrue(htmlFile.exists());

    String html = Files.asCharSource(htmlFile, StandardCharsets.UTF_8).read();
    assertTrue(html.contains("data-referent=\"\">James\n\nBond</span>"));
}
Example #2
Source File: BlacklistTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Runs the Blacklist annotator with two blacklisted terms and entity-value checking switched
 * on, then verifies the resulting annotation counts via {@code assertCorrect}.
 */
@Test
public void testBlacklistEntityValue() throws Exception {
    String[] blacklistedTerms = {LONDON, UNITED_KINGDOM};

    AnalysisEngine blacklistEngine =
        AnalysisEngineFactory.createEngine(
            Blacklist.class,
            Blacklist.PARAM_BLACKLIST, blacklistedTerms,
            Blacklist.PARAM_CHECK_ENTITY_VALUE, true);

    createDocument(jCas);
    blacklistEngine.process(jCas);

    assertCorrect(1, 1, 0);

    blacklistEngine.destroy();
}
Example #3
Source File: SimplePipelineTest.java From uima-uimafit with Apache License 2.0 | 6 votes |
@Test public void test1() throws UIMAException, IOException { // Creating a CAS locally here to work around UIMA-5097 - otherwise this test may fail if // run in Eclipse or in other unit test setups where the same JVM is re-used for multiple tests. TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescription(); TypePriorities tp = TypePrioritiesFactory.createTypePriorities(new String[] { "org.apache.uima.fit.type.Sentence", "org.apache.uima.fit.type.AnalyzedText", "org.apache.uima.fit.type.Token" }); JCas jcas = CasCreationUtils.createCas(tsd, tp, null).getJCas(); CasIOUtil.readJCas(jcas, new File("src/test/resources/data/docs/test.xmi")); AnalysisEngineDescription aed1 = AnalysisEngineFactory.createEngineDescription( Annotator1.class, typeSystemDescription); AnalysisEngineDescription aed2 = AnalysisEngineFactory.createEngineDescription( Annotator2.class, typeSystemDescription); AnalysisEngineDescription aed3 = AnalysisEngineFactory.createEngineDescription( Annotator3.class, typeSystemDescription); SimplePipeline.runPipeline(jcas, aed1, aed2, aed3); }
Example #4
Source File: Step0bTextSegmenterA.java From argument-reasoning-comprehension-task with Apache License 2.0 | 6 votes |
/**
 * Returns the shared tokenizing pipeline description, building it on first use.
 *
 * <p>The aggregate is composed of a {@code ParagraphSplitter} configured with the
 * single-line-break pattern, an {@code ArkTweetTokenizerFixed}, and a
 * {@code StanfordSegmenter} with token writing disabled and {@code Paragraph} as its zone
 * type.
 *
 * @return the cached pipeline description
 * @throws IOException if the description cannot be created; the underlying
 *     {@link ResourceInitializationException} is attached as the cause
 */
private static AnalysisEngineDescription getPipeline() throws IOException {
    if (pipelineSingleton == null) {
        try {
            pipelineSingleton =
                AnalysisEngineFactory.createEngineDescription(
                    AnalysisEngineFactory.createEngineDescription(
                        ParagraphSplitter.class,
                        ParagraphSplitter.PARAM_SPLIT_PATTERN,
                        ParagraphSplitter.SINGLE_LINE_BREAKS_PATTERN),
                    AnalysisEngineFactory.createEngineDescription(ArkTweetTokenizerFixed.class),
                    AnalysisEngineFactory.createEngineDescription(
                        StanfordSegmenter.class,
                        StanfordSegmenter.PARAM_WRITE_TOKEN, false,
                        StanfordSegmenter.PARAM_ZONE_TYPES, Paragraph.class.getCanonicalName()));
        } catch (ResourceInitializationException e) {
            // Preserve the original failure so callers can see why initialization failed.
            throw new IOException(e);
        }
    }
    return pipelineSingleton;
}
Example #5
Source File: AbstractBaleenFileConsumerTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Runs TestFileConsumer without a base path and checks that a file named {@code FILENAME}
 * is created relative to the working directory.
 *
 * <p>The file is removed in a {@code finally} block so that a failing run does not leave it
 * behind in the working directory.
 */
@Test
public void testNullBasePath() throws Exception {
    AnalysisEngine consumer =
        AnalysisEngineFactory.createEngine(
            TestFileConsumer.class, TypeSystemSingleton.getTypeSystemDescriptionInstance());

    DocumentAnnotation da = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
    da.setSourceUri(FILENAME);

    File f = new File(FILENAME);
    try {
        consumer.process(jCas);
        assertTrue(f.exists());
    } finally {
        // Best-effort cleanup; the result is ignored because the file may never have been written.
        f.delete();
    }
}
Example #6
Source File: CorefCapitalisationAndApostropheTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Runs CorefCapitalisationAndApostrophe over two Person annotations, the first of which has
 * only a span and no value, and expects both to end up with the same single ReferenceTarget.
 */
@Test
public void testMissingValue() throws Exception {
    AnalysisEngine corefEngine =
        AnalysisEngineFactory.createEngine(CorefCapitalisationAndApostrophe.class);

    jCas.setDocumentText(TEXT);

    // First person: begin/end only, no value is set.
    Person unnamed = new Person(jCas);
    unnamed.setBegin(0);
    unnamed.setEnd(5);
    unnamed.addToIndexes();

    Annotations.createPerson(jCas, 22, 27, JAMES_UC);

    corefEngine.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size());

    ReferenceTarget target = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
    Person firstPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
    Person secondPerson = JCasUtil.selectByIndex(jCas, Person.class, 1);

    assertEquals(target, firstPerson.getReferent());
    assertEquals(target, secondPerson.getReferent());
}
Example #7
Source File: EntityCountTest.java From baleen with Apache License 2.0 | 6 votes |
@Test public void testEntityCountOutputReadOnly() throws Exception { File output = Files.createTempFile("baleen-entitycount", ".tsv").toFile(); output.setReadOnly(); try { AnalysisEngineFactory.createEngine( EntityCount.class, TypeSystemSingleton.getTypeSystemDescriptionInstance(), OUTPUT_FILE, output.getPath()); fail("Expected exception not thrown"); } catch (Exception ex) { // Do nothing } output.delete(); }
Example #8
Source File: PrintTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Builds a Relation from a Person source to a Location target, indexes the relation, and runs
 * the Relations engine over the CAS. The test makes no assertions; it only requires the
 * pipeline to complete.
 */
@Test
public void testRelations() throws UIMAException {
    final Person source = new Person(jCas);
    source.setValue("source");

    final Location target = new Location(jCas);
    target.setValue("target");

    final Relation relation = new Relation(jCas);
    relation.setSource(source);
    relation.setTarget(target);
    relation.setRelationshipType("check");
    relation.addToIndexes();

    SimplePipeline.runPipeline(
        jCas,
        AnalysisEngineFactory.createEngine(Relations.class));
}
Example #9
Source File: AbstractBaleenFileConsumerTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Processes a document that has no source URI set and checks that the consumer writes the
 * document text to a hash-named {@code .txt} file under the base directory.
 */
@Test
public void testNoSource() throws Exception {
    File tempBase = Files.createTempDir();

    AnalysisEngine fileConsumer =
        AnalysisEngineFactory.createEngine(
            TestFileConsumer.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            BASE_PATH, tempBase.getPath(),
            "extension", "txt");

    jCas.setDocumentText(TEXT);
    fileConsumer.process(jCas);

    // The expected name is the SHA-256 hex digest of the empty string; presumably derived from
    // the missing source URI - confirm against the consumer implementation.
    File written =
        new File(
            tempBase,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.txt");
    String contents = FileUtils.file2String(written);

    assertEquals(TEXT, contents);
}
Example #10
Source File: CorefBracketsTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Runs CorefBrackets where the Coordinate already has a referent and the Location has none,
 * and expects both to share the single ReferenceTarget afterwards.
 */
@Test
public void testExistingCoordReferent() throws Exception {
    AnalysisEngine bracketsEngine = AnalysisEngineFactory.createEngine(CorefBrackets.class);

    jCas.setDocumentText(LOC_TEXT);

    ReferenceTarget existingTarget = Annotations.createReferenceTarget(jCas);
    Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null);
    Coordinate seededCoordinate = Annotations.createCoordinate(jCas, 11, 19, MRGS);
    seededCoordinate.setReferent(existingTarget);

    bracketsEngine.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size());

    ReferenceTarget target = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
    Location location = JCasUtil.selectByIndex(jCas, Location.class, 0);
    Coordinate coordinate = JCasUtil.selectByIndex(jCas, Coordinate.class, 0);

    assertEquals(target, location.getReferent());
    assertEquals(target, coordinate.getReferent());
    assertEquals(coordinate.getReferent(), location.getReferent());
}
Example #11
Source File: ListTest.java From baleen with Apache License 2.0 | 6 votes |
@Test public void testmultipleHits() throws Exception { AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription( List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION); AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed); // the same search term appears multiple times in text... jCas.setDocumentText("Hello world, and hello world again."); ae.process(jCas); assertEquals(2, JCasUtil.select(jCas, Location.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals(WORLD, l.getValue()); assertEquals(WORLD, l.getCoveredText()); ae.destroy(); }
Example #12
Source File: TemporalElasticsearchTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Starts an embedded Elasticsearch instance and creates the TemporalElasticsearch engine,
 * bound to a shared resource that points at that instance's transport port and cluster name.
 */
@Before
public void setUp() throws Exception {
    elasticsearch = new EmbeddedElasticsearch5();

    ExternalResourceDescription sharedResource =
        ExternalResourceFactory.createNamedResourceDescription(
            RESOURCE_KEY,
            SharedElasticsearchResource.class,
            PARAM_PORT, Integer.toString(elasticsearch.getTransportPort()),
            PARAM_CLUSTER, elasticsearch.getClusterName());

    AnalysisEngineDescription engineDescription =
        AnalysisEngineFactory.createEngineDescription(
            TemporalElasticsearch.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            RESOURCE_KEY, sharedResource,
            PARAM_INDEX, TEMPORAL_INDEX);

    ae = AnalysisEngineFactory.createEngine(engineDescription);
}
Example #13
Source File: CasDumpWriterTest.java From uima-uimafit with Apache License 2.0 | 6 votes |
/**
 * Writes a CAS dump for the test XMI and compares it with the stored reference dump, after
 * trimming both and normalizing CRLF to LF in the actual output.
 */
@Test
public void test() throws Exception {
    File dumpFile = new File(folder.getRoot(), "dump-output.txt");

    AnalysisEngine dumpWriter =
        AnalysisEngineFactory.createEngine(
            CasDumpWriter.class,
            CasDumpWriter.PARAM_OUTPUT_FILE, dumpFile.getPath());

    JCas cas = dumpWriter.newJCas();
    CasIOUtil.readJCas(cas, new File("src/test/resources/data/docs/test.xmi"));
    dumpWriter.process(cas);

    assertTrue(dumpFile.exists());

    File referenceFile = new File("src/test/resources/data/reference/test.xmi.dump");
    String expected = readFileToString(referenceFile, "UTF-8").trim();

    // Only the actual output has its line endings normalized.
    String produced = readFileToString(dumpFile, "UTF-8").trim().replace("\r\n", "\n");

    assertEquals(expected, produced);
}
Example #14
Source File: CustomResourceTermSuiteAEFactory.java From termsuite-core with Apache License 2.0 | 6 votes |
/**
 * Builds the description of an engine that spots fixed expressions in the CAS and creates a
 * {@link FixedExpression} annotation whenever one is found.
 *
 * <p>The spotter is configured with a maximum expression size of 5, keeps word annotations,
 * and removes term-occurrence annotations from the CAS. The fixed-expression resource for
 * the given language is bound to it.
 *
 * @param resourceConfig configuration used to resolve the fixed-expression resource URL
 * @param lang language whose fixed-expression resource is loaded
 * @return the FixedExpressionSpotter description with its fixed-expression resource bound
 * @throws PreparationPipelineException wrapping any exception raised while building
 */
public static AnalysisEngineDescription createFixedExpressionSpotterAEDesc(ResourceConfig resourceConfig, Lang lang) {
    try {
        AnalysisEngineDescription spotter =
            AnalysisEngineFactory.createEngineDescription(
                FixedExpressionSpotter.class,
                FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
                FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, false,
                FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, true);

        ExternalResourceDescription fixedExpressions =
            ExternalResourceFactory.createExternalResourceDescription(
                FixedExpressionResource.class,
                getResourceURL(resourceConfig, ResourceType.FIXED_EXPRESSIONS, lang));

        ExternalResourceFactory.bindResource(
            spotter,
            FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
            fixedExpressions);

        return spotter;
    } catch (Exception cause) {
        throw new PreparationPipelineException(cause);
    }
}
Example #15
Source File: EntityCountTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Points EntityCount at a file path inside a fresh temp directory and checks the single
 * tab-separated line written for the test document.
 */
@Test
public void testEntityCountOutputNewFile() throws Exception {
    File tempDir = Files.createTempDirectory("baleen").toFile();
    File countFile = new File(tempDir, "baleen-entitycount.tsv");

    AnalysisEngine entityCounter =
        AnalysisEngineFactory.createEngine(
            EntityCount.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            OUTPUT_FILE, countFile.getPath());

    createDocument();
    entityCounter.process(jCas);

    String written = FileUtils.file2String(countFile).trim();
    assertEquals("test1.txt\t2", written);

    entityCounter.destroy();

    // Remove the file first so that the directory is empty when it is deleted.
    countFile.delete();
    tempDir.delete();
}
Example #16
Source File: Html5Test.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Runs the Html5 consumer with external IDs enabled and content-hash IDs disabled, and
 * expects the output file to carry the given 64-hex-character name rather than the source
 * file name.
 */
@Test
public void testCreateExternalIdFile() throws UIMAException {
    AnalysisEngine html5Consumer =
        AnalysisEngineFactory.createEngine(
            Html5.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            Html5.PARAM_OUTPUT_FOLDER, outputFolder.getPath(),
            Html5.PARAM_USE_EXTERNAL_ID, true,
            Html5.PARAM_CONTENT_HASH_AS_ID, false);

    jCas.setDocumentText("Hello World!");
    DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
    docAnnotation.setSourceUri("hello.txt");

    html5Consumer.process(jCas);

    File expectedFile =
        new File(
            outputFolder,
            "734cad14909bedfafb5b273b6b0eb01fbfa639587d217f78ce9639bba41f4415.html");
    assertTrue(expectedFile.exists());
}
Example #17
Source File: CorefCapitalisationAndApostropheTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Runs CorefCapitalisationAndApostrophe where the first Person already has a referent, and
 * expects the second Person to be attached to that same single ReferenceTarget.
 */
@Test
public void testOneExistingReferent() throws Exception {
    AnalysisEngine corefEngine =
        AnalysisEngineFactory.createEngine(CorefCapitalisationAndApostrophe.class);

    jCas.setDocumentText(TEXT);

    ReferenceTarget existingTarget = Annotations.createReferenceTarget(jCas);
    Person seededPerson = Annotations.createPerson(jCas, 0, 5, JAMES);
    seededPerson.setReferent(existingTarget);
    Annotations.createPerson(jCas, 22, 27, JAMES_UC);

    corefEngine.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size());

    ReferenceTarget target = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
    Person firstPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
    Person secondPerson = JCasUtil.selectByIndex(jCas, Person.class, 1);

    assertEquals(target, firstPerson.getReferent());
    assertEquals(target, secondPerson.getReferent());
}
Example #18
Source File: BannerAETest.java From bluima with Apache License 2.0 | 6 votes |
/**
 * Runs the sentence splitter and the Banner annotator, with Banner configured for the "bla"
 * view, and checks that the Protein annotation appears only in that view.
 */
@Test
public void testView() throws Exception {
    JCas baseCas = getTestCas("empty!");

    JCas blaView = baseCas.createView("bla");
    blaView.setDocumentText(TEST_SENTENCE);
    createAnnot(blaView, Sentence.class, 0, TEST_SENTENCE.length());

    AnalysisEngine sentenceSplitter =
        AnalysisEngineFactory.createEngine(NaiveSentenceSplitterAnnotator.class);
    AnalysisEngine bannerEngine =
        AnalysisEngineFactory.createEngine(BannerAnnotator.class, PARAM_VIEW, "bla");

    SimplePipeline.runPipeline(baseCas, sentenceSplitter, bannerEngine);

    Collection<Protein> systemViewProteins = select(baseCas, Protein.class);
    assertEquals("nothin in system view", 0, systemViewProteins.size());

    Collection<Protein> blaViewProteins = select(baseCas.getView("bla"), Protein.class);
    assertEquals("one protein in bla view", 1, blaViewProteins.size());
}
Example #19
Source File: ListTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Checks that the List annotator creates a single Location for the one matching term in the
 * text, with the expected value and covered text.
 */
@Test
public void test() throws Exception {
    AnalysisEngineDescription listDescription =
        AnalysisEngineFactory.createEngineDescription(
            List.class,
            List.PARAM_TERMS, terms,
            List.PARAM_TYPE, LOCATION);
    AnalysisEngine listEngine = AnalysisEngineFactory.createEngine(listDescription);

    jCas.setDocumentText("Hello world, this is a test");
    listEngine.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, Location.class).size());

    Location onlyLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
    assertEquals(WORLD, onlyLocation.getValue());
    assertEquals(WORLD, onlyLocation.getCoveredText());

    listEngine.destroy();
}
Example #20
Source File: CustomResourceTermSuiteAEFactory.java From termsuite-core with Apache License 2.0 | 5 votes |
/**
 * Builds the aggregate Mate description: the Mate lemmatizer/tagger, followed by the lemma
 * fixer and the normalizer for the given language.
 *
 * <p>The lemmatizer and POS models are looked up under {@code mateModelPath} as
 * {@code mate-lemma-<code>.model} and {@code mate-pos-<code>.model}, where {@code <code>} is
 * {@code lang.getCode()}. Both files must exist.
 *
 * @param resourceConfig configuration passed on to the normalizer
 * @param lang language whose code selects the model files and configures the lemma fixer
 * @param mateModelPath directory that contains the Mate model files
 * @return the aggregate description of tagger, lemma fixer and normalizer
 * @throws TermSuiteException wrapping any exception raised while building, including the
 *     {@link IllegalArgumentException} thrown when a model file is missing
 */
public static AnalysisEngineDescription createMateAEDesc(ResourceConfig resourceConfig, Lang lang, Path mateModelPath) {
    try {
        AnalysisEngineDescription taggerDescription =
            AnalysisEngineFactory.createEngineDescription(MateLemmatizerTagger.class);

        String lemmaModelFile =
            mateModelPath.resolve("mate-lemma-" + lang.getCode() + ".model").toString();
        String posModelFile =
            mateModelPath.resolve("mate-pos-" + lang.getCode() + ".model").toString();

        // Check both models up front, before binding either one.
        Preconditions.checkArgument(
            Files.exists(Paths.get(lemmaModelFile)),
            "Lemmatizer model does not exist: %s",
            lemmaModelFile);
        Preconditions.checkArgument(
            Files.exists(Paths.get(posModelFile)),
            "Tagger model does not exist: %s",
            posModelFile);

        ExternalResourceFactory.createDependencyAndBind(
            taggerDescription,
            MateLemmatizerTagger.LEMMATIZER,
            MateLemmatizerModel.class,
            lemmaModelFile);
        ExternalResourceFactory.createDependencyAndBind(
            taggerDescription,
            MateLemmatizerTagger.TAGGER,
            MateTaggerModel.class,
            posModelFile);

        AnalysisEngineDescription lemmaFixerDescription =
            AnalysisEngineFactory.createEngineDescription(
                MateLemmaFixer.class,
                MateLemmaFixer.LANGUAGE, lang.getCode());

        AnalysisEngineDescription normalizerDescription =
            createNormalizerAE(resourceConfig, lang, Tagger.MATE);

        return AnalysisEngineFactory.createEngineDescription(
            taggerDescription, lemmaFixerDescription, normalizerDescription);
    } catch (Exception cause) {
        throw new TermSuiteException(cause);
    }
}
Example #21
Source File: CpeBuilder.java From bluima with Apache License 2.0 | 5 votes |
/**
 * Creates an engine description for the given annotator class and configuration values and
 * passes it to the description-based {@code add} overload.
 *
 * @param annotatorClass the annotator to add
 * @param configurationData configuration values forwarded to
 *     {@code AnalysisEngineFactory.createEngineDescription}
 */
@Override
public void add(Class<? extends JCasAnnotator_ImplBase> annotatorClass, Object... configurationData)
        throws InvalidXMLException,
            ResourceInitializationException,
            IOException,
            SAXException,
            CpeDescriptorException {
    this.add(
        AnalysisEngineFactory.createEngineDescription(
            annotatorClass,
            configurationData));
}
Example #22
Source File: MongoRegexTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Runs MongoRegex against a Fongo-backed collection with case-sensitive matching enabled and
 * expects no Location annotations for the test text.
 */
@Test
public void test() throws Exception {
    ExternalResourceDescription fongoResource =
        ExternalResourceFactory.createNamedResourceDescription(
            MONGO,
            SharedFongoResource.class,
            FONGO_COLLECTION, MONGO_COLL,
            FONGO_DATA, objectMapper.writeValueAsString(GAZ_DATA));

    AnalysisEngineDescription regexDescription =
        AnalysisEngineFactory.createEngineDescription(
            MongoRegex.class,
            MONGO, fongoResource,
            COLLECTION, MONGO_COLL,
            TYPE, LOCATION,
            "caseSensitive", true,
            REGEX, LONDON_REGEX);

    AnalysisEngine regexEngine = AnalysisEngineFactory.createEngine(regexDescription);

    jCas.setDocumentText(TEXT);
    regexEngine.process(jCas);

    assertEquals(0, JCasUtil.select(jCas, Location.class).size());

    regexEngine.destroy();
}
Example #23
Source File: PoStagger.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Builds the description of the POS tagger for the given language.
 *
 * <p>The model is loaded from the classpath resource
 * {@code /models/<languageCode>-pos-maxent.bin}. The tagger is configured with
 * {@code Sentence} and {@code Token} as its sentence and token types and {@code "pos"} as
 * the POS feature.
 *
 * @param languageCode language code used to select the model resource
 * @return the PoStagger description with its model resource bound
 * @throws ResourceInitializationException if no model resource exists for
 *     {@code languageCode}, or if the description cannot be created
 */
public static AnalysisEngineDescription getDescription(String languageCode) throws ResourceInitializationException {
    String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);

    // getResource returns null for a missing resource; report that with the offending path
    // instead of failing later with a bare NullPointerException.
    java.net.URL modelUrl = PoStagger.class.getResource(modelPath);
    if (modelUrl == null) {
        throw new ResourceInitializationException(
            new IllegalArgumentException("POS model not found on classpath: " + modelPath));
    }

    return AnalysisEngineFactory.createEngineDescription(
        PoStagger.class,
        opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
        ExternalResourceFactory.createExternalResourceDescription(
            POSModelResourceImpl.class, modelUrl.toString()),
        opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName(),
        opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName(),
        opennlp.uima.util.UimaUtil.POS_FEATURE_PARAMETER, "pos");
}
Example #24
Source File: ListTest.java From baleen with Apache License 2.0 | 5 votes |
@Test public void testReference() throws Exception { // This test demonstrates the case where whitespace is preserved in gazetteer matching. AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription( List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION, List.PARAM_EXACT_WHITESPACE, false); AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed); // words in term to search for separated by multiple spaces, tabs or newline... jCas.setDocumentText("This text mentions New York (also known as NY and the Big Apple)."); ae.process(jCas); // 3 mentions of "New York" and nicknames... assertEquals(3, JCasUtil.select(jCas, Location.class).size()); // ...but they're all the same entity, so only one ReferenceTarget assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals(NEW_YORK, l.getValue()); ae.destroy(); }
Example #25
Source File: PosUimaTokenizer.java From Canova with Apache License 2.0 | 5 votes |
/**
 * Creates the default engine: an aggregate of the sentence annotator, the tokenizer, the
 * English POS tagger and the English stemmer, in that order.
 *
 * @return the instantiated aggregate engine
 * @throws RuntimeException wrapping any exception raised while building the engine
 */
public static AnalysisEngine defaultAnalysisEngine() {
    try {
        return AnalysisEngineFactory.createEngine(
            AnalysisEngineFactory.createEngineDescription(
                SentenceAnnotator.getDescription(),
                TokenizerAnnotator.getDescription(),
                PoStagger.getDescription("en"),
                StemmerAnnotator.getDescription("English")));
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}
Example #26
Source File: ElasticsearchTest.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Starts an embedded Elasticsearch instance, creates the Elasticsearch consumer bound to the
 * shared Elasticsearch and ID-generator resources, and verifies that the index holds no
 * documents, entities, relations or mentions before each test.
 */
@Before
public void before() throws Exception {
    elasticsearch = new EmbeddedElasticsearch5();

    final ExternalResourceDescription elasticsearchResource =
        ExternalResourceFactory.createNamedResourceDescription(
            RESOURCE_KEY,
            SharedElasticsearchResource.class,
            PARAM_CLUSTER, elasticsearch.getClusterName(),
            PARAM_PORT, Integer.toString(elasticsearch.getTransportPort()));

    final ExternalResourceDescription idGeneratorResource =
        ExternalResourceFactory.createNamedResourceDescription(
            SharedIdGenerator.RESOURCE_KEY, SharedIdGenerator.class);

    final AnalysisEngineDescription consumerDescription =
        AnalysisEngineFactory.createEngineDescription(
            Elasticsearch.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            RESOURCE_KEY, elasticsearchResource,
            SharedIdGenerator.RESOURCE_KEY, idGeneratorResource);

    ae = AnalysisEngineFactory.createEngine(consumerDescription);
    ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

    // The index must start out empty for every type the consumer writes.
    assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_DOCUMENT_TYPE));
    assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_ENTITY_TYPE));
    assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_RELATION_TYPE));
    assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_MENTION_TYPE));
}
Example #27
Source File: CreateDbWriterDescriptor.java From ctakes-docker with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws ResourceInitializationException, IOException, SAXException { Map<String,String> env = System.getenv(); ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription( JdbcConnectionResourceImpl.class, "null", // method is ambiguous because all strings are objects so this is here as the unneede (i think) aURL argument JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS, "oracle.jdbc.OracleDriver", JdbcConnectionResourceImpl.PARAM_URL, "jdbc:oracle:thin:@" + System.getProperty("oracle_host"), JdbcConnectionResourceImpl.PARAM_USERNAME, System.getProperty("oracle_user"), JdbcConnectionResourceImpl.PARAM_PASSWORD, System.getProperty("oracle_pw"), JdbcConnectionResourceImpl.PARAM_KEEP_ALIVE, "false", AbstractJdbcWriter.PARAM_DB_CONN_RESRC, "DbConnectionWrite"); AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(I2b2JdbcWriter.class, I2b2JdbcWriter.PARAM_VECTOR_TABLE, System.getProperty("oracle_table"), AbstractJdbcWriter.PARAM_DB_CONN_RESRC, erd ); aed.toXML(new FileWriter(args[0])); }
Example #28
Source File: ListTest.java From baleen with Apache License 2.0 | 5 votes |
@Test public void testWhitespaceNormalized() throws Exception { // This test demonstrates the case where whitespace is preserved in gazetteer matching. AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription( List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION, List.PARAM_EXACT_WHITESPACE, false); AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed); // words in term to search for separated by multiple spaces, tabs or newline... jCas.setDocumentText( "This text mentions New York, and New York again, and New York again, and New \nYork yet again"); ae.process(jCas); // Three mentions of "New York" if we reduce any whitespace to a single space (exactWhitespace // parameter, which ignores new lines) assertEquals(3, JCasUtil.select(jCas, Location.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals(NEW_YORK, l.getValue()); ae.destroy(); }
Example #29
Source File: GenerateDescriptors.java From ctakes-docker with Apache License 2.0 | 5 votes |
/**
 * Wraps the engine described by {@code desc/remoteMist.xml} in an aggregate and writes the
 * aggregate descriptor as XML to the file named by the first command-line argument.
 *
 * @param args {@code args[0]} is the path of the descriptor file to write
 * @throws Exception if the source descriptor cannot be loaded or the output cannot be written
 */
public static void main(String[] args) throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(AnalysisEngineFactory.createEngineDescriptionFromPath("desc/remoteMist.xml"));

    AnalysisEngineDescription aed = builder.createAggregateDescription();

    // Close the writer explicitly so the descriptor is flushed and the file handle released.
    try (FileWriter descriptorWriter = new FileWriter(args[0])) {
        aed.toXML(descriptorWriter);
    }
}
Example #30
Source File: ListTest.java From baleen with Apache License 2.0 | 5 votes |
@Test public void testWhitespaceExact() throws Exception { // This test demonstrates the case where whitespace is preserved in gazetteer matching. AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription( List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION, List.PARAM_EXACT_WHITESPACE, true); AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed); // words in term to search for separated by multiple spaces, tabs or newline... jCas.setDocumentText( "This text mentions New York, and New York again, and New York again, and New \nYork yet again"); ae.process(jCas); // only one mention of "New York" has the two words separated by a single space (as in the // gazetteer) assertEquals(1, JCasUtil.select(jCas, Location.class).size()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals(NEW_YORK, l.getValue()); ae.destroy(); }