org.apache.uima.fit.factory.ExternalResourceFactory Java Examples
The following examples show how to use
org.apache.uima.fit.factory.ExternalResourceFactory.
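Most of the examples below follow the same three-step pattern: build an ExternalResourceDescription with ExternalResourceFactory, attach it to an AnalysisEngineDescription (either by passing it as the value of the annotator's resource key or by calling bindResource), and then instantiate the engine with AnalysisEngineFactory. The minimal sketch below illustrates that pattern only; MyAnnotator and MyDictionaryResource are hypothetical placeholder classes, and the factory method names vary between uimaFIT releases (createExternalResourceDescription in uimaFIT 2.x versus createResourceDescription / createNamedResourceDescription in later versions), so treat it as a template rather than a drop-in snippet.

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.component.Resource_ImplBase;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ExternalResourceDescription;

public class ExternalResourceFactorySketch {

  // Hypothetical shared resource implementation.
  public static class MyDictionaryResource extends Resource_ImplBase {}

  // Hypothetical annotator declaring a dependency on the resource via @ExternalResource.
  public static class MyAnnotator extends JCasAnnotator_ImplBase {
    public static final String RES_DICTIONARY = "dictionary";

    @ExternalResource(key = RES_DICTIONARY)
    private MyDictionaryResource dictionary;

    @Override
    public void process(JCas jCas) {
      // The bound resource instance is available here as 'dictionary'.
    }
  }

  public static void main(String[] args) throws Exception {
    // 1. Describe the resource (uimaFIT 2.x method name; newer versions use createResourceDescription).
    ExternalResourceDescription dictionaryDesc =
        ExternalResourceFactory.createExternalResourceDescription(MyDictionaryResource.class);

    // 2. Pass the description as the value of the annotator's resource key.
    AnalysisEngineDescription aeDesc =
        AnalysisEngineFactory.createEngineDescription(
            MyAnnotator.class, MyAnnotator.RES_DICTIONARY, dictionaryDesc);

    // 3. Instantiate the engine; uimaFIT resolves and injects the resource.
    AnalysisEngine ae = AnalysisEngineFactory.createEngine(aeDesc);
    ae.destroy();
  }
}

Several of the examples instead call ExternalResourceFactory.bindResource(...) after building the engine description, which performs the same binding explicitly.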
Example #1
Source File: FixedExpressionSpotterSpec.java From termsuite-core with Apache License 2.0

private AnalysisEngine makeAE(boolean removeWordAnnotationFromCas, boolean removeTermOccAnnotationFromCas) throws Exception {
  AnalysisEngineDescription aeDesc = AnalysisEngineFactory.createEngineDescription(
      FixedExpressionSpotter.class,
      FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
      FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, removeWordAnnotationFromCas,
      FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, removeTermOccAnnotationFromCas
  );

  /*
   * The term index resource
   */
  ExternalResourceDescription fixedExpressionDesc = ExternalResourceFactory.createExternalResourceDescription(
      FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
      FixedExpressionResource.class,
      "file:fr/univnantes/termsuite/test/resources/french-fixed-expressions.txt"
  );
  ExternalResourceFactory.bindResource(aeDesc, fixedExpressionDesc);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aeDesc);
  return ae;
}
Example #2
Source File: MongoParagraphsTest.java From baleen with Apache License 2.0

@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "paragraphs", "fongo.data", "[]");

  // Create the analysis engine
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(MongoParagraph.class, MONGO, erd);
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  SharedFongoResource sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  paragraphs = sfr.getDB().getCollection("paragraphs");

  // Ensure we start with no data!
  assertEquals(0L, paragraphs.count());
}
Example #3
Source File: CustomResourceTermSuiteAEFactory.java From termsuite-core with Apache License 2.0

/**
 * Spots fixed expressions in the CAS and creates a {@link FixedExpression}
 * annotation whenever one is found.
 *
 * @return
 */
public static AnalysisEngineDescription createFixedExpressionSpotterAEDesc(ResourceConfig resourceConfig, Lang lang) {
  try {
    AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
        FixedExpressionSpotter.class,
        FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
        FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, false,
        FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, true
    );

    ExternalResourceDescription fixedExprRes = ExternalResourceFactory.createExternalResourceDescription(
        FixedExpressionResource.class,
        getResourceURL(resourceConfig, ResourceType.FIXED_EXPRESSIONS, lang));

    ExternalResourceFactory.bindResource(
        ae,
        FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
        fixedExprRes
    );

    return ae;
  } catch (Exception e) {
    throw new PreparationPipelineException(e);
  }
}
Example #4
Source File: LexicaHelper.java From bluima with Apache License 2.0

public static AnalysisEngineDescription getConceptMapper(String path, AnalysisEngineDescription tokenDesc)
    throws UIMAException, IOException, SAXException {
  String conceptMapper = LEXICA_ROOT + "desc/" + path + "ConceptMapper.xml";
  checkArgument(new File(conceptMapper).exists(), "no ConceptMapper file at " + conceptMapper);
  String lexicon = LEXICA_ROOT + "resources/" + path + ".xml";
  checkArgument(new File(lexicon).exists(), "no lexicon file at " + lexicon);

  AnalysisEngineDescription aed = (AnalysisEngineDescription) createResourceCreationSpecifier(
      conceptMapper,
      new Object[] { "TokenizerDescriptorPath", getTokenDescPath(tokenDesc),
          "Stemmer", SCharsStemmer.class.getName() });

  // Create the external resource dependency for the model and bind it
  ExternalResourceFactory.createDependencyAndBind(aed, "DictionaryFile",
      DictionaryResource_impl.class, "file:" + lexicon);

  return aed;
}
Example #5
Source File: MongoRelationsTest.java From baleen with Apache License 2.0

@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");

  // Create the analysis engine
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          MongoRelations.class, MONGO, erd, "collection", "test");
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  SharedFongoResource sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  relations = sfr.getDB().getCollection("test");

  // Ensure we start with no data!
  assertEquals(0L, relations.count());
}
Example #6
Source File: BaleenCollectionReaderTest.java From baleen with Apache License 2.0

@Test
public void testHasNextLooping() throws Exception {
  ExternalResourceDescription contentExtractor =
      ExternalResourceFactory.createNamedResourceDescription(
          KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class);

  DummyBaleenCollectionReader cr =
      (DummyBaleenCollectionReader)
          CollectionReaderFactory.createReader(
              DummyBaleenCollectionReader.class,
              BaleenCollectionReader.KEY_CONTENT_EXTRACTOR,
              contentExtractor);

  while (cr.hasNext()) {
    JCas jCas = JCasSingleton.getJCasInstance();
    cr.getNext(jCas.getCas());
  }

  cr.destroy();
}
Example #7
Source File: PoStagger.java From Canova with Apache License 2.0

public static AnalysisEngineDescription getDescription(String languageCode)
    throws ResourceInitializationException {
  String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);
  // Bind the POS model from the classpath as an external resource and configure the
  // OpenNLP sentence/token types and the feature the tag is written to.
  return AnalysisEngineFactory.createEngineDescription(
      PoStagger.class,
      UimaUtil.MODEL_PARAMETER,
      ExternalResourceFactory.createExternalResourceDescription(
          POSModelResourceImpl.class, PoStagger.class.getResource(modelPath).toString()),
      UimaUtil.SENTENCE_TYPE_PARAMETER,
      Sentence.class.getName(),
      UimaUtil.TOKEN_TYPE_PARAMETER,
      Token.class.getName(),
      UimaUtil.POS_FEATURE_PARAMETER,
      "pos");
}
Example #8
Source File: TemporalElasticsearchTest.java From baleen with Apache License 2.0

@Before
public void setUp() throws Exception {
  elasticsearch = new EmbeddedElasticsearch5();

  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          RESOURCE_KEY,
          SharedElasticsearchResource.class,
          PARAM_PORT,
          Integer.toString(elasticsearch.getTransportPort()),
          PARAM_CLUSTER,
          elasticsearch.getClusterName());

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          TemporalElasticsearch.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          RESOURCE_KEY,
          erd,
          PARAM_INDEX,
          TEMPORAL_INDEX);
  ae = AnalysisEngineFactory.createEngine(aed);
}
Example #9
Source File: ElasticsearchTest.java From baleen with Apache License 2.0

@Before
public void setup() throws UIMAException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          RESOURCE_KEY,
          SharedElasticsearchResource.class,
          PARAM_PORT,
          Integer.toString(elasticsearch.getTransportPort()),
          PARAM_CLUSTER,
          elasticsearch.getClusterName());

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          Elasticsearch.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          RESOURCE_KEY,
          erd);
  ae = AnalysisEngineFactory.createEngine(aed);
}
Example #10
Source File: MongoPatternSaverTest.java From baleen with Apache License 2.0

@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  final ExternalResourceDescription erd =
      ExternalResourceFactory.createResourceDescription(
          SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");

  // Create the analysis engine
  final AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          MongoPatternSaver.class, MongoPatternSaver.KEY_MONGO, erd, "collection", "test");
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MongoPatternSaver.KEY_MONGO);
}
Example #11
Source File: CustomResourceTermSuiteAEFactory.java From termsuite-core with Apache License 2.0

public static AnalysisEngineDescription createNormalizerAEDesc(ResourceConfig resourceConfig, Lang lang, Tagger tagger) {
  AnalysisEngineDescription ae;
  try {
    ae = AnalysisEngineFactory.createEngineDescription(
        Lexer.class,
        Lexer.PARAM_TYPE, "fr.univnantes.termsuite.types.WordAnnotation"
    );

    ExternalResourceDescription segmentBank = ExternalResourceFactory.createExternalResourceDescription(
        SegmentBankResource.class,
        getResourceURL(resourceConfig, ResourceType.SEGMENT_BANK, lang)
    );

    ExternalResourceFactory.bindResource(
        ae,
        SegmentBank.KEY_SEGMENT_BANK,
        segmentBank);

    return ae;
  } catch (Exception e) {
    throw new TermSuiteException(e);
  }
}
Example #12
Source File: CustomResourceTermSuiteAEFactory.java From termsuite-core with Apache License 2.0

public static AnalysisEngineDescription createWordTokenizerAEDesc(ResourceConfig resourceConfig, Lang lang) {
  AnalysisEngineDescription ae;
  try {
    ae = AnalysisEngineFactory.createEngineDescription(
        Lexer.class,
        Lexer.PARAM_TYPE, "fr.univnantes.termsuite.types.WordAnnotation"
    );

    ExternalResourceDescription segmentBank = ExternalResourceFactory.createExternalResourceDescription(
        SegmentBankResource.class,
        getResourceURL(resourceConfig, ResourceType.SEGMENT_BANK, lang)
    );

    ExternalResourceFactory.bindResource(
        ae,
        SegmentBank.KEY_SEGMENT_BANK,
        segmentBank);

    return ae;
  } catch (Exception e) {
    throw new TermSuiteException(e);
  }
}
Example #13
Source File: ElasticsearchRestTest.java From baleen with Apache License 2.0

@Before
public void setup() throws UIMAException, IOException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          ELASTICSEARCH, SharedElasticsearchRestResource.class, PARAM_URL, elasticsearch.getHttpUrl());

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          ElasticsearchRest.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          ELASTICSEARCH,
          erd);
  ae = AnalysisEngineFactory.createEngine(aed);
}
Example #14
Source File: AssignTypeToInteractionTest.java From baleen with Apache License 2.0

@BeforeClass
public static void before() {
  fongoErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          "fongo.collection",
          "relationTypes",
          "fongo.data",
          "[ { \"source\": \"uk.gov.dstl.baleen.types.common.Person\", \"target\": \"uk.gov.dstl.baleen.types.semantic.Location\", \"type\": \"noun\", \"subType\": \"attack\", \"pos\": \"NOUN\", \"value\":[ \"attack\", \"attacking\", \"attacked\" ] },"
              + "{ \"source\": \"uk.gov.dstl.baleen.types.common.Person\", \"target\": \"uk.gov.dstl.baleen.types.semantic.Location\", \"type\": \"verb\", \"subType\": \"attack\", \"pos\": \"VERB\", \"value\":[ \"attack\" ] } ]");
}
Example #15
Source File: ReNounNoAttributesSeedFactsTest.java From baleen with Apache License 2.0

@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {
  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(ReNounDefaultSeedsRelationshipAnnotator.class));
}
Example #16
Source File: ReNounCoreferenceSeedFactsTest.java From baleen with Apache License 2.0

@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {
  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(
          ReNounDefaultSeedsRelationshipAnnotator.class,
          PARAM_ONTOLOGY_ATTRIBUTES,
          new String[] {"CEO", "chief executive officer"},
          PARAM_REQUIRE_COREFERENCE,
          true));
}
Example #17
Source File: OdinTest.java From baleen with Apache License 2.0

@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {
  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  File file = new File(RULES_FILE.getFile());

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(Odin.class, PARAM_RULES, file.getAbsolutePath()));
}
Example #18
Source File: ReNounDependencySeedFactsTest.java From baleen with Apache License 2.0

@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {
  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(
          ReNounDefaultSeedsRelationshipAnnotator.class,
          PARAM_ONTOLOGY_ATTRIBUTES,
          new String[] {"CEO", "chief executive officer"}));
}
Example #19
Source File: RelationTypeFilterTest.java From baleen with Apache License 2.0

@Before
public void before() {
  fongoErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          "fongo.collection",
          "relationTypes",
          "fongo.data",
          "[ { \"source\": \"uk.gov.dstl.baleen.types.common.Person\", \"target\": \"uk.gov.dstl.baleen.types.semantic.Location\", \"type\": \"went\", \"subType\": \"past\", \"pos\": \"VBG\", \"value\":[ \"went\" ] } ]");
}
Example #20
Source File: MongoStatsTest.java From baleen with Apache License 2.0

@Test
public void testNewFile()
    throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
  // Due to limitations in the shared fongo resource we only test document count here!
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          SharedFongoResource.PARAM_FONGO_COLLECTION,
          "documents",
          SharedFongoResource.PARAM_FONGO_DATA,
          objectMapper.writeValueAsString(DATA));

  File tempFile = File.createTempFile("test", "mongostats");
  tempFile.delete();

  try {
    AnalysisEngine task = create(MongoStats.class, "mongo", erd, "file", tempFile.getAbsolutePath());
    execute(task);
    task.destroy();

    List<String> lines = Files.readAllLines(tempFile.toPath());
    assertEquals(2, lines.size());
    assertEquals("timestamp,documents,entities,relations", lines.get(0));

    String[] split = lines.get(1).split(",");
    assertEquals("3", split[1]);
    assertEquals("0", split[2]);
    assertEquals("0", split[3]);
  } finally {
    tempFile.delete();
  }
}
Example #21
Source File: MongoTest.java From baleen with Apache License 2.0

@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");
  ExternalResourceDescription historyErd =
      ExternalResourceFactory.createNamedResourceDescription(
          PipelineBuilder.BALEEN_HISTORY, InMemoryBaleenHistory.class);

  history = Mockito.mock(BaleenHistory.class);

  // Create the analysis engine
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          Mongo.class,
          MONGO,
          erd,
          "collection",
          "test",
          PipelineBuilder.BALEEN_HISTORY,
          historyErd,
          "outputHistory",
          Boolean.TRUE);
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  SharedFongoResource sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  history = (BaleenHistory) ae.getUimaContext().getResourceObject(PipelineBuilder.BALEEN_HISTORY);

  entities = sfr.getDB().getCollection("entities");
  documents = sfr.getDB().getCollection("documents");
  relations = sfr.getDB().getCollection("relations");

  // Ensure we start with no data!
  assertEquals(0L, documents.count());
  assertEquals(0L, entities.count());
  assertEquals(0L, relations.count());
}
Example #22
Source File: EntityLinkingAnnotatorTest.java From baleen with Apache License 2.0

@Before
public void setup() throws ResourceInitializationException {
  stopwords =
      ExternalResourceFactory.createNamedResourceDescription(
          EntityLinkingAnnotator.KEY_STOPWORDS, SharedStopwordResource.class);

  Set<EntityInformation<Person>> entityInformationSet = new HashSet<>();
  entityInformationSet.add(entityInformation);
}
Example #23
Source File: RakeKeywordsTest.java From baleen with Apache License 2.0

@Test
public void testLongDocument() throws Exception {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          true);
  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      new String(Files.readAllBytes(Paths.get(getClass().getResource("turing.txt").toURI()))));
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Optional<String> keyword = metadata.find(KEYWORD_METADATA_KEY);
  assertTrue(keyword.isPresent());

  ae.destroy();
}
Example #24
Source File: RakeKeywordsTest.java From baleen with Apache License 2.0

@Test
public void testBadStemmer()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false,
          RakeKeywords.PARAM_STEMMING,
          "NotARealStemmer");
  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(9, keywords.size());
  assertTrue(keywords.contains("minimal generating sets"));
  assertTrue(keywords.contains("linear diophantine equations"));
  assertTrue(keywords.contains("minimal supporting set"));
  assertTrue(keywords.contains("minimal set"));
  assertTrue(keywords.contains("linear constraints"));
  assertTrue(keywords.contains("natural numbers"));
  assertTrue(keywords.contains("strict inequations"));
  assertTrue(keywords.contains("nonstrict inequations"));
  assertTrue(keywords.contains("upper bounds"));

  ae.destroy();
}
Example #25
Source File: RakeKeywordsTest.java From baleen with Apache License 2.0

@Test
public void testCustomStoplist()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false,
          RakeKeywords.PARAM_STOPLIST,
          getClass().getResource("exampleStoplist.txt").getPath());
  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText("Bill and Ben went off to the shops in London town.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(1, keywords.size());
  assertTrue(keywords.contains("london town"));

  ae.destroy();
}
Example #26
Source File: IdentifyInteractionsTest.java From baleen with Apache License 2.0

@Before
public void before() {
  fongoErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          "fongo.collection",
          "patterns",
          "fongo.data",
          "[ { \"_id\":\"1\", \"words\": [ { \"lemma\":\"went\", \"pos\":\"VERB\"}], \"source\":{\"type\":\"Person\"}, \"target\":{\"type\":\"Location\"}}, { \"_id\":\"2\", \"words\": [ { \"lemma\":\"went\", \"pos\":\"VERB\"}, { \"lemma\":\"after\", \"pos\":\"VERB\"} ], \"source\":{ \"type\":\"Person\" }, \"target\":{\"type\":\"Person\" } } ]");
  wordnetErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "wordnet", SharedWordNetResource.class);
}
Example #27
Source File: RakeKeywordsTest.java From baleen with Apache License 2.0

@Test
public void testMaxNumber()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          3,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false);
  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(3, keywords.size());
  assertTrue(keywords.contains("minimal generating sets"));
  assertTrue(keywords.contains("linear diophantine equations"));
  assertTrue(keywords.contains("minimal supporting set"));

  ae.destroy();
}
Example #28
Source File: RakeKeywordsTest.java From baleen with Apache License 2.0

@Test
public void testNoBuzzwords()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false);
  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(9, keywords.size());
  assertTrue(keywords.contains("minimal generating sets"));
  assertTrue(keywords.contains("linear diophantine equations"));
  assertTrue(keywords.contains("minimal supporting set"));
  assertTrue(keywords.contains("minimal set"));
  assertTrue(keywords.contains("linear constraints"));
  assertTrue(keywords.contains("natural numbers"));
  assertTrue(keywords.contains("strict inequations"));
  assertTrue(keywords.contains("nonstrict inequations"));
  assertTrue(keywords.contains("upper bounds"));

  ae.destroy();
}
Example #29
Source File: TokenizerAnnotator.java From deeplearning4j with Apache License 2.0

public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
  String modelPath = String.format("/models/%s-token.bin", "en");
  // Bind the English tokenizer model from the classpath as an external resource and
  // configure the sentence and token types used by the OpenNLP wrapper.
  return AnalysisEngineFactory.createEngineDescription(
      ConcurrentTokenizer.class,
      opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
      ExternalResourceFactory.createExternalResourceDescription(
          TokenizerModelResourceImpl.class,
          ConcurrentTokenizer.class.getResource(modelPath).toString()),
      opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER,
      Sentence.class.getName(),
      opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER,
      Token.class.getName());
}
Example #30
Source File: OpenNLPTest.java From baleen with Apache License 2.0

@Override
public void beforeTest() throws UIMAException {
  super.beforeTest();

  ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  AnalysisEngineDescription descLanguage =
      AnalysisEngineFactory.createEngineDescription(
          uk.gov.dstl.baleen.annotators.language.OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc);
  aeLanguage = AnalysisEngineFactory.createEngine(descLanguage);

  String text =
      "This is a mention of John Smith visiting Thomas Brown at the United Nations in New York on the afternoon of February 10th, 2014.";
  jCas.setDocumentText(text);
}