com.bericotech.clavin.resolver.ResolvedLocation Java Examples
The following examples show how to use
com.bericotech.clavin.resolver.ResolvedLocation.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 6 votes |
/**
 * Filters the candidates down to those that share a primary country with at least one of the
 * places already picked, optionally applying further restrictions.
 *
 * @param candidates the locations to filter
 * @param placesAlreadyPicked the places a candidate must share a country with
 * @param citiesOnly when true, a candidate is kept only if isCity accepts it
 * @param exactMatchesOnly when true, a candidate is kept only if isExactMatch accepts it
 * @param populatedOnly when true, a candidate is kept only if isPopulated accepts it
 * @return a new list holding the accepted candidates in their original order
 */
protected List<ResolvedLocation> inSameCountry(List<ResolvedLocation> candidates, List<ResolvedLocation> placesAlreadyPicked,
        boolean citiesOnly, boolean exactMatchesOnly, boolean populatedOnly) {
    List<ResolvedLocation> accepted = new ArrayList<ResolvedLocation>();
    for (ResolvedLocation candidate : candidates) {
        if (!inSameCountry(candidate, placesAlreadyPicked)) {
            continue;
        }
        if (citiesOnly && !isCity(candidate)) {
            continue;
        }
        if (exactMatchesOnly && !isExactMatch(candidate)) {
            continue;
        }
        if (populatedOnly && !isPopulated(candidate)) {
            continue;
        }
        accepted.add(candidate);
    }
    return accepted;
}
Example #2
Source File: WorkflowDemoNERD.java From CLAVIN-NERD with GNU General Public License v2.0 | 6 votes |
/** * Standard usage of CLAVIN. Instantiate a default GeoParser, give * it some text, check out the locations it extracts and resolves. * * @throws Exception */ private static void getparseArticle() throws Exception { // Instantiate a CLAVIN GeoParser using the StanfordExtractor GeoParser parser = GeoParserFactory.getDefault("./IndexDirectory", new StanfordExtractor(), 1, 1, false); // Unstructured text file about Somalia to be geoparsed File inputFile = new File("src/test/resources/sample-docs/Somalia-doc.txt"); // Grab the contents of the text file as a String String inputString = TextUtils.fileToString(inputFile); // Parse location names in the text into geographic entities List<ResolvedLocation> resolvedLocations = parser.parse(inputString); // Display the ResolvedLocations found for the location names for (ResolvedLocation resolvedLocation : resolvedLocations) System.out.println(resolvedLocation); }
Example #3
Source File: WorkflowDemoNERD.java From CLAVIN-NERD with GNU General Public License v2.0 | 6 votes |
/** * Demonstrates usage of CLAVIN with non-default NER model -- in * this instance, a case-insensitive model to help us perform * geoparsing on a text document IN ALL CAPS. * * @throws Exception */ private static void geoparseUppercaseArticle() throws Exception { // Instantiate a CLAVIN GeoParser using the StanfordExtractor with "caseless" models GeoParser parser = GeoParserFactory.getDefault("./IndexDirectory", new StanfordExtractor("english.all.3class.caseless.distsim.crf.ser.gz", "english.all.3class.caseless.distsim.prop"), 1, 1, false); // Unstructured uppercase text file about Somalia to be geoparsed File inputFile = new File("src/test/resources/sample-docs/Somalia-doc-uppercase.txt"); // Grab the contents of the text file as a String String inputString = TextUtils.fileToString(inputFile); // Parse location names in the text into geographic entities List<ResolvedLocation> resolvedLocations = parser.parse(inputString); // Display the ResolvedLocations found for the location names for (ResolvedLocation resolvedLocation : resolvedLocations) System.out.println(resolvedLocation); }
Example #4
Source File: FocusUtils.java From CLIFF with Apache License 2.0 | 6 votes |
/**
 * Builds a weighted tally per admin1 code over the resolved locations. A mention whose position
 * is at or before one tenth of the text length scores 2 points; every other mention scores 1.
 * Locations whose admin1 code is null are skipped.
 *
 * @param resolvedLocations the locations to score
 * @param text the text the locations were found in; only its length is used
 * @return map from admin1 code to accumulated points
 */
public static HashMap<String,Integer> getScoredStateCounts(List<ResolvedLocation> resolvedLocations, String text){
    HashMap<String,Integer> scores = new HashMap<String,Integer>();
    for (ResolvedLocation mention : resolvedLocations) {
        String admin1 = mention.getGeoname().getAdmin1Code();
        if (admin1 == null) {
            continue;
        }
        int offset = mention.getLocation().getPosition();
        int earlyCutoff = text.length() / 10;
        // Mentions near the start of the text count double.
        int weight = (offset <= earlyCutoff) ? 2 : 1;
        Integer soFar = scores.get(admin1);
        scores.put(admin1, (soFar == null ? 0 : soFar) + weight);
    }
    return scores;
}
Example #5
Source File: HandCodedFocusChecker.java From CLIFF with Apache License 2.0 | 6 votes |
private static double getAboutnessAccuracy(String filePath) throws Exception{ int correct = 0; List<CodedArticle> articles = TestUtils.loadExamplesFromFile(filePath); for(CodedArticle article: articles){ logger.info("Testing article "+article.mediacloudId+" (looking for "+article.handCodedPlaceName+" / "+article.handCodedCountryCode+")"); //List<CountryCode> primaryCountries = ParseManager.extractAndResolve(article.text).getUniqueCountries(); List<ResolvedLocation> resolvedLocations = ParseManager.extractAndResolve(article.text).getResolvedLocations(); FocusStrategy focusStrategy = ParseManager.getFocusStrategy(); List<FocusLocation> primaryCountries = focusStrategy.selectCountries(resolvedLocations); if(article.isAboutHandCodedCountry(primaryCountries)) { correct++; } else { logger.error("Didn't find "+article.handCodedCountryCode+" in "+primaryCountries); } } return (double)correct/(double)articles.size(); }
Example #6
Source File: FocusUtils.java From CLIFF with Apache License 2.0 | 6 votes |
public static HashMap<String,Integer> getStateCounts(List<ResolvedLocation> resolvedLocations){ HashMap<String,Integer> stateCounts = new HashMap<String,Integer>(); for (ResolvedLocation resolvedLocation: resolvedLocations){ if(resolvedLocation.getGeoname().getPrimaryCountryCode()==CountryCode.NULL){ continue; } CountryCode country = resolvedLocation.getGeoname().getPrimaryCountryCode(); String adm1Code = resolvedLocation.getGeoname().getAdmin1Code(); String key = Adm1GeoNameLookup.getKey(country, adm1Code); if(!Adm1GeoNameLookup.isValid(key)){ // skip things that aren't actually ADM1 codes continue; } if(!stateCounts.containsKey(key)){ stateCounts.put(key, 0); } stateCounts.put(key, stateCounts.get(key)+1); } return stateCounts; }
Example #7
Source File: FocusUtils.java From CLIFF with Apache License 2.0 | 6 votes |
/**
 * Counts how often each city occurs among the resolved locations. Only locations whose feature
 * class is FeatureClass.P are considered. Two locations count as the same city when their
 * geoname IDs are equal; the first GeoName seen for an ID is the one used as the map key.
 *
 * @param resolvedLocations the locations to count
 * @return map from city GeoName to number of occurrences
 */
public static HashMap<GeoName,Integer> getCityCounts(List<ResolvedLocation> resolvedLocations){
    HashMap<GeoName,Integer> tally = new HashMap<GeoName,Integer>();
    for (ResolvedLocation mention : resolvedLocations) {
        if (mention.getGeoname().getFeatureClass() != FeatureClass.P) {
            continue;
        }
        // Look for a key already standing for this geoname ID.
        GeoName knownCity = null;
        for (GeoName key : tally.keySet()) {
            if (key.getGeonameID() == mention.getGeoname().getGeonameID()) {
                knownCity = key;
                break;
            }
        }
        if (knownCity != null) {
            tally.put(knownCity, tally.get(knownCity)+1);
            logger.debug("Adding count to city " + knownCity.getAsciiName() + tally.get(knownCity));
        } else {
            tally.put(mention.getGeoname(), 1);
            logger.debug("Adding city " + mention.getGeoname().getAsciiName());
        }
    }
    return tally;
}
Example #8
Source File: EntityParser.java From CLIFF with Apache License 2.0 | 6 votes |
public ExtractedEntities resolve(ExtractedEntities entities) throws Exception{ // resolve the extracted location names against a // gazetteer to produce geographic entities representing the // locations mentioned in the original text List<ResolvedLocation> resolvedLocations = locationResolver.resolveLocations( entities.getLocations(), this.maxHitDepth, -1, this.fuzzy); entities.setResolvedLocations( resolvedLocations ); logger.trace("resolvedLocations: {}", resolvedLocations); // Disambiguate people List<ResolvedPerson> resolvedPeople = personResolver.resolve(entities.getPeople()); entities.setResolvedPeople( resolvedPeople ); logger.trace("resolvedPeople: {}", resolvedPeople); // Disambiguate organizations List<ResolvedOrganization> resolvedOrganizations = organizationResolver.resolve(entities.getOrganizations()); entities.setResolvedOrganizations( resolvedOrganizations ); logger.trace("resolvedOrganizations: {}", resolvedOrganizations); return entities; }
Example #9
Source File: FuzzyMatchedCountriesPass.java From CLIFF with Apache License 2.0 | 6 votes |
/** * Find the first country after any Hypsographic Features (T) * @param candidates * @param exactMatchRequired * @return */ protected ResolvedLocation findFirstCountryCandidate(List<ResolvedLocation> candidates, boolean exactMatchRequired){ boolean keepGoing = true; for(ResolvedLocation candidate: candidates) { if(keepGoing){ if(candidate.getGeoname().getFeatureClass().equals(FeatureClass.T)){ // skip large territories that appear ahead of countries in results (ie. Indian Subcontinent!) continue; } if(isCountry(candidate)){ if (exactMatchRequired && isExactMatch(candidate)){ return candidate; } else if (!exactMatchRequired){ return candidate; } } else{ keepGoing = false; } } } return null; }
Example #10
Source File: EntityParserTest.java From CLIFF with Apache License 2.0 | 6 votes |
/**
 * Parses a sample story supplied as an array of sentence maps and checks that exactly one
 * location is resolved, that it is Rikers Island, and that it carries the id of the sentence
 * it came from.
 *
 * @throws Exception propagated from file reading or parsing
 */
@Test
@SuppressWarnings("rawtypes")
public void extractAndResolveFromSentences() throws Exception {
    String fileName = "story-sentences-278413513.json";
    File file = new File("src/test/resources/sample-sentence-docs/"+fileName);
    String jsonText = FileUtils.readFileToString(file);
    Gson gson = new Gson();
    Map[] sentences = gson.fromJson(jsonText, Map[].class);
    ExtractedEntities entities = ParseManager.extractAndResolveFromSentences(sentences, false);
    List<ResolvedLocation> locations = entities.getResolvedLocations();
    // assertEquals takes the expected value first, so failure messages read correctly.
    assertEquals(1, locations.size());
    ResolvedLocation loc = locations.get(0);
    assertEquals(TestPlaces.RIKERS_ISLAND, loc.getGeoname().getGeonameID());
    assertTrue(loc.getLocation() instanceof SentenceLocationOccurrence);
    SentenceLocationOccurrence sentenceLoc = (SentenceLocationOccurrence) loc.getLocation();
    assertEquals("3279940188", sentenceLoc.storySentenceId);
}
Example #11
Source File: ExactAdmin1MatchPass.java From CLIFF with Apache License 2.0 | 6 votes |
/**
 * For each undecided candidate list that does not contain a populated exact-match city, takes
 * the first candidate returned by getExactMatchesOrAdmin1ExactMatches and, if it has a
 * population above zero and feature code ADM1, picks it.
 *
 * @param possibilitiesToDo the candidate lists still to be decided
 * @param bestCandidates receives each candidate picked by this pass
 * @return the candidate lists that this pass decided
 */
@Override
protected List<List<ResolvedLocation>> disambiguate(
        List<List<ResolvedLocation>> possibilitiesToDo,
        List<ResolvedLocation> bestCandidates) {
    List<List<ResolvedLocation>> decided = new ArrayList<List<ResolvedLocation>>();
    for (List<ResolvedLocation> candidates : possibilitiesToDo) {
        if (containsPopulatedCityExactMatch(candidates)) {
            continue;
        }
        List<ResolvedLocation> matches = getExactMatchesOrAdmin1ExactMatches(candidates);
        if (matches.isEmpty()) {
            continue;
        }
        ResolvedLocation top = matches.get(0);
        boolean hasPopulation = top.getGeoname().getPopulation() > 0;
        if (hasPopulation && top.getGeoname().getFeatureCode().equals(FeatureCode.ADM1)) {
            bestCandidates.add(top);
            decided.add(candidates);
        }
    }
    return decided;
}
Example #12
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 6 votes |
public void execute(List<List<ResolvedLocation>> possibilitiesToDo, List<ResolvedLocation> bestCandidates) { if(possibilitiesToDo.size()==0){ // bail if there is nothing to disambiguate return; } List<List<ResolvedLocation>> possibilitiesToRemove = disambiguate( possibilitiesToDo, bestCandidates); for(ResolvedLocation pickedCandidate: bestCandidates){ logSelectedCandidate(pickedCandidate); logResolvedLocationInfo(pickedCandidate); } triggerCount+= possibilitiesToRemove.size(); for (List<ResolvedLocation> toRemove : possibilitiesToRemove) { possibilitiesToDo.remove(toRemove); } logger.debug("Still have " + possibilitiesToDo.size() + " lists to do"); }
Example #13
Source File: TopColocationsPass.java From CLIFF with Apache License 2.0 | 6 votes |
/**
 * For each undecided candidate list, picks the first candidate whose feature class is A or P
 * and that is in the same country as a candidate already picked. Candidates picked earlier in
 * this same pass count as already picked for the lists that follow.
 *
 * @param possibilitiesToDo the candidate lists still to be decided
 * @param bestCandidates the candidates picked so far; receives each new pick
 * @return the candidate lists that this pass decided
 */
@Override
protected List<List<ResolvedLocation>> disambiguate(
        List<List<ResolvedLocation>> possibilitiesToDo,
        List<ResolvedLocation> bestCandidates) {
    List<List<ResolvedLocation>> decided = new ArrayList<List<ResolvedLocation>>();
    for (List<ResolvedLocation> candidates : possibilitiesToDo) {
        for (ResolvedLocation candidate : candidates) {
            boolean adminOrPopulated =
                    candidate.getGeoname().getFeatureClass() == FeatureClass.A
                    || candidate.getGeoname().getFeatureClass() == FeatureClass.P;
            if (adminOrPopulated && inSameCountry(candidate, bestCandidates)) {
                bestCandidates.add(candidate);
                decided.add(candidates);
                // Only the first qualifying candidate of each list is taken.
                break;
            }
        }
    }
    return decided;
}
Example #14
Source File: HeuristicDisambiguationStrategy.java From CLIFF with Apache License 2.0 | 6 votes |
@Override public List<ResolvedLocation> select(CliffLocationResolver resolver, List<List<ResolvedLocation>> allPossibilities) { logger.debug("Starting with "+allPossibilities.size()+" lists to do:"); // print all of them for( List<ResolvedLocation> candidates: allPossibilities){ ResolvedLocation firstCandidate = candidates.get(0); logger.debug(" Location: "+firstCandidate.getLocation().getText()+"@"+firstCandidate.getLocation().getPosition()); for( ResolvedLocation candidate: candidates){ GenericPass.logResolvedLocationInfo(candidate); } } // all this does is run the chain we set up already List<ResolvedLocation> bestCandidates = chain.disambiguate(allPossibilities); return bestCandidates; }
Example #15
Source File: LargeAreasPass.java From CLIFF with Apache License 2.0 | 6 votes |
/**
 * For each undecided candidate list, picks the first candidate that is an exact match, has
 * CountryCode.NULL as its primary country and has feature class L.
 *
 * @param possibilitiesToDo the candidate lists still to be decided
 * @param bestCandidates receives each candidate picked by this pass
 * @return the candidate lists that this pass decided
 */
@Override
protected List<List<ResolvedLocation>> disambiguate(
        List<List<ResolvedLocation>> possibilitiesToDo,
        List<ResolvedLocation> bestCandidates) {
    List<List<ResolvedLocation>> decided = new ArrayList<List<ResolvedLocation>>();
    for (List<ResolvedLocation> candidates : possibilitiesToDo) {
        for (ResolvedLocation candidate : candidates) {
            if (!isExactMatch(candidate)) {
                continue;
            }
            if (candidate.getGeoname().getPrimaryCountryCode() != CountryCode.NULL) {
                continue;
            }
            if (candidate.getGeoname().getFeatureClass() != FeatureClass.L) {
                continue;
            }
            bestCandidates.add(candidate);
            decided.add(candidates);
            // Only the first qualifying candidate of each list is taken.
            break;
        }
    }
    return decided;
}
Example #16
Source File: LocationResolverBolt.java From cognition with Apache License 2.0 | 5 votes |
@Override protected void process(LogRecord record) { if (_parser == null) { this.prepare(); } int count = 0; for (Object field : _textFields) { String text = record.getValue((String) field); if (StringUtils.isBlank(text)) continue; try { List<ResolvedLocation> resolvedLocations = _parser.parse(text); for (ResolvedLocation rl : resolvedLocations) { GeoName geo = rl.geoname; record.setValue(_pipClavinLocationPrefix + count + "." + _name, geo.name); if (geo.admin1Code != null && !geo.admin1Code.isEmpty()) { record.setValue(_pipClavinLocationPrefix + count + "." + _admin1Code, geo.admin1Code); //US state or FIPS code } if (geo.admin2Code != null && !geo.admin2Code.isEmpty()) { record.setValue(_pipClavinLocationPrefix + count + "." + _admin2Code, geo.admin2Code); //county } record.setValue(_pipClavinLocationPrefix + count + "." + _countryCode, geo.primaryCountryCode.toString()); record.setValue(_pipClavinLocationPrefix + count + "." + _latitude, String.valueOf(geo.latitude)); record.setValue(_pipClavinLocationPrefix + count + "." + _longitude, String.valueOf(geo.longitude)); record.setValue(_pipClavinLocationPrefix + count + "." + _confidence, String.valueOf(rl.confidence)); record.setValue(_pipClavinLocationPrefix + count + "." + _fieldName, (String) field); count++; } } catch (Exception e) { logger.error("Failed to geoparse text: " + text); } } }
Example #17
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Collects the candidates that GenericPass.isExactMatch accepts.
 *
 * @param candidates the candidates to filter
 * @return a new list of the accepted candidates, in input order
 */
protected static List<ResolvedLocation> getExactMatches(List<ResolvedLocation> candidates){
    List<ResolvedLocation> kept = new ArrayList<ResolvedLocation>();
    for (ResolvedLocation candidate : candidates) {
        if (!GenericPass.isExactMatch(candidate)) {
            continue;
        }
        kept.add(candidate);
    }
    return kept;
}
Example #18
Source File: ExactColocationsPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Collects every candidate that isSameCountryAndAdm1 pairs with an already-picked location.
 * A candidate is added once for each picked location it pairs with, so it can appear in the
 * result more than once.
 *
 * @param candidates the candidates to test
 * @param alreadyPicked the locations picked so far
 * @return a new list of co-located candidates, grouped by picked location
 */
private List<ResolvedLocation> inSameCountryAndAdm1(
        List<ResolvedLocation> candidates,
        List<ResolvedLocation> alreadyPicked) {
    List<ResolvedLocation> colocated = new ArrayList<ResolvedLocation>();
    for (ResolvedLocation picked : alreadyPicked) {
        for (ResolvedLocation candidate : candidates) {
            if (!isSameCountryAndAdm1(candidate, picked)) {
                continue;
            }
            colocated.add(candidate);
        }
    }
    return colocated;
}
Example #19
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Tells whether the candidate has the same admin1 code as any location in the list.
 * A candidate whose admin1 code is null never matches; it no longer triggers a
 * NullPointerException.
 *
 * @param candidate the location to test
 * @param list the locations to compare against
 * @return true if some list item has an admin1 code equal to the candidate's
 */
protected static boolean inSameSuperPlace(ResolvedLocation candidate, List<ResolvedLocation> list){
    for (ResolvedLocation item : list) {
        String candidateAdmin1 = candidate.getGeoname().getAdmin1Code();
        // The admin1 code may be null; String.equals(null) covers a null code on the item side.
        if (candidateAdmin1 != null && candidateAdmin1.equals(item.getGeoname().getAdmin1Code())) {
            return true;
        }
    }
    return false;
}
Example #20
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Returns the first candidate that isCity accepts and, when an exact match is required, that
 * isExactMatch accepts too.
 *
 * @param candidates the candidates to scan, in order
 * @param exactMatchRequired whether the city must also be an exact match
 * @return the first qualifying candidate, or null if there is none
 */
protected ResolvedLocation findFirstCityCandidate(List<ResolvedLocation> candidates, boolean exactMatchRequired){
    for (ResolvedLocation candidate : candidates) {
        if (!isCity(candidate)) {
            continue;
        }
        if (!exactMatchRequired || isExactMatch(candidate)) {
            return candidate;
        }
    }
    return null;
}
Example #21
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Returns the first candidate that isAdminRegion accepts and, when an exact match is required,
 * that isExactMatch accepts too.
 *
 * @param candidates the candidates to scan, in order
 * @param exactMatchRequired whether the admin region must also be an exact match
 * @return the first qualifying candidate, or null if there is none
 */
protected ResolvedLocation findFirstAdminCandidate(List<ResolvedLocation> candidates, boolean exactMatchRequired){
    for (ResolvedLocation candidate : candidates) {
        if (!isAdminRegion(candidate)) {
            continue;
        }
        if (!exactMatchRequired || isExactMatch(candidate)) {
            return candidate;
        }
    }
    return null;
}
Example #22
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Decides whether the city candidate should be preferred over the admin candidate.
 *
 * @param cityCandidate the city candidate, may be null
 * @param adminCandidate the admin-region candidate, may be null
 * @return false when there is no city candidate; true when there is a city but no admin
 *         candidate; otherwise true if the city has the larger population or both have the
 *         same primary country code
 */
protected boolean chooseCityOverAdmin(ResolvedLocation cityCandidate, ResolvedLocation adminCandidate){
    if (cityCandidate == null) {
        return false;
    }
    if (adminCandidate == null) {
        return true;
    }
    if (cityCandidate.getGeoname().getPopulation() > adminCandidate.getGeoname().getPopulation()) {
        return true;
    }
    return cityCandidate.getGeoname().getPrimaryCountryCode() == adminCandidate.getGeoname().getPrimaryCountryCode();
}
Example #23
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Tells whether the candidate is in the same primary country as at least one of the places
 * already picked.
 *
 * @param candidate the location to test
 * @param placesAlreadyPicked the places picked so far
 * @return true if the pairwise inSameCountry check succeeds for any picked place
 */
protected boolean inSameCountry(ResolvedLocation candidate, List<ResolvedLocation> placesAlreadyPicked){
    boolean sharesCountry = false;
    for (ResolvedLocation picked : placesAlreadyPicked) {
        if (inSameCountry(candidate, picked)) {
            sharesCountry = true;
            break;
        }
    }
    return sharesCountry;
}
Example #24
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Tells whether the candidate has the same admin1 code as any location in the list.
 * A candidate whose admin1 code is null never matches; it no longer triggers a
 * NullPointerException.
 *
 * @param candidate the location to test
 * @param list the locations to compare against
 * @return true if some list item has an admin1 code equal to the candidate's
 */
protected boolean inSameAdmin1(ResolvedLocation candidate, List<ResolvedLocation> list){
    for (ResolvedLocation item : list) {
        String candidateAdmin1 = candidate.getGeoname().getAdmin1Code();
        // The admin1 code may be null; String.equals(null) covers a null code on the item side.
        if (candidateAdmin1 != null && candidateAdmin1.equals(item.getGeoname().getAdmin1Code())) {
            return true;
        }
    }
    return false;
}
Example #25
Source File: GenericPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Collects the candidates accepted by GenericPass.isExactMatch or, failing that, by
 * GenericPass.isExactMatchToAdmin1.
 *
 * @param candidates the candidates to filter
 * @return a new list of the accepted candidates, in input order
 */
protected static List<ResolvedLocation> getExactMatchesOrAdmin1ExactMatches(List<ResolvedLocation> candidates){
    List<ResolvedLocation> kept = new ArrayList<ResolvedLocation>();
    for (ResolvedLocation candidate : candidates) {
        boolean accepted = GenericPass.isExactMatch(candidate) || GenericPass.isExactMatchToAdmin1(candidate);
        if (accepted) {
            kept.add(candidate);
        }
    }
    return kept;
}
Example #26
Source File: ExactAdmin1MatchPass.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Tells whether any candidate is an exact-match city with a population above the threshold.
 * Tuned to skip tiny populated cities, to solve the Oklahoma problem and the Sao Paulo problem.
 * The threshold is a subjective number based on specific test cases in the unit tests
 * (from bug reports).
 *
 * @param candidates the candidates to inspect
 * @return true if at least one candidate qualifies
 */
private boolean containsPopulatedCityExactMatch(List<ResolvedLocation> candidates) {
    final int populationThreshold = 300000;
    for (ResolvedLocation candidate : candidates) {
        if (candidate.getGeoname().getPopulation() <= populationThreshold) {
            continue;
        }
        if (isCity(candidate) && isExactMatch(candidate)) {
            return true;
        }
    }
    return false;
}
Example #27
Source File: MultiplePassChain.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Runs every configured pass, in order, over the candidate lists and gathers the candidates
 * the passes pick. Also increments callCount.
 *
 * @param possibilities the candidate lists to decide; handed to each pass's execute method
 * @return the candidates picked across all passes
 */
public List<ResolvedLocation> disambiguate(List<List<ResolvedLocation>> possibilities){
    callCount++;
    List<ResolvedLocation> picked = new ArrayList<ResolvedLocation>();
    int passIndex = 0;
    for (GenericPass pass : passes) {
        logger.debug("Pass "+passIndex+": "+pass.getDescription());
        pass.execute(possibilities, picked);
        passIndex++;
    }
    return picked;
}
Example #28
Source File: HandCodedDisambiguationTest.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Asserts, for every article, that the locations resolved from its text mention the article's
 * hand-coded country. The failure message lists the country codes that were found.
 *
 * @param articles the hand-coded articles to check
 * @param source label included in the failure message
 * @throws Exception propagated from parsing
 */
private void verifyArticlesMentionHandCodedCountry(List<CodedArticle> articles, String source) throws Exception{
    for (CodedArticle article : articles) {
        logger.info("Testing article "+article.mediacloudId+" (looking for "+article.handCodedPlaceName+" / "+article.handCodedCountryCode+")");
        List<ResolvedLocation> places = ParseManager.extractAndResolve(article.text).getResolvedLocations();

        // Space-separated list of every resolved country code, for the failure message.
        StringBuilder codes = new StringBuilder();
        for (ResolvedLocation place : places) {
            codes.append(place.getGeoname().getPrimaryCountryCode()).append(" ");
        }
        String foundCodes = codes.toString();

        String failureMessage = "Didn't find "+source+" "+article.handCodedPlaceName+" ("+article.handCodedCountryCode+") "
                + "in article "+article.mediacloudId+ "( found "+foundCodes+")";
        assertTrue(failureMessage, article.mentionsHandCodedCountry(places));
    }
}
Example #29
Source File: FranceNLPPlaceTest.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Loads pre-extracted NLP JSON for a sample story, checks the number of location, people and
 * organization occurrences, resolves them, and checks that the first two of the five resolved
 * places are Ireland and France.
 *
 * @throws Exception propagated from file reading or resolving
 */
@Test
public void testFranceFromNlp() throws Exception{
    String fileName = "story307385477.json";
    File storyFile = new File("src/test/resources/sample-muck-json/"+fileName);
    String storyJson = FileUtils.readFileToString(storyFile);

    ExtractedEntities extracted = MuckUtils.entitiesFromNlpJsonString(storyJson);
    assertEquals("Wrong number of location occurrences", 5, extracted.getLocations().size());
    assertEquals("Wrong number of people occurrences", 18, extracted.getPeople().size());
    assertEquals("Wrong number of organization occurrences", 8, extracted.getOrganizations().size());

    extracted = ParseManager.getParserInstance().resolve(extracted);
    List<ResolvedLocation> places = extracted.getResolvedLocations();
    assertEquals("Found "+places.size()+" places, should have been 5!",5,places.size());
    assertEquals(TestPlaces.COUNTRY_IRELAND,places.get(0).getGeoname().getGeonameID());
    assertEquals(TestPlaces.COUNTRY_FRANCE,places.get(1).getGeoname().getGeonameID());
}
Example #30
Source File: BangorMainePlaceTest.java From CLIFF with Apache License 2.0 | 5 votes |
/**
 * Checks that a sentence mentioning "Bangor, Maine" resolves to exactly two places: the state
 * of Maine first, then the city of Bangor.
 *
 * @throws Exception propagated from parsing
 */
@Test
public void testBangorMaine() throws Exception{
    String sentence = "Near Bangor, Maine, 75 vehicles got tangled up in a series of chain-reaction pileups on a snowy stretch of Interstate 95, injuring at least 17 people.";
    List<ResolvedLocation> places = ParseManager.extractAndResolve(sentence).getResolvedLocations();
    assertEquals("Found "+places.size()+" places, should have been 2!",2,places.size());
    assertEquals(TestPlaces.STATE_MAINE,places.get(0).getGeoname().getGeonameID());
    assertEquals(TestPlaces.CITY_BANGOR, places.get(1).getGeoname().getGeonameID());
}