edu.stanford.nlp.util.Triple Java Examples
The following examples show how to use
edu.stanford.nlp.util.Triple.
Each example notes the project it comes from and its license.
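Before the project examples, here is a minimal, self-contained sketch of the class itself. Triple is a small generic 3-tuple whose elements are exposed both as public fields (first, second, third) and as accessor methods (first(), second(), third()); Stanford NER uses it to return <entity type, begin offset, end offset> spans. The class name TripleBasics and the literal values are illustrative only.

import edu.stanford.nlp.util.Triple;

public class TripleBasics {
    public static void main(String[] args) {
        // Construct a Triple with the constructor or the static factory used in Example #7.
        Triple<String, Integer, Integer> span = new Triple<String, Integer, Integer>("LOCATION", 14, 25);
        Triple<String, Integer, Integer> other = Triple.makeTriple("LOCATION", 14, 25);

        // Elements can be read through the accessor methods...
        String label = span.first();
        int begin = span.second();
        int end = span.third();

        // ...or through the public fields, as several of the examples below do.
        System.out.println(label + " starts at " + begin + " and ends at " + end
                + "; other.first = " + other.first);
    }
}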
Example #1
Source File: CorenlpPipeline.java From datashare with GNU Affero General Public License v3.0

/**
 * Named Entity Classifier (Conditional Random Fields) only
 *
 * @param input the string to annotate
 * @param hash the input hash code
 * @param language the input language
 */
private Annotations processNerClassifier(String input, String hash, Language language) throws InterruptedException {
    Annotations annotations = new Annotations(hash, getType(), language);
    LOGGER.info("name-finding for " + language.toString());

    // Recognize named entities from input
    final CoreNlpAnnotator<AbstractSequenceClassifier<CoreLabel>> abstractSequenceClassifierCoreNlpAnnotator;
    abstractSequenceClassifierCoreNlpAnnotator = CoreNlpNerModels.getInstance().get(language);
    List<Triple<String, Integer, Integer>> items =
            abstractSequenceClassifierCoreNlpAnnotator.annotator.classifyToCharacterOffsets(input);

    // For each recognized named entity
    for (Triple<String, Integer, Integer> item : items) {
        // Triple: <category, begin, end>
        NamedEntity.Category category = NamedEntity.Category.parse(item.first());
        int begin = item.second();
        int end = item.third();
        annotations.add(NER, begin, end, category);
    }

    return annotations;
}
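The classifyToCharacterOffsets pattern in Example #1 recurs throughout the examples below, so here it is in isolation as a minimal sketch. The class name, sample sentence, and model path are illustrative assumptions, not taken from any of the projects above; each Triple holds the entity label plus begin and end character offsets (end exclusive).

import java.util.List;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.Triple;

public class NerOffsetsDemo {
    public static void main(String[] args) {
        // Load a serialized CRF model; the path is an assumption -- point it at the model you actually ship.
        AbstractSequenceClassifier<CoreLabel> classifier =
                CRFClassifier.getClassifierNoExceptions("english.all.3class.distsim.crf.ser.gz");
        String text = "Barack Obama was born in Honolulu.";

        // Each Triple is <entity type, begin character offset, end character offset (exclusive)>.
        List<Triple<String, Integer, Integer>> spans = classifier.classifyToCharacterOffsets(text);
        for (Triple<String, Integer, Integer> span : spans) {
            System.out.println(span.first() + ": \"" + text.substring(span.second(), span.third()) + "\"");
        }
    }
}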
Example #2
Source File: StanfordExtractor.java From CLAVIN-NERD with GNU General Public License v2.0

/**
 * Converts output from Stanford NER to input required by CLAVIN resolver.
 *
 * @param entities A List<Triple<String, Integer, Integer>> from Stanford NER
 * @param text text content processed by Stanford NER + CLAVIN resolver
 * @return List<LocationOccurrence> used by CLAVIN resolver
 */
public static List<LocationOccurrence> convertNERtoCLAVIN
        (List<Triple<String, Integer, Integer>> entities, String text) {
    List<LocationOccurrence> locations = new ArrayList<LocationOccurrence>();

    if (entities != null) {
        // iterate over each entity Triple
        for (Triple<String, Integer, Integer> entity : entities) {
            // check if the entity is a "Location"
            if (entity.first.equalsIgnoreCase("LOCATION")) {
                // build a LocationOccurrence object
                locations.add(new LocationOccurrence(text.substring(entity.second, entity.third), entity.second));
            }
        }
    }

    return locations;
}
Example #3
Source File: StanfordExtractorTest.java From CLAVIN-NERD with GNU General Public License v2.0

/**
 * Checks conversion of Stanford NER output format into
 * {@link com.bericotech.clavin.resolver.ClavinLocationResolver}
 * input format.
 *
 * @throws IOException
 */
@Test
public void testConvertNERtoCLAVIN() throws IOException {
    InputStream mpis = this.getClass().getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop");
    Properties mp = new Properties();
    mp.load(mpis);
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    String text = "I was born in Springfield and grew up in Boston.";
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(text);
    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, text);

    assertEquals("wrong number of entities", 2, locationsForCLAVIN.size());
    assertEquals("wrong text for first entity", "Springfield", locationsForCLAVIN.get(0).getText());
    assertEquals("wrong position for first entity", 14, locationsForCLAVIN.get(0).getPosition());
    assertEquals("wrong text for second entity", "Boston", locationsForCLAVIN.get(1).getText());
    assertEquals("wrong position for second entity", 41, locationsForCLAVIN.get(1).getPosition());
}
Example #4
Source File: RelExTool.java From Criteria2Query with Apache License 2.0

public static String queryRelTruth(Integer a, Integer b, List<Triple<Integer, Integer, String>> relations) {
    for (Triple<Integer, Integer, String> r : relations) {
        if (r.first == a && r.second == b) {
            return r.third;
        }
    }
    return "no_relation";
}
Example #5
Source File: InformationExtractionServiceImpl.java From Criteria2Query with Apache License 2.0

public List<Paragraph> patchDocLevel(List<Paragraph> originalp) {
    for (Paragraph p : originalp) {
        if (p.getSents() != null) {
            for (Sentence s : p.getSents()) {
                if (s.getTerms() != null) {
                    for (int i = 0; i < s.getTerms().size(); i++) {
                        if (s.getTerms().get(i).getCategorey().equals("Value")) {
                            String text = s.getTerms().get(i).getText();
                            List<String> lemmas = corenlp.getLemmasList(text);
                            if (lemmas.contains("old") || lemmas.contains("young")
                                    || lemmas.contains("older") || lemmas.contains("younger")) {
                                // if there is no age in this sentence.
                                if (hasDemoAge(s.getTerms()) == false) {
                                    Term t = new Term();
                                    t.setCategorey("Demographic");
                                    t.setStart_index(-1);
                                    t.setEnd_index(-1);
                                    t.setNeg(false);
                                    t.setText("age");
                                    Integer assignId = s.getTerms().size();
                                    t.setTermId(assignId);
                                    s.getTerms().add(t);
                                    s.getRelations().add(new Triple<Integer, Integer, String>(
                                            assignId, s.getTerms().get(i).getTermId(), "has_value"));
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    return originalp;
}
Example #6
Source File: PairwiseRankingOptimizerSGD.java From phrasal with GNU General Public License v3.0

/**
 * Sampling algorithm of Hopkins and May (2011).
 *
 * Make one pass through the n-best list to score the translations since e.g. TER-based
 * metrics are very slow.
 */
private List<Triple<Double, Integer, Integer>> sample(List<RichTranslation<IString, String>> translations,
        List<Sequence<IString>> references, int sourceId, Sequence<IString> source,
        SentenceLevelMetric<IString, String> scoreMetric) {
    double[] tgtToScore = new double[translations.size()];
    for (int i = 0, max = translations.size(); i < max; ++i) {
        // Cache the scoring metric values.
        Sequence<IString> nBestItem = translations.get(i).translation;
        tgtToScore[i] = scoreMetric.score(sourceId, source, references, nBestItem);
    }
    List<Triple<Double, Integer, Integer>> v = new ArrayList<Triple<Double, Integer, Integer>>(gamma);
    final int jMax = translations.size();
    if (jMax == 0) {
        System.err.println("No translations for input sentence #" + sourceId);
        return v;
    }
    for (int g = 0; g < gamma; g++) {
        int j = ThreadLocalRandom.current().nextInt(jMax);
        int jPrime = ThreadLocalRandom.current().nextInt(jMax);
        double gJ = tgtToScore[j];
        double gJPrime = tgtToScore[jPrime];
        double absDiff = Math.abs(gJ - gJPrime);
        if (absDiff >= nThreshold) {
            if (gJ > gJPrime) {
                v.add(new Triple<Double, Integer, Integer>(absDiff, j, jPrime));
            } else {
                v.add(new Triple<Double, Integer, Integer>(absDiff, jPrime, j));
            }
        }
    }
    return v;
}
Example #7
Source File: Reindex.java From uncc2014watsonsim with GNU General Public License v2.0

private void indexAll(String query) throws SQLException {
    PreparedStatement statements = db.prep(query);
    statements.setFetchSize(10000);
    ResultSet rs = statements.executeQuery();
    AtomicInteger c = new AtomicInteger();
    Stream.generate(() -> {
        List<Triple<String, String, String>> block = new ArrayList<>(300);
        try {
            synchronized (rs) {
                while (block.size() < 300 && !rs.isAfterLast() && rs.next()) {
                    // The usual case, another result
                    block.add(Triple.makeTriple(
                            rs.getString(1), rs.getString(2), rs.getString(3)));
                }
            }
        } catch (SQLException e) {
            // Sometimes the resultset closes while we use it.
            // What can we do about it?
            e.printStackTrace();
        }
        return block;
    }).parallel().flatMap((block) -> {
        if (!block.isEmpty()) {
            for (Triple<String, String, String> row : block) {
                Passage pass = new Passage(
                        "none", row.first, row.second, row.third);
                for (Segment i : indexers) {
                    i.accept(pass);
                }
            }
            int count = c.addAndGet(block.size());
            System.out.println("Indexed " + count);
        }
        // It's looking for the first non-empty stream
        if (block.isEmpty()) return Stream.of("done");
        else return Stream.empty();
    }).findFirst();
}
Example #8
Source File: StanfordQuery.java From Library with MIT License

@Override
public Feature getQuery(String query) {
    StringBuilder capitalizeString = new StringBuilder();
    String[] str = query.split(" ");
    for (int i = 0; i < str.length; i++) {
        capitalizeString.append(str[i].substring(0, 1).toUpperCase() + str[i].substring(1) + " ");
    }
    capitalizeString.deleteCharAt(capitalizeString.length() - 1);
    query = capitalizeString.toString();

    StringBuilder authorQuery = new StringBuilder();
    StringBuilder bookNameQuery = new StringBuilder();
    try {
        int startIndex = 0, endIndex = 0;
        List<Triple<String, Integer, Integer>> triples = classifier.classifyToCharacterOffsets(query);
        for (Triple<String, Integer, Integer> trip : triples) {
            startIndex = trip.second();
            endIndex = trip.third();
        }
        authorQuery.append(query, startIndex, endIndex);
        bookNameQuery.append(query, 0, startIndex);
        bookNameQuery.append(query.substring(endIndex));
        LOGGER.debug("Author Query " + authorQuery + " :: Book query :: " + bookNameQuery);
    } catch (Exception e) {
        LOGGER.error("Exception while extracting features ", e);
    }
    bookNameQuery.toString();

    Feature feature = new Feature();
    if (bookNameQuery.toString().isEmpty()) {
        feature.setFeatureType(FeatureType.AUTHOR);
        feature.setQuery(authorQuery.toString());
    } else {
        feature.setFeatureType(FeatureType.BOOK_NAME);
        feature.setQuery(bookNameQuery.toString());
    }
    return feature;
}
Example #9
Source File: Sentence.java From Criteria2Query with Apache License 2.0

public List<Triple<Integer, Integer, String>> getRelations() {
    return relations;
}
Example #10
Source File: Sentence.java From Criteria2Query with Apache License 2.0

public void setRelations(List<Triple<Integer, Integer, String>> relations) {
    this.relations = relations;
}
Example #11
Source File: QueryFormulateServiceImpl.java From Criteria2Query with Apache License 2.0

@Override
public CdmCriteria translateBySentence(Sentence s, boolean include) {
    CdmCriteria cdmc = new CdmCriteria();
    cdmc.setText(s.getText());
    cdmc.setDesc(GlobalSetting.c2qversion);
    List<Term> terms = s.getTerms();
    List<Triple<Integer, Integer, String>> relations = s.getRelations();
    List<CdmCriterion> clist = new ArrayList<CdmCriterion>();
    for (Term t : terms) {
        if (Arrays.asList(GlobalSetting.primaryEntities).contains(t.getCategorey())) {
            CdmCriterion cunit = new CdmCriterion();
            cunit.setOrginialtext(t.getText());
            cunit.setCriterionId(t.getTermId());
            cunit.setConceptsetId(t.getVocabularyId());
            cunit.setNeg(t.isNeg());
            cunit.setDomain(t.getCategorey());
            for (Triple<Integer, Integer, String> r : relations) {
                if (t.getTermId() == r.first) {
                    if (r.third.equals("has_temporal")) {
                        TemporalConstraint[] temporalwindow = normalizeTemporal(findTermById(terms, r.second).getText());
                        if (temporalwindow == null) {
                            continue;
                        }
                        cunit.setTemporalwindow(temporalwindow);
                    } else if (r.third.equals("has_value")) {
                        String valuestr = findTermById(terms, r.second).getText();
                        Map<String, String> map = new HashMap<String, String>();
                        if (t.getCategorey().equals("Demographic") && t.getText().toLowerCase().contains("age")) {
                            map.put("age_range", valuestr);
                            System.err.println("value_str=" + valuestr);
                        } else if (t.getCategorey().equals("Measurement")) {
                            map.put("measure_value", valuestr);
                        }
                        cunit.setAttributes(map);
                    }
                }
            }
            clist.add(cunit);
        }
    }
    cdmc.setClist(clist);
    cdmc.setLogic_groups(s.getLogic_groups());
    return cdmc;
}
Example #12
Source File: IOUtil.java From Criteria2Query with Apache License 2.0

public static String Pargraph2List(List<Paragraph> incps, String inctag) {
    StringBuffer sb = new StringBuffer();
    if (incps != null) {
        for (Paragraph p : incps) {
            List<Sentence> sents = p.getSents();
            if (sents != null) {
                for (Sentence s : sents) {
                    List<Term> terms = s.getTerms();
                    List<Triple<Integer, Integer, String>> relations = s.getRelations();
                    for (Term t : terms) {
                        if (Arrays.asList(GlobalSetting.primaryEntities).contains(t.getCategorey())) {
                            CdmCriterion cunit = new CdmCriterion();
                            cunit.setOrginialtext(t.getText());
                            cunit.setCriterionId(t.getTermId());
                            cunit.setConceptsetId(t.getVocabularyId());
                            cunit.setNeg(t.isNeg());
                            cunit.setDomain(t.getCategorey());
                            String temporalstr = "no_temporal";
                            String valuestr = "no_value";
                            for (Triple<Integer, Integer, String> r : relations) {
                                if (t.getTermId() == r.first) {
                                    if (r.third.equals("has_temporal")) {
                                        temporalstr = findTermById(terms, r.second).getText();
                                    } else if (r.third.equals("has_value")) {
                                        valuestr = findTermById(terms, r.second).getText();
                                    }
                                }
                                // allinfo.add(inctag+"\t"+t.getText()+"\t"+t.getCategorey()+"\t"+t.isNeg()+"\t"+temporalstr+"\t"+valuestr+"\n");
                            }
                            sb.append(inctag + "\t" + t.getText() + "\t" + t.getCategorey() + "\t"
                                    + t.isNeg() + "\t" + temporalstr + "\t" + valuestr + "\n");
                        }
                    }
                }
            }
        }
    }
    return sb.toString();
}
Example #13
Source File: StanfordNamedEntityExtractor.java From CLIFF with Apache License 2.0

/**
 * Get extracted locations from a plain-text body.
 *
 * @param textToParse Text content to perform extraction on.
 * @param manuallyReplaceDemonyms Can slow down performance quite a bit
 * @param language What language to parse in
 * @return All the entities mentioned
 */
@Override
public ExtractedEntities extractEntities(String textToParse, boolean manuallyReplaceDemonyms, String language) {
    ExtractedEntities entities = new ExtractedEntities();

    if (textToParse == null || textToParse.length() == 0) {
        logger.warn("input to extractEntities was null or zero!");
        return entities;
    }

    String text = textToParse;
    if (manuallyReplaceDemonyms) {  // this is a noticeable performance hit
        logger.debug("Replacing all demonyms by hand");
        text = demonyms.replaceAll(textToParse);
    }

    AbstractSequenceClassifier<CoreMap> recognizer = recognizerByLanguage.get(language);
    // extract entities as <Entity Type, Start Index, Stop Index>
    List<Triple<String, Integer, Integer>> extractedEntities = recognizer.classifyToCharacterOffsets(text);

    if (extractedEntities != null) {
        for (Triple<String, Integer, Integer> extractedEntity : extractedEntities) {
            String entityName = text.substring(extractedEntity.second(), extractedEntity.third());
            int position = extractedEntity.second();
            switch (extractedEntity.first) {
                case "PERS":      // spanish
                case "I-PER":     // german
                case "PERSON":    // english
                    if (personToPlaceSubstitutions.contains(entityName)) {
                        entities.addLocation(getLocationOccurrence(
                                personToPlaceSubstitutions.getSubstitution(entityName), position));
                        logger.debug("Changed person " + entityName + " to a place");
                    } else {
                        PersonOccurrence person = new PersonOccurrence(entityName, position);
                        entities.addPerson(person);
                    }
                    break;
                case "LUG":
                case "I-LOC":     // german
                case "LOCATION":  // english
                    if (!locationBlacklist.contains(entityName)) {
                        entities.addLocation(getLocationOccurrence(entityName, position));
                    } else {
                        logger.debug("Ignored blacklisted location " + entityName);
                    }
                    break;
                case "ORG":           // spanish
                case "I-ORG":         // german
                case "ORGANIZATION":  // english
                    OrganizationOccurrence organization = new OrganizationOccurrence(entityName, position);
                    entities.addOrganization(organization);
                    break;
                case "OTROS":  // spanish
                case "MISC":   // if you're using the slower 4class model
                    if (demonyms.contains(entityName)) {
                        logger.debug("Found and adding a MISC demonym " + entityName);
                        entities.addLocation(getLocationOccurrence(entityName, position));
                    }
                    break;
                default:
                    logger.error("Unknown NER type :" + extractedEntity.first);
            }
        }
    }
    return entities;
}
Example #14
Source File: StanfordNamedEntityExtractor.java From CLIFF with Apache License 2.0

@Override
@SuppressWarnings("rawtypes")
public ExtractedEntities extractEntitiesFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms, String language) {
    ExtractedEntities entities = new ExtractedEntities();

    if (sentences.length == 0) {
        logger.warn("input to extractEntities was null or zero!");
        return entities;
    }

    if (manuallyReplaceDemonyms) {  // this is a noticeable performance hit
        logger.debug("Replacing all demonyms by hand");
    }

    AbstractSequenceClassifier<CoreMap> recognizer = recognizerByLanguage.get(language);

    for (Map s : sentences) {
        String storySentencesId = s.get("story_sentences_id").toString();
        String text = s.get("sentence").toString();
        if (manuallyReplaceDemonyms) {  // this is a noticeable performance hit
            text = demonyms.replaceAll(text);
        }
        // extract entities as <Entity Type, Start Index, Stop Index>
        List<Triple<String, Integer, Integer>> extractedEntities = recognizer.classifyToCharacterOffsets(text);
        if (extractedEntities != null) {
            for (Triple<String, Integer, Integer> extractedEntity : extractedEntities) {
                String entityName = text.substring(extractedEntity.second(), extractedEntity.third());
                int position = extractedEntity.second();
                switch (extractedEntity.first) {
                    case "PERSON":
                        if (personToPlaceSubstitutions.contains(entityName)) {
                            entities.addLocation(getLocationOccurrence(
                                    personToPlaceSubstitutions.getSubstitution(entityName), position));
                            logger.debug("Changed person " + entityName + " to a place");
                        } else {
                            PersonOccurrence person = new PersonOccurrence(entityName, position);
                            entities.addPerson(person);
                        }
                        break;
                    case "LOCATION":
                        if (!locationBlacklist.contains(entityName)) {
                            LocationOccurrence loc = getLocationOccurrence(entityName, position);
                            // save the sentence id here
                            entities.addLocation(new SentenceLocationOccurrence(loc.getText(), storySentencesId));
                        } else {
                            logger.debug("Ignored blacklisted location " + entityName);
                        }
                        break;
                    case "ORGANIZATION":
                        OrganizationOccurrence organization = new OrganizationOccurrence(entityName, position);
                        entities.addOrganization(organization);
                        break;
                    case "MISC":  // if you're using the slower 4class model
                        if (demonyms.contains(entityName)) {
                            logger.debug("Found and adding a MISC demonym " + entityName);
                            entities.addLocation(getLocationOccurrence(entityName, position));
                        }
                        break;
                    default:
                        logger.error("Unknown NER type :" + extractedEntity.first);
                }
            }
        }
    }
    return entities;
}
Example #15
Source File: WorkflowDemoNERD.java From CLAVIN-NERD with GNU General Public License v2.0

/**
 * Sometimes, you might already be using Stanford NER elsewhere in
 * your application, and you'd like to just pass the output from
 * Stanford NER directly into CLAVIN, without having to re-run the
 * input through Stanford NER just to use CLAVIN. This example
 * shows you how to very easily do exactly that.
 *
 * @throws IOException
 * @throws ClavinException
 */
private static void resolveStanfordEntities() throws IOException, ClavinException {
    /*#####################################################################
     *
     * Start with Stanford NER -- no need to get CLAVIN involved for now.
     *
     *###################################################################*/

    // instantiate Stanford NER entity extractor
    InputStream mpis = WorkflowDemoNERD.class.getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop");
    Properties mp = new Properties();
    mp.load(mpis);
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    // Unstructured text file about Somalia to be geoparsed
    File inputFile = new File("src/test/resources/sample-docs/Somalia-doc.txt");

    // Grab the contents of the text file as a String
    String inputString = TextUtils.fileToString(inputFile);

    // extract entities from input text using Stanford NER
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(inputString);

    /*#####################################################################
     *
     * Now, CLAVIN comes into play...
     *
     *###################################################################*/

    // convert Stanford NER output to ClavinLocationResolver input
    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, inputString);

    // instantiate the CLAVIN location resolver
    ClavinLocationResolver clavinLocationResolver = new ClavinLocationResolver(new LuceneGazetteer(new File("./IndexDirectory")));

    // resolve location entities extracted from input text
    List<ResolvedLocation> resolvedLocations = clavinLocationResolver.resolveLocations(locationsForCLAVIN, 1, 1, false);

    // Display the ResolvedLocations found for the location names
    for (ResolvedLocation resolvedLocation : resolvedLocations)
        System.out.println(resolvedLocation);
}
Example #16
Source File: IConceptMappingService.java From Criteria2Query with Apache License 2.0
public List<Triple<Integer,Integer,String>> getAllRelsByDoc(Document doc);