edu.stanford.nlp.util.Pair Java Examples

Source File: IntelKBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Classifies {@code input} by trying the Semgrex rules registered for each relation type.
 *
 * <p>A relation type is only considered when it has rules, its entity type equals the
 * subject type of the input, and its valid named-entity labels contain the object type.
 *
 * @param input the input to classify; its subject type, object type and sentence are read
 * @return the canonical name of the first relation (in {@code RelationType.values()} order)
 *         whose rules match, or {@code NO_RELATION} when none match; the score is always 1.0
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
    for (RelationType candidate : RelationType.values()) {
        boolean applicable = rules.containsKey(candidate)
                && candidate.entityType == input.subjectType
                && candidate.validNamedEntityLabels.contains(input.objectType);
        if (!applicable) {
            continue;
        }

        Collection<SemgrexPattern> patterns = rules.get(candidate);
        CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);

        // Try the enhanced++ graph first; the alternative graph is only consulted on a miss.
        if (matches(sentence, patterns, input,
                sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class))) {
            return Pair.makePair(candidate.canonicalName, 1.0);
        }
        if (matches(sentence, patterns, input,
                sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class))) {
            return Pair.makePair(candidate.canonicalName, 1.0);
        }
    }

    return Pair.makePair(NO_RELATION, 1.0);
}

Source File: AbstractDependencyLanguageModelFeaturizer.java From phrasal with GNU General Public License v3.0

6 votes

/**
 * Reads dependency structures from a CoNLL file and rebuilds the per-sentence caches.
 *
 * <p>The forward, reverse and reachable-node caches are replaced with fresh maps. For every
 * structure returned by {@code DependencyUtils.getDependenciesFromCoNLLFileReader}, the
 * reverse and forward dependencies are stored under a running index starting at 0.
 *
 * @param filename path of the CoNLL file to read
 * @throws IOException if the file cannot be opened or read
 */
public void loadDependencies(String filename) throws IOException {
  // try-with-resources: the reader is closed even when parsing fails part-way through.
  try (LineNumberReader reader = IOTools.getReaderFromFile(filename)) {
    forwardDependenciesCache = new HashMap<Integer, Map<Integer, HashSet<Integer>>>();
    reverseDependenciesCache = new HashMap<Integer, Map<Integer, Integer>>();
    reachableNodesCache = new HashMap<Integer, Map<Integer, Set<Integer>>>();

    HashMap<Integer, Pair<IndexedWord, List<Integer>>> deps;
    int i = 0;
    while ((deps = DependencyUtils.getDependenciesFromCoNLLFileReader(reader, true, true)) != null) {
      reverseDependenciesCache.put(i, DependencyUtils.getReverseDependencies(deps));

      Map<Integer, HashSet<Integer>> forwardDeps = new HashMap<Integer, HashSet<Integer>>();
      for (Map.Entry<Integer, Pair<IndexedWord, List<Integer>>> entry : deps.entrySet()) {
        // Copy the child list into a set in one step instead of adding element by element.
        forwardDeps.put(entry.getKey(), new HashSet<Integer>(entry.getValue().second));
      }
      forwardDependenciesCache.put(i, forwardDeps);
      i++;
    }
  }
}

Source File: TranslationLayout.java From phrasal with GNU General Public License v3.0

6 votes

/**
 * Creates a label for a full translation and registers it under {@code name}.
 *
 * <p>The label spans {@code numColumns} grid columns. Its grid row is taken from
 * {@code unusedRows} when that list is non-empty; otherwise the full-translation row
 * counter is incremented and the row is placed after {@code numRows}.
 *
 * @param name    key under which the row is stored in {@code fullTranslations}
 * @param trans   text shown in the label
 * @param bgColor background color of the label (the foreground is always white)
 * @return always {@code true}
 */
public boolean addTranslationRow(String name, String trans, Color bgColor) {
  JLabel rowLabel = new JLabel(trans);
  rowLabel.setOpaque(true);
  rowLabel.setBackground(bgColor);
  rowLabel.setForeground(Color.WHITE);

  GridBagConstraints constraints = new GridBagConstraints();
  constraints.fill = GridBagConstraints.HORIZONTAL;
  constraints.gridx = 0;
  constraints.ipady = 20;
  constraints.gridwidth = numColumns;

  if (!unusedRows.isEmpty()) {
    constraints.gridy = unusedRows.removeFirst();
  } else {
    numFullTranslationRows++;
    constraints.gridy = numRows + numFullTranslationRows;
  }

  // The panel may not exist yet; the row is still recorded so it can be looked up later.
  if (panel != null) {
    panel.add(rowLabel, constraints);
  }
  fullTranslations.put(name, new Pair<Integer, JLabel>(constraints.gridy, rowLabel));

  return true;
}

Source File: Phrase.java From uncc2014watsonsim with GNU General Public License v2.0

6 votes

/**
 * Builds a lookup from the head index of each non-representative coreference mention
 * to a pair of (that mention, the representative mention of its chain).
 *
 * The chains are read from the CorefChainAnnotation of the phrase's memoized
 * Phrase.coreNLP value. If that annotation is null the result is an empty map.
 * When two mentions share a head index, the later one overwrites the earlier one.
 *
 * @param p the phrase whose coreference chains are inspected
 * @return a new HashMap keyed by pair.first.headIndex
 */
private static Map<Integer, Pair<CorefMention, CorefMention>> _unpronoun(Phrase p) {
	Stream<Pair<CorefMention, CorefMention>> s =
			Stream.of(p.memo(Phrase.coreNLP).get(CorefChainAnnotation.class))
		.filter(Objects::nonNull)  // A null annotation yields an empty stream, hence an empty result
		.flatMap(chains -> chains.entrySet().stream()) // Disassemble the map into its entries
	    .flatMap(entry -> {
			// Link each mention of the chain to its representative (main) mention
			CorefMention main = entry.getValue().getRepresentativeMention();
			return entry.getValue().getMentionsInTextualOrder().stream()
				// Identity comparison: skip only the representative mention object itself
				.filter(mention -> mention != main)
				.map(mention -> makePair(mention, main));
		});
	// Type inference chokes here so write it down then return.
	// NOTE: the combiner is a no-op, which is only correct because this stream is
	// sequential; a parallel stream would silently drop partial results.
	return s.collect(HashMap::new,
			(m, pair) -> m.put(pair.first.headIndex, pair),
			(l, r) -> {});
}

Source File: KBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Returns the first relation whose Semgrex rules match the given input.
 *
 * <p>Relation types are visited in {@code RelationType.values()} order. A type is skipped
 * unless rules exist for it, its entity type is the input's subject type, and the input's
 * object type is among its valid named-entity labels.
 *
 * @param input the input to classify
 * @return a pair of the matched relation's canonical name (or {@code NO_RELATION} when
 *         nothing matches) and the constant score 1.0
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
  for (RelationType relation : RelationType.values()) {
    if (!rules.containsKey(relation)) {
      continue;
    }
    if (relation.entityType != input.subjectType) {
      continue;
    }
    if (!relation.validNamedEntityLabels.contains(input.objectType)) {
      continue;
    }

    Collection<SemgrexPattern> relationRules = rules.get(relation);
    CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);

    // The alternative dependency graph is only tried when the enhanced++ graph does not match.
    boolean matched = matches(sentence, relationRules, input,
        sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class));
    if (!matched) {
      matched = matches(sentence, relationRules, input,
          sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class));
    }
    if (matched) {
      return Pair.makePair(relation.canonicalName, 1.0);
    }
  }

  return Pair.makePair(NO_RELATION, 1.0);
}

Source File: TextualSimilarity.java From ADW with GNU General Public License v3.0

6 votes

/**
 * Processes a sentence with the SentenceProcessor, extracts its terms through a
 * StanfordSentence, and hands them to fixTerms.
 * 
 * NOTE(review): stop-word handling is controlled by {@code discardStopwords}, which is
 * not a parameter of this method (presumably a field of the enclosing class -- confirm).
 * 
 * @param sentence
 * 			input sentence, space delimited
 * @return
 * 		a pair containing <list of word-pos, remaining not-handled terms>  
 * 		
 */
public Pair<List<String>, List<String>> getStanfordSentence(String sentence)
{
	List<WordLemmaTag> wlts = SentenceProcessor.getInstance().processSentence(sentence, false);
	
	List<String> terms = null;
	StanfordSentence sSentence = StanfordSentence.fromLine(Strings.join(wlts," "));
	
	try
	{
		 terms = sSentence.getTerms(TAGS, 
				 Language.EN, 
				 null, 
				 MultiwordBelongingTo.WORDNET, 
				 CompoundingParameter.ALLOW_MULTIWORD_EXPRESSIONS,
				 CompoundingParameter.APPEND_POS);	 
	}
	catch(Exception e)
	{
		// The failure is only printed; terms stays null and is passed to fixTerms as such.
		e.printStackTrace();
	}

	//discards OOVs, and tries to map incorrect pos-tags to the correct ones
	return fixTerms(terms, discardStopwords);
}

Source File: Preprocess.java From ADW with GNU General Public License v3.0

6 votes

/**
 * Applies {@code caseFixer} to every pair and writes the results to {@code path},
 * one pair per line with the two elements separated by a tab.
 * 
 * The file is overwritten. Any exception is printed to standard error and the method
 * returns normally.
 * 
 * @param pairs
 * 			the pairs to fix
 * @param path
 * 			path of the output file
 */
public static void fixAllCasings(List<Pair<String,String>> pairs, String path)
{
	// try-with-resources closes the writer even when a write or caseFixer call throws
	try (BufferedWriter bw = new BufferedWriter(new FileWriter(path, false)))
	{
		for(Pair<String,String> aPair : pairs)
		{
			Pair<String,String> fixedPair = caseFixer(aPair);
			
			bw.write(fixedPair.first+"\t"+fixedPair.second+"\n");
		}
	}
	catch(Exception e)
	{
		e.printStackTrace();
	}
}

Source File: IntelKBPEnsembleExtractor.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Dispatches classification to the method that implements the configured
 * {@code ensembleStrategy}.
 *
 * @param input the input to classify
 * @return the result of the strategy-specific classification method
 * @throws UnsupportedClassVersionError if the strategy has no matching case
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
    final Pair<String, Double> prediction;
    switch (ensembleStrategy) {
        case DEFAULT:
            prediction = classifyDefault(input);
            break;
        case HIGHEST_SCORE:
            prediction = classifyWithHighestScore(input);
            break;
        case VOTE:
            prediction = classifyWithVote(input);
            break;
        case WEIGHTED_VOTE:
            prediction = classifyWithWeightedVote(input);
            break;
        case HIGH_RECALL:
            prediction = classifyWithHighRecall(input);
            break;
        case HIGH_PRECISION:
            prediction = classifyWithHighPrecision(input);
            break;
        default:
            throw new UnsupportedClassVersionError(ensembleStrategy + " not supported");
    }
    return prediction;
}

Source File: Node.java From dependensee with GNU General Public License v2.0

6 votes

/**
 * Computes the number of edges on the shortest path from this node to {@code n},
 * following parent and child links breadth-first.
 *
 * @param n the node to reach (compared by identity)
 * @return the path length, 0 if {@code n} is this node, or {@code Integer.MAX_VALUE}
 *         if {@code n} cannot be reached
 */
public int getPathLength(Node n) {
    Queue<Pair<Node, Integer>> q = new LinkedList<Pair<Node, Integer>>();
    Set<Node> marked = new HashSet<Node>();
    q.add(new Pair<Node, Integer>(this, 0));
    marked.add(this);
    while (!q.isEmpty()) {
        Pair<Node, Integer> v = q.remove();
        if (v.first == n) {
            return v.second;
        }
        // Set.add returns false for nodes that were already visited, so each node is
        // enqueued at most once.
        if (v.first.parent != null && marked.add(v.first.parent)) {
            q.add(new Pair<Node, Integer>(v.first.parent, v.second + 1));
        }
        for (Node node : v.first.children) {
            // Previously children were enqueued unconditionally, which re-explored
            // subtrees already visited and could loop forever on a cyclic child link.
            if (marked.add(node)) {
                q.add(new Pair<Node, Integer>(node, v.second + 1));
            }
        }
    }
    return Integer.MAX_VALUE;
}

Source File: TranslationLayout.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Removes the label registered under {@code name} from the panel and records its grid
 * row in {@code unusedRows}.
 *
 * @param name key of the translation row
 * @return {@code true} if a row was registered under {@code name}, {@code false} otherwise
 */
public boolean removeTranslationRow(String name) {
  Pair<Integer, JLabel> row = fullTranslations.get(name);
  if (row == null) {
    return false;
  }

  unusedRows.addFirst(row.first());
  if (panel != null) {
    panel.remove(row.second());
  }
  return true;
}

Source File: KBPRelationExtractor.java From InformationExtraction with GNU General Public License v3.0

5 votes

/**
 * Classifies every example in parallel, accumulates the predictions into an
 * {@code Accuracy} object, and optionally prints one line per prediction.
 *
 * <p>Each printed line is the predicted label, a tab, and the score formatted as
 * {@code 0.0000}; lines are emitted in encounter order of the stream. A progress line is
 * logged after every 1000 classified examples.
 *
 * @param examples   pairs of (input, gold label) to evaluate
 * @param predictOut stream that receives the prediction lines, if present
 * @return the accumulated accuracy
 */
default Accuracy computeAccuracy(Stream<Pair<KBPInput, String>> examples,
                                 Optional<PrintStream> predictOut) {
  forceTrack("Accuracy");
  Accuracy accuracy = new Accuracy();
  AtomicInteger testI = new AtomicInteger(0);
  DecimalFormat confidenceFormat = new DecimalFormat("0.0000");
  forceTrack("Featurizing");
  examples.parallel().map(example -> {
    Pair<String, Double> predicted = this.classify(example.first);
    synchronized (accuracy) {
      accuracy.predict(Collections.singleton(predicted.first), Collections.singleton(example.second));
    }
    // Keep the counter value returned by the increment so the logged number is the one
    // that triggered the message, not whatever another thread has advanced it to.
    int seen = testI.incrementAndGet();
    if (seen % 1000 == 0) {
      String progress;
      synchronized (accuracy) {
        // Read under the same lock that guards the updates above.
        progress = accuracy.toOneLineString();
      }
      log(KBPRelationExtractor.class, "[" + seen + "]  " + progress);
    }
    // DecimalFormat is not thread-safe and this lambda runs on several threads.
    synchronized (confidenceFormat) {
      return predicted.first + "\t" + confidenceFormat.format(predicted.second);
    }
  })
    .forEachOrdered(line -> predictOut.ifPresent(out -> out.println(line)));
  endTrack("Featurizing");
  log(accuracy.toString());
  endTrack("Accuracy");
  return accuracy;
}

Source File: KBPTokensregexExtractor.java From InformationExtraction with GNU General Public License v3.0

5 votes

/**
 * Command-line entry point: fills the class options from {@code args}, builds an
 * extractor from {@code DIR}, reads the dataset in {@code TEST_FILE} and computes its
 * accuracy. Predictions go to standard output when {@code PREDICTIONS} is "stdout"
 * (case-insensitive), otherwise to the file it names.
 *
 * @param args command-line options
 * @throws IOException if the extractor or the dataset cannot be loaded
 */
public static void main(String[] args) throws IOException {
  RedwoodConfiguration.standard().apply();  // Apply the standard Redwood logging configuration.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPTokensregexExtractor.class, args);
  edu.stanford.nlp.ie.KBPTokensregexExtractor extractor = new edu.stanford.nlp.ie.KBPTokensregexExtractor(DIR);
  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);

  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(target -> {
    if ("stdout".equalsIgnoreCase(target)) {
      return System.out;
    }
    try {
      return new PrintStream(new FileOutputStream(target));
    } catch (IOException e) {
      // Lambdas cannot throw checked exceptions, so rethrow unchecked.
      throw new RuntimeIOException(e);
    }
  }));

}

Source File: MinimumBayesRisk.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Command-line entry point. Reads the options {@code s} (scale), {@code o}
 * (orientation, "risk" or the default "utility") and {@code m} (metric name), loads the
 * n-best lists from the file given as the unnamed argument, runs every list through a
 * {@code MulticoreWrapper} and passes each available result to {@code DumpRescored}.
 *
 * @param args command-line arguments; the usage text is printed and the JVM exits
 *             with status -1 when none are given
 * @throws IOException declared by the original signature; presumably raised while
 *                     reading the n-best list file
 */
public static void main(String[] args) throws IOException {
  if (args.length == 0) {
    System.err.print(usage());
    System.exit(-1);
  }

  final Properties opts = StringUtils.argsToProperties(args, argDefs());
  final double scale = PropertiesUtils.getDouble(opts, "s", DEFAULT_SCALE);
  final boolean risk = "risk".equals(opts.getProperty("o", "utility"));
  final String metricName = opts.getProperty("m", DEFAULT_METRIC);
  final String nbestFile = opts.getProperty("");

  BasicNBestList allLists = new BasicNBestList(nbestFile);
  MulticoreWrapper<List<BasicNBestEntry>, List<Pair<Double, String>>> pool =
    new MulticoreWrapper<List<BasicNBestEntry>, List<Pair<Double, String>>>(0, new Processor(metricName, risk, scale), true);

  for (List<BasicNBestEntry> oneList : allLists) {
    pool.put(oneList);
    // Emit whatever results are already available before submitting more work.
    while (pool.peek()) {
      DumpRescored(pool.poll());
    }
  }

  // Wait for the remaining work, then emit the rest of the results.
  pool.join();
  while (pool.peek()) {
    DumpRescored(pool.poll());
  }
}

Source File: PrefixTagger.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Determine best tag based on current word and its immediate predecessors.
 *
 * <p>The tagged position is {@code s.length - 1 + o}. When {@code CACHE_POS} is set,
 * results are looked up in and stored to {@code cache}, keyed by the wrapped input array.
 *
 * @param s
 *          <i>leftWindow</i> plus one words
 * @param o
 *          Offset with respect to last position.
 * @return Best tag and its probability.
 */
public Pair<IString, Float> getBestTag(IString[] s, int o) {
  // Index within s of the word being tagged.
  int loc = s.length - 1 + o;

  IStringArrayWrapper aw = null;
  Pair<IString, Float> tag;

  // Return the cached result for this exact word sequence, if any.
  if (CACHE_POS) {
    aw = new IStringArrayWrapper(s);
    tag = cache.get(aw);
    if (tag != null)
      return tag;
  }

  init(s);

  // Start every position at its first possible tag value.
  int[] bestTags = new int[len];
  int[][] vals = new int[len][];
  for(int pos = 0 ; pos < len ; pos++) {
    vals[pos] = getPossibleValues(pos);
    bestTags[pos] = vals[pos][0];
  }

  // Score the candidates at loc; the scorer is set up before and cleaned up after.
  this.initializeScorer();
  double[] scores = scoresOf(bestTags, loc);

  // Index of the highest-scoring candidate at loc.
  int am = ArrayMath.argmax(scores);

  // TODO
  bestTags[loc] = vals[loc][am];
  cleanUpScorer();

  // The score is narrowed from double to float for the returned pair.
  tag = new Pair<IString, Float>(new IString(maxentTagger.getTag(bestTags[loc])),
          (float) scores[am]);
  if (CACHE_POS)
    cache.put(aw, tag);
  return tag;
}

Source File: PrefixTagger.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Tag text file using PrefixTagger.
 *
 * <p>Each line is split on whitespace after {@code Tagger.EOS_WORD} has been appended.
 * For every token except the last one, a window of up to {@code leftWindow} preceding and
 * {@code rightWindow} following tokens is tagged, and {@code word/tag} is printed to
 * standard output, space separated, one output line per input line.
 *
 * @param textFile
 *          File to tag
 */
public void tagFile(String textFile) {

  for (String line : ObjectBank.getLineIterator(new File(textFile))) {

    // The regex "$" matches at the end of the input, so this appends a space
    // that separates the last word from the end-of-sentence marker added below.
    line = line.replaceAll("$", " ");
    line = line + Tagger.EOS_WORD;
    IString[] in = IStrings.toIStringArray(line.split("\\s+"));

    // System.err.println("sent: "+Arrays.toString(in));
    // Stop before the last token, which is the appended end-of-sentence marker.
    for (int i = 0; i < in.length - 1; ++i) {
      // Window [from, to) around token i, clipped to the sentence boundaries.
      int from = Math.max(0, i - leftWindow);
      int to = Math.min(i + 1 + rightWindow, in.length);
      int offset = -rightWindow;
      IString[] seq = new IString[to - from];
      System.arraycopy(in, from, seq, 0, seq.length);
      // System.err.printf("tagging(%d,%d,%d): %s\n",from,to,offset,Arrays.toString(seq));
      // NOTE(review): offset is computed but not passed here, while the word printed below
      // is chosen with it. If this one-argument overload tags the last position, word and
      // tag disagree whenever rightWindow > 0 -- confirm whether getBestTag(seq, offset)
      // was intended.
      Pair<IString, Float> tag = getBestTag(seq);
      if (i > 0)
        System.out.print(" ");
      int loc = seq.length - 1 + offset;
      // System.err.printf("tagging(%d,%d,%d,%s): %s\n",from,to,offset,tag.first.word(),Arrays.toString(seq));
      System.out.print(seq[loc]);
      System.out.print("/");
      System.out.print(tag.first.toString());
    }
    System.out.print("\n");
  }
}

Source File: DependencyUtils.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Inverts a governor-to-dependents mapping into a dependent-to-governor mapping.
 *
 * @param forwardDependencies map from governor index to a pair whose second element is
 *                            the list of its dependent indices; may be {@code null}
 * @return a new map from each dependent index to its governor index, or {@code null}
 *         when the input is {@code null}; if a dependent is listed under several
 *         governors, the one visited last wins
 */
public static Map<Integer, Integer> getReverseDependencies(HashMap<Integer, Pair<IndexedWord, List<Integer>>> forwardDependencies) {
  if (forwardDependencies == null) {
    return null;
  }

  Map<Integer, Integer> governorOf = new HashMap<>();
  for (Map.Entry<Integer, Pair<IndexedWord, List<Integer>>> entry : forwardDependencies.entrySet()) {
    Integer governor = entry.getKey();
    for (Integer dependent : entry.getValue().second) {
      governorOf.put(dependent, governor);
    }
  }
  return governorOf;
}

Source File: BLEUSorter.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Returns the ids of the sentences whose local smoothed scores differ noticeably between
 * the two systems, ordered by ascending score difference (second minus first).
 *
 * <p>A sentence is kept when the first score is 0 and the second is positive, or when the
 * relative change {@code |score2 / score1 - 1|} exceeds {@code minDelta}.
 *
 * @param hyps1      hypotheses of the first system, indexed by sentence id
 * @param hyps2      hypotheses of the second system, indexed by sentence id
 * @param incMetric1 metric used to score {@code hyps1}
 * @param incMetric2 metric used to score {@code hyps2}
 * @return the selected sentence ids in sorted order
 */
static List<Integer> sortSentencesByScore(List<Sequence<IString>> hyps1,
    List<Sequence<IString>> hyps2,
    BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric1,
    BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric2) {
  List<Pair<Double, Integer>> deltas = new ArrayList<Pair<Double, Integer>>();

  for (int id = 0; id < hyps1.size(); ++id) {
    double first = incMetric1.computeLocalSmoothScore(hyps1.get(id), id);
    double second = incMetric2.computeLocalSmoothScore(hyps2.get(id), id);

    // A zero baseline cannot be used as a divisor, so any positive second score counts.
    boolean significant = (first == 0.0)
        ? (second > 0.0)
        : Math.abs(second / first - 1.0) > minDelta;
    if (significant) {
      deltas.add(new Pair<Double, Integer>(second - first, id));
    }
  }

  Collections.sort(deltas, (a, b) -> a.first().compareTo(b.first()));

  List<Integer> orderedIds = new ArrayList<Integer>();
  for (Pair<Double, Integer> delta : deltas) {
    orderedIds.add(delta.second());
  }
  return orderedIds;
}

Source File: IntelKBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0

5 votes

/**
 * Command-line entry point: fills the class options from {@code args}, builds an
 * extractor from {@code DIR}, reads the dataset in {@code TEST_FILE} and computes its
 * accuracy. Predictions are written to standard output when {@code PREDICTIONS} is
 * "stdout" (case-insensitive), otherwise to the file it names.
 *
 * @param args command-line options
 * @throws IOException if the extractor or the dataset cannot be loaded
 */
public static void main(String[] args) throws IOException {
    RedwoodConfiguration.standard().apply();  // Apply the standard Redwood logging configuration.
    ArgumentParser.fillOptions(IntelKBPSemgrexExtractor.class, args);
    IntelKBPSemgrexExtractor extractor = new IntelKBPSemgrexExtractor(DIR);
    List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);

    extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(destination -> {
        if ("stdout".equalsIgnoreCase(destination)) {
            return System.out;
        }
        try {
            return new PrintStream(new FileOutputStream(destination));
        } catch (IOException e) {
            // Lambdas cannot throw checked exceptions, so rethrow unchecked.
            throw new RuntimeIOException(e);
        }
    }));
}

Source File: Messages.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Determines the message type of an HTTP request and deserializes its payload.
 *
 * <p>For any type other than {@code UNKNOWN_REQUEST}, the request parameter named by
 * {@code type.keyName()} is parsed as JSON into {@code type.msgClass()}. Otherwise the
 * message is a fresh {@code UnknownRequest}.
 *
 * @param request the incoming servlet request
 * @return the detected message type paired with the parsed (or unknown) request object
 */
@SuppressWarnings("unchecked")
public static Pair<MessageType,Request>parseRequest(HttpServletRequest request) {
  final MessageType type = getMessageType(request);
  Request message = new UnknownRequest();
  boolean recognized = (type != MessageType.UNKNOWN_REQUEST);
  if (recognized) {
    String payload = request.getParameter(type.keyName());
    message = (Request) gson.fromJson(payload, type.msgClass());
  }
  return new Pair<MessageType,Request>(type, message);
}

Source File: TranslationLayout.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Creates a layout for the given translation with no full-translation rows yet.
 *
 * @param t           the translation to lay out; its source word count becomes the
 *                    number of columns and its phrase count the number of options
 * @param rightToLeft stored in {@code RIGHT_TO_LEFT}
 */
public TranslationLayout(Translation t, boolean rightToLeft) {
  // Values derived from the arguments.
  this.translation = t;
  this.numColumns = this.translation.getNumSourceWords();
  RIGHT_TO_LEFT = rightToLeft;
  this.numOptions = t.numPhrases();

  // Empty bookkeeping state.
  this.numFullTranslationRows = 0;
  this.vPhrases = new ArrayList<VisualPhrase>();
  this.vPhraseLookup = new HashMap<Phrase, VisualPhrase>();
  this.unusedRows = new LinkedList<Integer>();
  this.fullTranslations = new HashMap<String, Pair<Integer, JLabel>>();
}

Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0

5 votes

/**
 * Test to validate that lemma values are checked against the (custom) stopword list.
 *
 * The first element of the stopword annotation is checked against the token's word and
 * the second element against the token's lemma.
 *
 * NOTE: since we're loading the pos model into memory you'll need to set the VM memory size via '-Xms512m -Xmx1048m'
 * @throws Exception
 */
@org.junit.Test
public void testStopwordsWithLemma() throws Exception {

    //setup coreNlp properties for stopwords. Note the custom stopword list and check for lemma property
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, stopword");
    props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");
    props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);
    props.setProperty(StopwordAnnotator.CHECK_LEMMA, "true");

    //get the custom stopword set
    Set<?> stopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true);

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {

        //get the stopword annotation
        Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);

        //first element: is the word itself a stopword?
        String word = token.word().toLowerCase();
        if (stopWords.contains(word)) {
            assertTrue(stopword.first());
        }
        else {
            assertFalse(stopword.first());
        }

        //second element: is the lemma a stopword? (previously asserted first() by mistake,
        //so the lemma flag was never verified)
        String lemma = token.lemma().toLowerCase();
        if (stopWords.contains(lemma)) {
            assertTrue(stopword.second());
        }
        else {
            assertFalse(stopword.second());
        }
    }
}

Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0

5 votes

/**
 * Verifies that tokens are flagged as stopwords according to the custom stopword list,
 * and that the lemma flag stays false when lemma checking is not enabled.
 *
 * @throws Exception if the pipeline cannot be created or run
 */
@org.junit.Test
public void testCustomStopwordList() throws Exception {

    // Pipeline configuration: register the stopword annotator with the custom list.
    Properties pipelineProps = new Properties();
    pipelineProps.put("annotators", "tokenize, ssplit, stopword");
    pipelineProps.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");
    pipelineProps.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);

    // The same custom list, loaded directly, is the expected reference.
    Set<?> expectedStopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true);

    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);

    for (CoreLabel token : document.get(CoreAnnotations.TokensAnnotation.class)) {
        Pair<Boolean, Boolean> flags = token.get(StopwordAnnotator.class);

        boolean expected = expectedStopWords.contains(token.word().toLowerCase());
        if (expected) {
            assertTrue(flags.first());
        } else {
            assertFalse(flags.first());
        }

        // Lemma checking is off in this configuration, so the second flag is always false.
        assertFalse(flags.second());
    }
}

Source File: Edges.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Produces edges describing what anaphora resolution reveals about the noun that a
 * word refers to.
 * 
 * If the word's index has an entry in the phrase's unpronoun map, up to three edges are
 * created for the main mention, one for each attribute of the referring mention that is
 * not UNKNOWN:
 * 
 * _animate(main mention, ___).
 * _gender(main mention, ___).
 * _number(main mention, ___).
 * 
 * @param g the semantic graph used to look up and render the main mention
 * @param w the word to examine
 * @param t the phrase that owns the unpronoun map
 * @return the generated edges; empty when the word has no entry
 */
public static List<Edge> generatePronounEdges(
		SemanticGraph g, IndexedWord w, Phrase t) {
	List<Edge> edges = new ArrayList<>();
	if (!t.getUnpronoun().containsKey(w.index())) {
		return edges;
	}

	// first = the referring mention, second = the main mention of its chain
	Pair<CorefMention, CorefMention> link = t.getUnpronoun().get(w.index());
	String mainNoun = Trees.concatNoun(g, g.getNodeByIndex(link.second.headIndex));

	Animacy animacy = link.first.animacy;
	if (animacy != Animacy.UNKNOWN) {
		edges.add(new Edge(mainNoun, "_animate", animacy.toString()));
	}

	Gender gender = link.first.gender;
	if (gender != Gender.UNKNOWN) {
		edges.add(new Edge(mainNoun, "_gender", gender.toString()));
	}

	Dictionaries.Number count = link.first.number;
	if (count != Dictionaries.Number.UNKNOWN) {
		edges.add(new Edge(mainNoun, "_number", count.toString()));
	}

	return edges;
}

Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0

5 votes

/**
 * Verifies that tokens are flagged as stopwords according to Lucene's standard English
 * stopword set when no custom list is configured.
 *
 * @throws Exception if the pipeline cannot be created or run
 */
@org.junit.Test
public void testLuceneStopwordList() throws Exception {
    Properties pipelineProps = new Properties();
    pipelineProps.put("annotators", "tokenize, ssplit, stopword");
    pipelineProps.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);

    // Reference set: the standard Lucene English stopwords.
    Set<?> expectedStopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    for (CoreLabel token : tokens) {
        Pair<Boolean, Boolean> flags = token.get(StopwordAnnotator.class);

        boolean expected = expectedStopWords.contains(token.word().toLowerCase());
        if (expected) {
            assertTrue(flags.first());
        } else {
            assertFalse(flags.first());
        }

        // Lemma checking is off in this configuration, so the second flag is always false.
        assertFalse(flags.second());
    }
}

Source File: Edges.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Returns the text of the main mention linked to a word, when one exists.
 * 
 * If the phrase's unpronoun map has an entry for the word's index, the mention span of
 * the main mention is returned. Otherwise the word's own segment of the graph is
 * rendered with concatNoun().
 * 
 * @param phrase the phrase that owns the unpronoun map
 * @param graph the semantic graph passed to concatNoun() as a fallback
 * @param word the word to resolve
 * @return the main mention's span, or the concatenated noun for the word
 */
public static String getMainMention(
		Phrase phrase, SemanticGraph graph, IndexedWord word) {
	Pair<CorefMention, CorefMention> link = phrase.getUnpronoun().get(word.index());
	return (link != null)
			? link.second.mentionSpan
			: Trees.concatNoun(graph, word);
}

Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0

5 votes

/**
 * Annotates every token with a pair of flags stored under {@code StopwordAnnotator.class}:
 * the first flag tells whether the lower-cased word is a stopword, the second whether the
 * lower-cased lemma is one. The second flag is always {@code false} when lemma checking is
 * disabled or the token has no lemma.
 *
 * <p>Nothing is annotated when the stopword set is null or empty, or when the annotation
 * has no tokens.
 *
 * @param annotation the annotation whose tokens are flagged
 */
@Override
public void annotate(Annotation annotation) {
    if (stopwords != null && stopwords.size() > 0 && annotation.containsKey(TokensAnnotation.class)) {
        List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            boolean isWordStopword = stopwords.contains(token.word().toLowerCase());

            // Look up the lemma, not the word: the previous code tested token.word() again,
            // so the lemma flag merely duplicated the word flag.
            boolean isLemmaStopword = false;
            if (checkLemma) {
                String lemma = token.lemma();
                // The lemma is absent when no lemmatizer ran before this annotator.
                isLemmaStopword = lemma != null && stopwords.contains(lemma.toLowerCase());
            }

            Pair<Boolean, Boolean> pair = Pair.makePair(isWordStopword, isLemmaStopword);
            token.set(StopwordAnnotator.class, pair);
        }
    }
}

Source File: Preprocess.java From ADW with GNU General Public License v3.0

5 votes

/**
 * Normalizes both elements of every pair (abbreviations, hyphens, currency) and writes
 * them, tab separated and one pair per line, to the offset-map path obtained from
 * ADWConfiguration. Occurrences of " ." are written as ".".
 * 
 * The file is overwritten. Progress is printed to standard output. Any exception is
 * printed to standard error and the method returns normally.
 * 
 * @param pairs
 * 			the pairs to preprocess
 * @param outPath
 * 			not used by this method. NOTE(review): the output always goes to the
 * 			configured offset-map path -- confirm whether outPath was meant instead
 */
public static void pipeline(List<Pair<String,String>> pairs, String outPath)
{
	// try-with-resources closes the writer even when a preprocessing step or write throws
	try (BufferedWriter bw = new BufferedWriter(new FileWriter(ADWConfiguration.getInstance().getOffsetMapPath(), false)))
	{
		int i = 1;
		for(Pair<String,String> aPair : pairs)
		{
			System.out.println("[working on "+ i++ +"]");
			String first = aPair.first;
			String second = aPair.second;

			//n't & 'm => not and am
			first = fixAbbrev(first);
			second = fixAbbrev(second);
			
			//remove hyphens
			first = removeHyphens(first);
			second = removeHyphens(second);
			
			//fixCurrency
			first = fixCurrency(first);
			second = fixCurrency(second);
			
			bw.write(first.replace(" .", ".")+"\t"+second.replace(" .", ".")+"\n");
		}
	}
	catch(Exception e)
	{
		e.printStackTrace();
	}
}

Source File: Preprocess.java From ADW with GNU General Public License v3.0

5 votes

/**
 * Runs one of two checks over the pairs, selected by {@code flag}.
 * 
 * When {@code flag} is true, the pairs are passed to mirrorCompounder and nothing is
 * written. Otherwise every pair is spell-corrected and written to {@code filePath},
 * tab separated and one pair per line; the file is overwritten and any exception is
 * printed to standard error.
 * 
 * @param pairs
 * 			the pairs to check
 * @param flag
 * 			true for mirror-compounding, false for spell checking
 * @param filePath
 * 			output file for the spell-corrected pairs (only used when flag is false)
 */
public static void manualCheck(List<Pair<String,String>> pairs, boolean flag, String filePath)
{
	if(flag)
	{
		//mirror-compounding
		mirrorCompounder(pairs);
		return;
	}

	// try-with-resources closes the writer even when spellCorrect or a write throws
	try (BufferedWriter bw = new BufferedWriter(new FileWriter(filePath, false)))
	{
		//spellchecking
		for(Pair<String,String> aPair : pairs)
		{
			Pair<String,String> fixedPair = spellCorrect(aPair);
			bw.write(fixedPair.first+"\t"+fixedPair.second+"\n");
		}
	}
	catch(Exception e)
	{
		e.printStackTrace();
	}
}

Source File: PairSimilarity.java From ADW with GNU General Public License v3.0

5 votes

/**
 * Delegates to {@code TextualSimilarity.fixPOSmirroring} for the two sentences.
 * 
 * No check is made for an empty second sentence; both lists are passed through as given.
 * 
 * @param firstCookedSentence
 * 			the first sentence
 * @param secondCookedSentence
 * 			the second sentence
 * @return
 * 		the pair of lists returned by fixPOSmirroring
 */
public Pair<List<String>,List<String>> mirrorPosTags(List<String> firstCookedSentence, List<String> secondCookedSentence) 
{
	Pair<List<String>,List<String>> mirrored =
			TextualSimilarity.fixPOSmirroring(firstCookedSentence, secondCookedSentence);
	return mirrored;
}

Source File: ADW.java From ADW with GNU General Public License v3.0

5 votes

/**
 * Checks whether an input string is valid for the given item type.
 * 
 * An invalid input is also logged as a warning.
 * 
 * @param input
 * 			the input string to check
 * @param type
 * 			the item type the input is expected to have
 * @return
 * 		a pair of <validity flag, human-readable message>
 */
public Pair<Boolean,String> evaluateInputType(String input, ItemType type)
{
	if(checkType(input, type))
	{
		return new Pair<Boolean,String>(true,"Valid input type for "+ type +" and string \""+ input +"\".");
	}

	// The same message is used for the log entry and the returned pair
	String problem = "Invalid input type for "+ type +" and string \""+ input +"\"! Please check the input type.";
	log.warn(problem);
	return new Pair<Boolean,String>(false,problem);
}

edu.stanford.nlp.util.Pair Java Examples