edu.stanford.nlp.util.Pair Java Examples
The following examples show how to use
edu.stanford.nlp.util.Pair.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IntelKBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
@Override public Pair<String, Double> classify(KBPInput input) { for (RelationType rel : RelationType.values()) { if (rules.containsKey(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.contains(input.objectType)) { Collection<SemgrexPattern> rulesForRel = rules.get(rel); CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph); boolean matches = matches(sentence, rulesForRel, input, sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)) || matches(sentence, rulesForRel, input, sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class)); if (matches) { //logger.log("MATCH for " + rel + ". " + sentence: + sentence + " with rules for " + rel); return Pair.makePair(rel.canonicalName, 1.0); } } } return Pair.makePair(NO_RELATION, 1.0); }
Example #2
Source File: AbstractDependencyLanguageModelFeaturizer.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Reads dependency structures from {@code filename} and rebuilds the dependency caches.
 *
 * <p>The forward, reverse and reachable-node caches are replaced by fresh maps. Each structure
 * returned by {@code DependencyUtils.getDependenciesFromCoNLLFileReader} is stored under a
 * running index starting at 0; the reachable-node cache is only reset here, not populated.
 *
 * @param filename path of the dependency file to read
 * @throws IOException if the file cannot be opened or read
 */
public void loadDependencies(String filename) throws IOException {
  // try-with-resources: the reader is closed even when parsing fails part-way through.
  try (LineNumberReader reader = IOTools.getReaderFromFile(filename)) {
    forwardDependenciesCache = new HashMap<Integer, Map<Integer, HashSet<Integer>>>();
    reverseDependenciesCache = new HashMap<Integer, Map<Integer, Integer>>();
    reachableNodesCache = new HashMap<Integer, Map<Integer, Set<Integer>>>();

    HashMap<Integer, Pair<IndexedWord, List<Integer>>> deps;
    int i = 0;
    while ((deps = DependencyUtils.getDependenciesFromCoNLLFileReader(reader, true, true)) != null) {
      reverseDependenciesCache.put(i, DependencyUtils.getReverseDependencies(deps));

      // Governor index -> set of its dependents' indices.
      Map<Integer, HashSet<Integer>> forwardDeps = new HashMap<Integer, HashSet<Integer>>();
      for (Map.Entry<Integer, Pair<IndexedWord, List<Integer>>> entry : deps.entrySet()) {
        forwardDeps.put(entry.getKey(), new HashSet<Integer>(entry.getValue().second));
      }
      forwardDependenciesCache.put(i, forwardDeps);
      i++;
    }
  }
}
Example #3
Source File: TranslationLayout.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Adds a full-width row showing a complete translation.
 *
 * <p>A previously freed grid row is reused when one is available; otherwise a new row is
 * allocated below the existing ones. The label is attached to the panel only if a panel
 * exists, and is always recorded in {@code fullTranslations} under {@code name}.
 *
 * @param name    key under which the row is registered
 * @param trans   text displayed in the row
 * @param bgColor background color of the row
 * @return always {@code true}
 */
public boolean addTranslationRow(String name, String trans, Color bgColor) {
  final JLabel rowLabel = new JLabel(trans);
  rowLabel.setForeground(Color.WHITE);
  rowLabel.setBackground(bgColor);
  rowLabel.setOpaque(true);

  final GridBagConstraints constraints = new GridBagConstraints();
  constraints.gridwidth = numColumns;
  constraints.gridx = 0;
  constraints.ipady = 20;
  constraints.fill = GridBagConstraints.HORIZONTAL;

  if (!unusedRows.isEmpty()) {
    // Recycle the most recently freed row.
    constraints.gridy = unusedRows.removeFirst();
  } else {
    numFullTranslationRows++;
    constraints.gridy = numRows + numFullTranslationRows;
  }

  if (panel != null) {
    panel.add(rowLabel, constraints);
  }
  fullTranslations.put(name, new Pair<Integer, JLabel>(constraints.gridy, rowLabel));
  return true;
}
Example #4
Source File: Phrase.java From uncc2014watsonsim with GNU General Public License v2.0 | 6 votes |
private static Map<Integer, Pair<CorefMention, CorefMention>> _unpronoun(Phrase p) { Stream<Pair<CorefMention, CorefMention>> s = Stream.of(p.memo(Phrase.coreNLP).get(CorefChainAnnotation.class)) .filter(Objects::nonNull) // Do nothing with an empty map .flatMap(chains -> chains.entrySet().stream()) // Disassemble the map .flatMap(entry -> { // Link each entry to it's main mention CorefMention main = entry.getValue().getRepresentativeMention(); return entry.getValue().getMentionsInTextualOrder().stream() .filter(mention -> mention != main) .map(mention -> makePair(mention, main)); }); // Type inference chokes here so write it down then return. return s.collect(HashMap::new, (m, pair) -> m.put(pair.first.headIndex, pair), (l, r) -> {}); }
Example #5
Source File: KBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
@Override public Pair<String, Double> classify(KBPInput input) { for (RelationType rel : RelationType.values()) { if (rules.containsKey(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.contains(input.objectType)) { Collection<SemgrexPattern> rulesForRel = rules.get(rel); CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph); boolean matches = matches(sentence, rulesForRel, input, sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)) || matches(sentence, rulesForRel, input, sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class)); if (matches) { //logger.log("MATCH for " + rel + ". " + sentence: + sentence + " with rules for " + rel); return Pair.makePair(rel.canonicalName, 1.0); } } } return Pair.makePair(NO_RELATION, 1.0); }
Example #6
Source File: TextualSimilarity.java From ADW with GNU General Public License v3.0 | 6 votes |
/** * returns * @param sentence * input sentence, space delimited * @param discardStopWords * true if stopwords are to be discarded from the sentence * @return * a pair containing <list of word-pos, remaining not-handled terms> * */ public Pair<List<String>, List<String>> getStanfordSentence(String sentence) { List<WordLemmaTag> wlts = SentenceProcessor.getInstance().processSentence(sentence, false); List<String> terms = null; StanfordSentence sSentence = StanfordSentence.fromLine(Strings.join(wlts," ")); try { terms = sSentence.getTerms(TAGS, Language.EN, null, MultiwordBelongingTo.WORDNET, CompoundingParameter.ALLOW_MULTIWORD_EXPRESSIONS, CompoundingParameter.APPEND_POS); } catch(Exception e) { e.printStackTrace(); } //discards OOVs, and tries to map incorrect pos-tags to the correct ones return fixTerms(terms, discardStopwords); }
Example #7
Source File: Preprocess.java From ADW with GNU General Public License v3.0 | 6 votes |
/**
 * Runs {@code caseFixer} over every pair and writes the results to {@code path}, one pair per
 * line with the two strings separated by a tab. An existing file is overwritten.
 *
 * <p>Failures are reported via a printed stack trace and otherwise ignored (best effort).
 *
 * @param pairs the sentence pairs to process
 * @param path  destination file
 */
public static void fixAllCasings(List<Pair<String,String>> pairs, String path) {
  // try-with-resources: the writer is flushed and closed even if a write or caseFixer fails.
  try (BufferedWriter bw = new BufferedWriter(new FileWriter(path, false))) {
    for (Pair<String,String> aPair : pairs) {
      Pair<String,String> fixedPair = caseFixer(aPair);
      bw.write(fixedPair.first + "\t" + fixedPair.second + "\n");
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Example #8
Source File: IntelKBPEnsembleExtractor.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
/**
 * Classifies {@code input} with the routine selected by {@code ensembleStrategy}.
 *
 * @param input the candidate to classify
 * @return the result of the strategy-specific classification method
 * @throws UnsupportedOperationException if the configured strategy has no handler here
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
  switch (ensembleStrategy) {
    case DEFAULT:
      return classifyDefault(input);
    case HIGHEST_SCORE:
      return classifyWithHighestScore(input);
    case VOTE:
      return classifyWithVote(input);
    case WEIGHTED_VOTE:
      return classifyWithWeightedVote(input);
    case HIGH_RECALL:
      return classifyWithHighRecall(input);
    case HIGH_PRECISION:
      return classifyWithHighPrecision(input);
    default:
      // Previously threw UnsupportedClassVersionError, a JVM linkage Error meant for
      // class-file version mismatches; an unsupported strategy is an ordinary runtime condition.
      throw new UnsupportedOperationException(ensembleStrategy + " not supported");
  }
}
Example #9
Source File: Node.java From dependensee with GNU General Public License v2.0 | 6 votes |
/**
 * Returns the number of edges on the shortest path from this node to {@code n}, following
 * both parent and child links (breadth-first search).
 *
 * @param n the target node, compared by identity
 * @return the path length, 0 if {@code n} is this node, or {@code Integer.MAX_VALUE} if
 *         {@code n} cannot be reached
 */
public int getPathLength(Node n) {
  Queue<Pair<Node, Integer>> q = new LinkedList<Pair<Node, Integer>>();
  // Identity-based visited set, consistent with the identity comparison against the target.
  Set<Node> marked =
      java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<Node, Boolean>());

  q.add(new Pair<Node, Integer>(this, 0));
  marked.add(this);

  while (!q.isEmpty()) {
    Pair<Node, Integer> v = q.remove();
    Node current = v.first;
    int distance = v.second;

    if (current == n) {
      return distance;
    }

    // Set.add returns false for an already-visited node, so each node is enqueued once.
    if (current.parent != null && marked.add(current.parent)) {
      q.add(new Pair<Node, Integer>(current.parent, distance + 1));
    }
    for (Node child : current.children) {
      // The original enqueued children unconditionally, re-expanding visited nodes
      // (and never terminating on a cyclic child graph when the target is unreachable).
      if (marked.add(child)) {
        q.add(new Pair<Node, Integer>(child, distance + 1));
      }
    }
  }
  return Integer.MAX_VALUE;
}
Example #10
Source File: TranslationLayout.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Removes the full-translation row registered under {@code name}.
 *
 * <p>The row's grid index is returned to the pool of reusable rows and its label is detached
 * from the panel, if a panel exists. The registration itself is dropped as well, so calling
 * this twice for the same name cannot hand out the same grid row to two later additions.
 *
 * @param name key of the row to remove
 * @return {@code true} if a row was registered under {@code name}, {@code false} otherwise
 */
public boolean removeTranslationRow(String name) {
  // remove() rather than get(): a stale entry would let a repeated call free the row twice.
  Pair<Integer, JLabel> labelPair = fullTranslations.remove(name);
  if (labelPair == null) {
    return false;
  }
  unusedRows.addFirst(labelPair.first());
  if (panel != null) {
    panel.remove(labelPair.second());
  }
  return true;
}
Example #11
Source File: KBPRelationExtractor.java From InformationExtraction with GNU General Public License v3.0 | 5 votes |
/**
 * Classifies every example and accumulates the accuracy of the predictions.
 *
 * <p>Examples are classified in parallel. For each one, a line of the form
 * {@code <predicted relation>\t<confidence>} (confidence formatted as {@code 0.0000}) is
 * produced and, if {@code predictOut} is present, printed to it in the encounter order of the
 * stream. Progress is logged after every 1000 processed examples.
 *
 * @param examples   pairs of (input, gold relation label); consumed by this call
 * @param predictOut optional sink for the per-example prediction lines
 * @return the accumulated accuracy statistics
 */
default Accuracy computeAccuracy(Stream<Pair<KBPInput, String>> examples,
                                 Optional<PrintStream> predictOut) {
  forceTrack("Accuracy");
  Accuracy accuracy = new Accuracy();
  AtomicInteger testI = new AtomicInteger(0);
  DecimalFormat confidenceFormat = new DecimalFormat("0.0000");
  forceTrack("Featurizing");
  examples.parallel().map(example -> {
    Pair<String, Double> predicted = this.classify(example.first);
    String formattedConfidence;
    synchronized (accuracy) {
      accuracy.predict(Collections.singleton(predicted.first), Collections.singleton(example.second));
      // DecimalFormat is not thread-safe; format under the same lock because this lambda
      // runs concurrently on the parallel stream's worker threads.
      formattedConfidence = confidenceFormat.format(predicted.second);
    }
    if (testI.incrementAndGet() % 1000 == 0) {
      log(KBPRelationExtractor.class, "[" + testI.get() + "] " + accuracy.toOneLineString());
    }
    return predicted.first + "\t" + formattedConfidence;
  })
  .forEachOrdered(line -> predictOut.ifPresent(out -> out.println(line)));
  endTrack("Featurizing");
  log(accuracy.toString());
  endTrack("Accuracy");
  return accuracy;
}
Example #12
Source File: KBPTokensregexExtractor.java From InformationExtraction with GNU General Public License v3.0 | 5 votes |
public static void main(String[] args) throws IOException { RedwoodConfiguration.standard().apply(); // Disable SLF4J crap. ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPTokensregexExtractor.class, args); edu.stanford.nlp.ie.KBPTokensregexExtractor extractor = new edu.stanford.nlp.ie.KBPTokensregexExtractor(DIR); List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE); extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> { try { return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x)); } catch (IOException e) { throw new RuntimeIOException(e); } })); }
Example #13
Source File: MinimumBayesRisk.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Command-line entry point: rescores n-best lists and dumps the rescored entries.
 *
 * <p>Prints the usage text and exits with status -1 when no arguments are given. Options read:
 * {@code s} (scale, default {@code DEFAULT_SCALE}), {@code o} (orientation, default
 * "utility"; "risk" switches to risk), {@code m} (metric name, default
 * {@code DEFAULT_METRIC}) and the property stored under the empty key, used as the n-best
 * file name.
 *
 * @param args command-line arguments
 * @throws IOException declared by this entry point
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.print(usage());
    System.exit(-1);
  }

  Properties opts = StringUtils.argsToProperties(args, argDefs());
  final double scale = PropertiesUtils.getDouble(opts, "s", DEFAULT_SCALE);
  final boolean risk = "risk".equals(opts.getProperty("o", "utility"));
  final String metricName = opts.getProperty("m", DEFAULT_METRIC);
  final String filename = opts.getProperty("");

  BasicNBestList allLists = new BasicNBestList(filename);
  MulticoreWrapper<List<BasicNBestEntry>, List<Pair<Double, String>>> pool =
      new MulticoreWrapper<List<BasicNBestEntry>, List<Pair<Double, String>>>(
          0, new Processor(metricName, risk, scale), true);

  for (List<BasicNBestEntry> oneList : allLists) {
    pool.put(oneList);
    // Emit whatever results are already available while feeding the pool.
    while (pool.peek()) {
      DumpRescored(pool.poll());
    }
  }

  // Wait for the remaining work, then flush the rest of the results.
  pool.join();
  while (pool.peek()) {
    DumpRescored(pool.poll());
  }
}
Example #14
Source File: PrefixTagger.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Determine best tag based on current word and its immediate predecessors.
 *
 * <p>When {@code CACHE_POS} is set, results are cached per word sequence and a cached result
 * is returned without re-scoring.
 * NOTE(review): the cache key wraps only {@code s}; the offset {@code o} does not appear to be
 * part of it, so the same words with a different offset would return the cached tag — confirm
 * this is intended.
 *
 * @param s
 *          <i>leftWindow</i> plus one words
 * @param o
 *          Offset with respect to last position.
 * @return Best tag and its score (described as a probability by the original author).
 */
public Pair<IString, Float> getBestTag(IString[] s, int o) {
  // Position to tag: the last word of s, shifted by the offset.
  int loc = s.length - 1 + o;

  IStringArrayWrapper aw = null;
  Pair<IString, Float> tag;

  if (CACHE_POS) {
    aw = new IStringArrayWrapper(s);
    tag = cache.get(aw);
    if (tag != null)
      return tag;
  }

  // Presumably sets up per-sentence state, including len, for the calls below — TODO confirm.
  init(s);

  int[] bestTags = new int[len];
  int[][] vals = new int[len][];
  // Start every position at the first of its possible values.
  for(int pos = 0 ; pos < len ; pos++) {
    vals[pos] = getPossibleValues(pos);
    bestTags[pos] = vals[pos][0];
  }

  // Scoring happens between initializeScorer() and cleanUpScorer(); keep this order.
  this.initializeScorer();

  double[] scores = scoresOf(bestTags, loc);

  // Index of the highest-scoring candidate for position loc.
  int am = ArrayMath.argmax(scores);

  // TODO
  bestTags[loc] = vals[loc][am];

  cleanUpScorer();

  tag = new Pair<IString, Float>(new IString(maxentTagger.getTag(bestTags[loc])), (float) scores[am]);
  if (CACHE_POS)
    cache.put(aw, tag);
  return tag;
}
Example #15
Source File: PrefixTagger.java From phrasal with GNU General Public License v3.0 | 5 votes |
/** * Tag text file using PrefixTagger. * * @param textFile * File to tag */ public void tagFile(String textFile) { for (String line : ObjectBank.getLineIterator(new File(textFile))) { line = line.replaceAll("$", " "); line = line + Tagger.EOS_WORD; IString[] in = IStrings.toIStringArray(line.split("\\s+")); // System.err.println("sent: "+Arrays.toString(in)); for (int i = 0; i < in.length - 1; ++i) { int from = Math.max(0, i - leftWindow); int to = Math.min(i + 1 + rightWindow, in.length); int offset = -rightWindow; IString[] seq = new IString[to - from]; System.arraycopy(in, from, seq, 0, seq.length); // System.err.printf("tagging(%d,%d,%d): %s\n",from,to,offset,Arrays.toString(seq)); Pair<IString, Float> tag = getBestTag(seq); if (i > 0) System.out.print(" "); int loc = seq.length - 1 + offset; // System.err.printf("tagging(%d,%d,%d,%s): %s\n",from,to,offset,tag.first.word(),Arrays.toString(seq)); System.out.print(seq[loc]); System.out.print("/"); System.out.print(tag.first.toString()); } System.out.print("\n"); } }
Example #16
Source File: DependencyUtils.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Inverts a governor-to-dependents mapping into a dependent-to-governor mapping.
 *
 * <p>If a dependent is listed under several governors, the governor visited last in the
 * map's iteration order wins.
 *
 * @param forwardDependencies map from governor index to a pair whose second element lists
 *                            the indices of its dependents; may be {@code null}
 * @return a new map from dependent index to governor index, or {@code null} if the input
 *         is {@code null}
 */
public static Map<Integer, Integer> getReverseDependencies(HashMap<Integer, Pair<IndexedWord, List<Integer>>> forwardDependencies) {
  if (forwardDependencies == null) {
    return null;
  }

  Map<Integer, Integer> governorOf = new HashMap<>();
  for (Map.Entry<Integer, Pair<IndexedWord, List<Integer>>> entry : forwardDependencies.entrySet()) {
    Integer governor = entry.getKey();
    for (Integer dependent : entry.getValue().second) {
      governorOf.put(dependent, governor);
    }
  }
  return governorOf;
}
Example #17
Source File: BLEUSorter.java From phrasal with GNU General Public License v3.0 | 5 votes |
static List<Integer> sortSentencesByScore(List<Sequence<IString>> hyps1, List<Sequence<IString>> hyps2, BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric1, BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric2) { List<Pair<Double, Integer>> scores = new ArrayList<Pair<Double, Integer>>(); for (int sentId = 0; sentId < hyps1.size(); ++sentId) { double score1 = incMetric1.computeLocalSmoothScore(hyps1.get(sentId), sentId); double score2 = incMetric2.computeLocalSmoothScore(hyps2.get(sentId), sentId); boolean add; if (score1 == 0.0) { add = (score2 > 0.0); } else { add = Math.abs(score2 / score1 - 1.0) > minDelta; } if (add) // Only print sentence if difference is significant enough: scores.add(new Pair<Double, Integer>(score2 - score1, sentId)); } Collections.sort(scores, (el1, el2) -> el1.first().compareTo(el2.first())); List<Integer> sentIds = new ArrayList<Integer>(); for (Pair<Double, Integer> el : scores) { sentIds.add(el.second()); } return sentIds; }
Example #18
Source File: IntelKBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0 | 5 votes |
public static void main(String[] args) throws IOException { RedwoodConfiguration.standard().apply(); // Disable SLF4J crap. ArgumentParser.fillOptions(IntelKBPSemgrexExtractor.class, args); IntelKBPSemgrexExtractor extractor = new IntelKBPSemgrexExtractor(DIR); List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE); extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> { try { return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x)); } catch (IOException e) { throw new RuntimeIOException(e); } })); }
Example #19
Source File: Messages.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Determines the message type of an HTTP request and decodes its JSON payload.
 *
 * <p>For an unknown request type the payload is an {@code UnknownRequest}. Otherwise the
 * request parameter named by the type's key is parsed with gson into the type's message class.
 *
 * @param request the incoming servlet request
 * @return the message type paired with the decoded request object
 */
@SuppressWarnings("unchecked")
public static Pair<MessageType,Request> parseRequest(HttpServletRequest request) {
  final MessageType type = getMessageType(request);
  final Request fallback = new UnknownRequest();

  if (type == MessageType.UNKNOWN_REQUEST) {
    return new Pair<MessageType,Request>(type, fallback);
  }

  String payload = request.getParameter(type.keyName());
  Request decoded = (Request) gson.fromJson(payload, type.msgClass());
  return new Pair<MessageType,Request>(type, decoded);
}
Example #20
Source File: TranslationLayout.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Creates a layout for the given translation.
 *
 * <p>The column count is taken from the number of source words and the option count from the
 * number of phrases. All bookkeeping collections start empty and no full-translation rows
 * exist yet.
 *
 * @param t           the translation to lay out
 * @param rightToLeft stored in {@code RIGHT_TO_LEFT}
 */
public TranslationLayout(Translation t, boolean rightToLeft) {
  this.translation = t;
  this.RIGHT_TO_LEFT = rightToLeft;

  this.numColumns = t.getNumSourceWords();
  this.numOptions = t.numPhrases();
  this.numFullTranslationRows = 0;

  this.vPhrases = new ArrayList<>();
  this.vPhraseLookup = new HashMap<>();
  this.unusedRows = new LinkedList<>();
  this.fullTranslations = new HashMap<>();
}
Example #21
Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0 | 5 votes |
/** * Test to validate that lemma values are checked against the (custom) stopword list * * NOTE: since we're loading the pos model into memory you'll need to set the VM memory size via '-Xms512m -Xmx1048m' * @throws Exception */ @org.junit.Test public void testStopwordsWithLemma() throws Exception { //setup coreNlp properties for stopwords. Note the custom stopword list and check for lemma property Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, stopword"); props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator"); props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList); props.setProperty(StopwordAnnotator.CHECK_LEMMA, "true"); //get the custom stopword set Set<?> stopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(example); pipeline.annotate(document); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); for (CoreLabel token : tokens) { //get the stopword annotation Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } String lemma = token.lemma().toLowerCase(); if (stopWords.contains(lemma)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } } }
Example #22
Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0 | 5 votes |
/** * Test to validate that the custom stopword list words * @throws Exception */ @org.junit.Test public void testCustomStopwordList() throws Exception { //setup coreNlp properties for stopwords. Note the custom stopword list property Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, stopword"); props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator"); props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList); //get the custom stopword set Set<?> stopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(example); pipeline.annotate(document); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); for (CoreLabel token : tokens) { //get the stopword annotation Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } //not checking lemma, so always false assertFalse(stopword.second()); } }
Example #23
Source File: Edges.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
/** * Returns some new rules learned about a pronoun given its match * context from anaphora resolution. * * Specifically, we fill in the tags * * _animate(main mention, ___). * _gender(main mention, ___). * _number(main mention, ___). * * Basically, we can tell if it is animate, it's gender, and it's count. * @return A list of semantic notes. */ public static List<Edge> generatePronounEdges( SemanticGraph g, IndexedWord w, Phrase t) { List<Edge> edges = new ArrayList<>(); if (t.getUnpronoun().containsKey(w.index())) { // Use what we know about the pronoun Pair<CorefMention, CorefMention> mention_edge = t.getUnpronoun().get(w.index()); String main_noun = Trees.concatNoun(g, g.getNodeByIndex(mention_edge.second.headIndex)); Animacy is_animate = mention_edge.first.animacy; if (is_animate != Animacy.UNKNOWN) { edges.add(new Edge( main_noun, "_animate", is_animate.toString())); } Gender gender = mention_edge.first.gender; if (gender != Gender.UNKNOWN) { edges.add(new Edge( main_noun, "_gender", gender.toString())); } Dictionaries.Number number = mention_edge.first.number; if (number != Dictionaries.Number.UNKNOWN) { edges.add(new Edge( main_noun, "_number", number.toString())); } } return edges; }
Example #24
Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0 | 5 votes |
/** * Test to validate that stopwords are properly annotated in the token list * @throws Exception */ @org.junit.Test public void testLuceneStopwordList() throws Exception { Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, stopword"); props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(example); pipeline.annotate(document); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); //get the standard lucene stopword set Set<?> stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET; for (CoreLabel token : tokens) { //get the stopword annotation Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } //not checking lemma, so always false assertFalse(stopword.second()); } }
Example #25
Source File: Edges.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
/**
 * Gets the full text of the main mention of a word when pronoun resolution links it to one;
 * otherwise falls back to the word's own segment of the tree via {@code concatNoun()}.
 *
 * @param phrase the phrase providing the pronoun-resolution table
 * @param graph  the semantic graph containing the word
 * @param word   the word to look up (by its index)
 * @return the mention span of the main mention, or the concatenated noun for {@code word}
 */
public static String getMainMention(
    Phrase phrase, SemanticGraph graph, IndexedWord word) {
  Pair<CorefMention, CorefMention> resolved = phrase.getUnpronoun().get(word.index());
  if (resolved != null) {
    // second = the representative ("main") mention the word was linked to.
    return resolved.second.mentionSpan;
  }
  return Trees.concatNoun(graph, word);
}
Example #26
Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0 | 5 votes |
/**
 * Marks every token of the annotation with a pair of stopword flags.
 *
 * <p>The first flag tells whether the lower-cased word is in the stopword set. The second
 * tells whether the lower-cased lemma is; it is {@code false} when lemma checking is disabled
 * or the token has no lemma. Nothing is done when the stopword set is missing or empty, or
 * when the annotation has no tokens.
 *
 * @param annotation the document annotation whose tokens are flagged in place
 */
@Override
public void annotate(Annotation annotation) {
  if (stopwords != null && stopwords.size() > 0 && annotation.containsKey(TokensAnnotation.class)) {
    List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      boolean isWordStopword = stopwords.contains(token.word().toLowerCase());

      // Previously this looked up token.word() again, so the lemma flag merely mirrored
      // the word flag. Use the lemma, and tolerate tokens that were never lemmatized.
      String lemma = checkLemma ? token.lemma() : null;
      boolean isLemmaStopword = lemma != null && stopwords.contains(lemma.toLowerCase());

      Pair<Boolean, Boolean> pair = Pair.makePair(isWordStopword, isLemmaStopword);
      token.set(StopwordAnnotator.class, pair);
    }
  }
}
Example #27
Source File: Preprocess.java From ADW with GNU General Public License v3.0 | 5 votes |
public static void pipeline(List<Pair<String,String>> pairs, String outPath) { try { BufferedWriter bw = new BufferedWriter(new FileWriter(ADWConfiguration.getInstance().getOffsetMapPath(), false)); int i = 1; for(Pair<String,String> aPair : pairs) { System.out.println("[working on "+ i++ +"]"); String first = aPair.first; String second = aPair.second; //n't & 'm => not and am first = fixAbbrev(first); second = fixAbbrev(second); //remove hyphens first = removeHyphens(first); second = removeHyphens(second); //fixCurrency first = fixCurrency(first); second = fixCurrency(second); bw.write(first.replace(" .", ".")+"\t"+second.replace(" .", ".")+"\n"); } bw.close(); } catch(Exception e) { e.printStackTrace(); } }
Example #28
Source File: Preprocess.java From ADW with GNU General Public License v3.0 | 5 votes |
public static void manualCheck(List<Pair<String,String>> pairs, boolean flag, String filePath) { if(flag) { //mirror-compounding mirrorCompounder(pairs); } else { try { BufferedWriter bw = new BufferedWriter(new FileWriter(filePath, false)); //spellchecking for(Pair<String,String> aPair : pairs) { Pair<String,String> fixedPair = spellCorrect(aPair); bw.write(fixedPair.first+"\t"+fixedPair.second+"\n"); } bw.close(); } catch(Exception e) { e.printStackTrace(); } } }
Example #29
Source File: PairSimilarity.java From ADW with GNU General Public License v3.0 | 5 votes |
/**
 * Delegates to {@code TextualSimilarity.fixPOSmirroring} for the two cooked sentences.
 *
 * @param firstCookedSentence  terms of the first sentence
 * @param secondCookedSentence terms of the second sentence
 * @return whatever {@code fixPOSmirroring} returns for the two sentences
 */
public Pair<List<String>,List<String>> mirrorPosTags(List<String> firstCookedSentence, List<String> secondCookedSentence) {
  // Disabled guard kept from the original: it aborted when the second sentence was empty.
  /*
  if(secondCookedSentence.size() == 0)
  {
    System.out.println("[ERROR: Set mirror pos tagging off!]");
    System.exit(0);
  }
  */
  return TextualSimilarity.fixPOSmirroring(firstCookedSentence, secondCookedSentence);
}
Example #30
Source File: ADW.java From ADW with GNU General Public License v3.0 | 5 votes |
/**
 * Checks whether {@code input} is valid for the declared item type.
 *
 * <p>An invalid input is logged as a warning, and the same message is returned to the caller.
 *
 * @param input the raw input string
 * @param type  the item type the input claims to be
 * @return (true, confirmation message) if the check passes, otherwise (false, error message)
 */
public Pair<Boolean,String> evaluateInputType(String input, ItemType type) {
  if (checkType(input, type)) {
    return new Pair<Boolean,String>(true,
        "Valid input type for "+ type +" and string \""+ input +"\".");
  }

  String problem = "Invalid input type for "+ type +" and string \""+ input +"\"! Please check the input type.";
  log.warn(problem);
  return new Pair<Boolean,String>(false, problem);
}