edu.stanford.nlp.io.IOUtils Java Examples
The following examples show how to use
edu.stanford.nlp.io.IOUtils.
Each example notes its source file, the project it comes from, and that project's license.
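Most of the snippets below lean on the same handful of IOUtils helpers: existsInClasspathOrFileSystem to locate rule files, slurpFile and slurpReader to read whole files into memory, readObjectFromURLOrClasspathOrFileSystem and writeObjectToFile to load and save serialized models, and console to drive an interactive prompt over stdin. The following is a minimal, self-contained sketch of those calls, not taken from any of the projects below; the file names (input.txt, model.ser.gz, the .rules path) are placeholders.

import edu.stanford.nlp.io.IOUtils;

import java.io.IOException;
import java.util.HashMap;

public class IOUtilsSketch {

  public static void main(String[] args) throws IOException, ClassNotFoundException {
    // Check whether a resource can be resolved from the classpath or the file system
    // (the rule-based extractors below use this to decide whether a .rules file exists).
    String rulesPath = "relations/per_title.rules";  // hypothetical rules file
    if (IOUtils.existsInClasspathOrFileSystem(rulesPath)) {
      System.out.println("Found " + rulesPath);
    }

    // Read an entire file into a String (see the DocumentFrequencyCounter example).
    String contents = IOUtils.slurpFile("input.txt");  // hypothetical local file
    System.out.println("Read " + contents.length() + " characters");

    // Serialize an object and load it back; the statistical extractors below load their
    // models the same way via readObjectFromURLOrClasspathOrFileSystem.
    HashMap<String, Integer> model = new HashMap<>();
    model.put("example", 1);
    IOUtils.writeObjectToFile(model, "model.ser.gz");  // hypothetical output file
    Object restored = IOUtils.readObjectFromURLOrClasspathOrFileSystem("model.ser.gz");
    System.out.println("Restored a " + restored.getClass().getSimpleName());

    // Run a simple interactive prompt over stdin, as most of the console demos below do.
    IOUtils.console("input> ", line -> System.out.println("echo: " + line));
  }
}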
Example #1
Source File: KBPTokensregexExtractor.java From InformationExtraction with GNU General Public License v3.0
public KBPTokensregexExtractor(String tokensregexDir) {
  logger.log("Creating TokensRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String path = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(path)) {
      List<String> listFiles = new ArrayList<>();
      listFiles.add(tokensregexDir + File.separator + "defs.rules");
      listFiles.add(path);
      logger.log("Rule files for relation " + rel + " is " + path);
      Env env = TokenSequencePattern.getNewEnv();
      env.bind("collapseExtractionRules", true);
      CoreMapExpressionExtractor extr =
          CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags();
      rules.put(rel, extr);
    }
  }
}
Example #2
Source File: KBPTest.java From InformationExtraction with GNU General Public License v3.0
/**
 * A debugging method to try relation extraction from the console.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
      System.out.println(sentence);
    }
  });
}
Example #3
Source File: InteractiveDriver.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(r -> {
        String relation = r.relationGloss();
        if (interested.contains(relation)) {
          System.err.println(r);
        }
      });
    }
  });
}
Example #4
Source File: IntelKBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
  log.info("Loading KBP classifier from " + MODEL);
  Object object = edu.stanford.nlp.io.IOUtils.readObjectFromURLOrClasspathOrFileSystem(MODEL);
  IntelKBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else if (object instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
    edu.stanford.nlp.ie.KBPStatisticalExtractor kbp = (edu.stanford.nlp.ie.KBPStatisticalExtractor) object;
    statisticalExtractor = new IntelKBPStatisticalExtractor(kbp.classifier);
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  return statisticalExtractor;
}
Example #5
Source File: DefaultKBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
  log.info("Loading KBP classifier from " + model);
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(model);
  IntelKBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new DefaultKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof DefaultKBPStatisticalExtractor) {
    statisticalExtractor = (DefaultKBPStatisticalExtractor) object;
  } else if (object instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
    edu.stanford.nlp.ie.KBPStatisticalExtractor kbp = (edu.stanford.nlp.ie.KBPStatisticalExtractor) object;
    statisticalExtractor = new DefaultKBPStatisticalExtractor(kbp.classifier);
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + DefaultKBPStatisticalExtractor.class);
  }
  return statisticalExtractor;
}
Example #6
Source File: IntelKBPTokensregexExtractor.java From InformationExtraction with GNU General Public License v3.0
public IntelKBPTokensregexExtractor(String tokensregexDir) {
  logger.log("Creating TokensRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    if (IntelConfig.bSeprateFormerTitle || rel != RelationType.PER_FORMER_TITLE) {
      String path = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
      if (IOUtils.existsInClasspathOrFileSystem(path)) {
        List<String> listFiles = new ArrayList<>();
        listFiles.add(tokensregexDir + File.separator + "defs.rules");
        listFiles.add(path);
        logger.log("Rule files for relation " + rel + " is " + path);
        Env env = TokenSequencePattern.getNewEnv();
        env.bind("collapseExtractionRules", true);
        CoreMapExpressionExtractor extr =
            CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags();
        rules.put(rel, extr);
      }
    }
  }
}
Example #7
Source File: KBPEnsembleExtractor.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException, ClassNotFoundException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  logger.info("Read statistical model from " + STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor extractor = new edu.stanford.nlp.ie.KBPEnsembleExtractor(
      new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
      new IntelKBPSemgrexExtractor(SEMGREX_DIR),
      statisticalExtractor
  );
  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));
}
Example #8
Source File: MultiLangsStanfordCoreNLPClient.java From blog-codes with Apache License 2.0
/**
 * Runs an interactive shell where input text is processed with the given pipeline.
 *
 * @param pipeline The pipeline to be used
 * @throws IOException If IO problem with stdin
 */
private static void shell(MultiLangsStanfordCoreNLPClient pipeline) throws IOException {
  log.info("Entering interactive shell. Type q RETURN or EOF to quit.");
  final StanfordCoreNLP.OutputFormat outputFormat =
      StanfordCoreNLP.OutputFormat.valueOf(pipeline.properties.getProperty("outputFormat", "text").toUpperCase());
  IOUtils.console("NLP> ", line -> {
    if ( ! line.isEmpty()) {
      Annotation anno = pipeline.process(line);
      try {
        switch (outputFormat) {
          case XML:
            new XMLOutputter().print(anno, System.out);
            break;
          case JSON:
            new JSONOutputter().print(anno, System.out);
            System.out.println();
            break;
          case CONLL:
            new CoNLLOutputter().print(anno, System.out);
            System.out.println();
            break;
          case TEXT:
            new TextOutputter().print(anno, System.out);
            break;
          case SERIALIZED:
            warn("You probably cannot read the serialized output, so printing in text instead");
            new TextOutputter().print(anno, System.out);
            break;
          default:
            throw new IllegalArgumentException("Cannot output in format " + outputFormat + " from the interactive shell");
        }
      } catch (IOException e) {
        throw new RuntimeIOException(e);
      }
    }
  });
}
Example #9
Source File: KBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0
public KBPSemgrexExtractor(String semgrexdir) throws IOException {
  logger.log("Creating SemgrexRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(filename)) {
      List<SemgrexPattern> rulesforrel =
          SemgrexBatchParser.compileStream(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename));
      logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
      rules.put(rel, rulesforrel);
    }
  }
}
Example #10
Source File: IntelKBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0
public static void trainModel() throws IOException {
  forceTrack("Training data");
  List<Pair<KBPInput, String>> trainExamples = DatasetUtils.readDataset(TRAIN_FILE);
  log.info("Read " + trainExamples.size() + " examples");
  log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION);
  endTrack("Training data");

  // Featurize + create the dataset
  forceTrack("Creating dataset");
  RVFDataset<String, String> dataset = new RVFDataset<>();
  final AtomicInteger i = new AtomicInteger(0);
  long beginTime = System.currentTimeMillis();
  trainExamples.stream().parallel().forEach(example -> {
    if (i.incrementAndGet() % 1000 == 0) {
      log.info("[" + Redwood.formatTimeDifference(System.currentTimeMillis() - beginTime) + "] Featurized " + i.get() + " / " + trainExamples.size() + " examples");
    }
    Counter<String> features = features(example.first);  // This takes a while per example
    synchronized (dataset) {
      dataset.add(new RVFDatum<>(features, example.second));
    }
  });
  trainExamples.clear();  // Free up some memory
  endTrack("Creating dataset");

  // Train the classifier
  log.info("Training classifier:");
  Classifier<String, String> classifier = trainMultinomialClassifier(dataset, FEATURE_THRESHOLD, SIGMA);
  dataset.clear();  // Free up some memory

  // Save the classifier
  IOUtils.writeObjectToFile(new IntelKBPStatisticalExtractor(classifier), MODEL_FILE);
}
Example #11
Source File: IntelKBPEnsembleExtractor.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException, ClassNotFoundException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
  IntelKBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  logger.info("Read statistical model from " + STATISTICAL_MODEL);
  IntelKBPRelationExtractor extractor = new IntelKBPEnsembleExtractor(
      new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
      new IntelKBPSemgrexExtractor(SEMGREX_DIR),
      statisticalExtractor
  );
  List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));
}
Example #12
Source File: KBPModel.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  IOUtils.console("sentence> ", line -> {
    HashMap<RelationTriple, String> triple = extract(line);
    for (RelationTriple s : triple.keySet()) {
      System.out.println(s);
    }
  });
}
Example #13
Source File: IntelKBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0
public IntelKBPSemgrexExtractor(String semgrexdir) throws IOException {
  logger.log("Creating SemgrexRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(filename)) {
      List<SemgrexPattern> rulesforrel =
          SemgrexBatchParser.compileStream(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename));
      logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
      rules.put(rel, rulesforrel);
    }
  }
}
Example #14
Source File: IntelKBPAnnotator.java From InformationExtraction with GNU General Public License v3.0
/**
 * A debugging method to try relation extraction from the console.
 *
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
    }
  });
}
Example #15
Source File: IntelKBPModel.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  IOUtils.console("sentence> ", line -> {
    HashMap<RelationTriple, String> triple = extract(line);
    for (RelationTriple s : triple.keySet()) {
      System.out.println(s);
    }
  });
}
Example #16
Source File: RegexNerTest.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  IOUtils.console("sentence> ", line -> {
    List<String> ners = extractNER(line);
    for (String ner : ners) {
      System.out.print(ner + ",");
    }
    System.out.println();
  });
}
Example #17
Source File: DocumentFrequencyCounter.java From wiseowl with MIT License
/**
 * Computes a result, or throws an exception if unable to do so.
 *
 * @return computed result
 * @throws Exception if unable to compute a result
 */
public Counter<String> call() throws Exception {
  // We need to hallucinate some overarching document tag.. because the Gigaword files don't
  // have them :/
  String fileContents = IOUtils.slurpFile(file);
  fileContents = "<docs>" + fileContents + "</docs>";
  return getIDFMapForFile(new StringReader(fileContents));
}
Example #18
Source File: CoreNlpTokenizer.java From jstarcraft-nlp with Apache License 2.0
private boolean processInput() {
  Annotation annotation = new Annotation(IOUtils.slurpReader(input));
  pipeline.annotate(annotation);
  sentences = annotation.get(SentencesAnnotation.class).iterator();
  return true;
}