edu.stanford.nlp.io.IOUtils Java Examples
The following examples show how to use
edu.stanford.nlp.io.IOUtils.
Each example notes its source file, the project it comes from, and that project's license.
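Most of the snippets below lean on the same handful of IOUtils helpers: existsInClasspathOrFileSystem to locate rule files, slurpFile and slurpReader to read whole files into memory, readObjectFromURLOrClasspathOrFileSystem and writeObjectToFile to load and save serialized models, and console to drive an interactive prompt over stdin. The following is a minimal, self-contained sketch of those calls, not taken from any of the projects below; the file names (input.txt, model.ser.gz, the .rules path) are placeholders.

import edu.stanford.nlp.io.IOUtils;

import java.io.IOException;
import java.util.HashMap;

public class IOUtilsSketch {

  public static void main(String[] args) throws IOException, ClassNotFoundException {
    // Check whether a resource can be resolved from the classpath or the file system
    // (the rule-based extractors below use this to decide whether a .rules file exists).
    String rulesPath = "relations/per_title.rules";  // hypothetical rules file
    if (IOUtils.existsInClasspathOrFileSystem(rulesPath)) {
      System.out.println("Found " + rulesPath);
    }

    // Read an entire file into a String (see the DocumentFrequencyCounter example).
    String contents = IOUtils.slurpFile("input.txt");  // hypothetical local file
    System.out.println("Read " + contents.length() + " characters");

    // Serialize an object and load it back; the statistical extractors below load their
    // models the same way via readObjectFromURLOrClasspathOrFileSystem.
    HashMap<String, Integer> model = new HashMap<>();
    model.put("example", 1);
    IOUtils.writeObjectToFile(model, "model.ser.gz");  // hypothetical output file
    Object restored = IOUtils.readObjectFromURLOrClasspathOrFileSystem("model.ser.gz");
    System.out.println("Restored a " + restored.getClass().getSimpleName());

    // Run a simple interactive prompt over stdin, as most of the console demos below do.
    IOUtils.console("input> ", line -> System.out.println("echo: " + line));
  }
}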
Example #1
Source File: KBPTokensregexExtractor.java From InformationExtraction with GNU General Public License v3.0
public KBPTokensregexExtractor(String tokensregexDir) {
  logger.log("Creating TokensRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String path = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(path)) {
      List<String> listFiles = new ArrayList<>();
      listFiles.add(tokensregexDir + File.separator + "defs.rules");
      listFiles.add(path);
      logger.log("Rule files for relation " + rel + " is " + path);
      Env env = TokenSequencePattern.getNewEnv();
      env.bind("collapseExtractionRules", true);
      CoreMapExpressionExtractor extr =
          CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags();
      rules.put(rel, extr);
    }
  }
}
Example #2
Source File: KBPTest.java From InformationExtraction with GNU General Public License v3.0
/**
 * A debugging method to try relation extraction from the console.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
      System.out.println(sentence);
    }
  });
}
Example #3
Source File: InteractiveDriver.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(r -> {
        String relation = r.relationGloss();
        if (interested.contains(relation)) {
          System.err.println(r);
        }
      });
    }
  });
}
Example #4
Source File: IntelKBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
  log.info("Loading KBP classifier from " + MODEL);
  Object object = edu.stanford.nlp.io.IOUtils.readObjectFromURLOrClasspathOrFileSystem(MODEL);
  IntelKBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else if (object instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
    edu.stanford.nlp.ie.KBPStatisticalExtractor kbp = (edu.stanford.nlp.ie.KBPStatisticalExtractor) object;
    statisticalExtractor = new IntelKBPStatisticalExtractor(kbp.classifier);
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  return statisticalExtractor;
}
Example #5
Source File: DefaultKBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
  log.info("Loading KBP classifier from " + model);
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(model);
  IntelKBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new DefaultKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof DefaultKBPStatisticalExtractor) {
    statisticalExtractor = (DefaultKBPStatisticalExtractor) object;
  } else if (object instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
    edu.stanford.nlp.ie.KBPStatisticalExtractor kbp = (edu.stanford.nlp.ie.KBPStatisticalExtractor) object;
    statisticalExtractor = new DefaultKBPStatisticalExtractor(kbp.classifier);
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + DefaultKBPStatisticalExtractor.class);
  }
  return statisticalExtractor;
}
Example #6
Source File: IntelKBPTokensregexExtractor.java From InformationExtraction with GNU General Public License v3.0
public IntelKBPTokensregexExtractor(String tokensregexDir) {
  logger.log("Creating TokensRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    if (IntelConfig.bSeprateFormerTitle || rel != RelationType.PER_FORMER_TITLE) {
      String path = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
      if (IOUtils.existsInClasspathOrFileSystem(path)) {
        List<String> listFiles = new ArrayList<>();
        listFiles.add(tokensregexDir + File.separator + "defs.rules");
        listFiles.add(path);
        logger.log("Rule files for relation " + rel + " is " + path);
        Env env = TokenSequencePattern.getNewEnv();
        env.bind("collapseExtractionRules", true);
        CoreMapExpressionExtractor extr =
            CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags();
        rules.put(rel, extr);
      }
    }
  }
}
Example #7
Source File: KBPEnsembleExtractor.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException, ClassNotFoundException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  logger.info("Read statistical model from " + STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor extractor = new edu.stanford.nlp.ie.KBPEnsembleExtractor(
      new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
      new IntelKBPSemgrexExtractor(SEMGREX_DIR),
      statisticalExtractor
  );
  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));
}
Example #8
Source File: MultiLangsStanfordCoreNLPClient.java From blog-codes with Apache License 2.0
/**
 * Runs an interactive shell where input text is processed with the given pipeline.
 *
 * @param pipeline The pipeline to be used
 * @throws IOException If IO problem with stdin
 */
private static void shell(MultiLangsStanfordCoreNLPClient pipeline) throws IOException {
  log.info("Entering interactive shell. Type q RETURN or EOF to quit.");
  final StanfordCoreNLP.OutputFormat outputFormat =
      StanfordCoreNLP.OutputFormat.valueOf(pipeline.properties.getProperty("outputFormat", "text").toUpperCase());
  IOUtils.console("NLP> ", line -> {
    if ( ! line.isEmpty()) {
      Annotation anno = pipeline.process(line);
      try {
        switch (outputFormat) {
          case XML:
            new XMLOutputter().print(anno, System.out);
            break;
          case JSON:
            new JSONOutputter().print(anno, System.out);
            System.out.println();
            break;
          case CONLL:
            new CoNLLOutputter().print(anno, System.out);
            System.out.println();
            break;
          case TEXT:
            new TextOutputter().print(anno, System.out);
            break;
          case SERIALIZED:
            warn("You probably cannot read the serialized output, so printing in text instead");
            new TextOutputter().print(anno, System.out);
            break;
          default:
            throw new IllegalArgumentException("Cannot output in format " + outputFormat + " from the interactive shell");
        }
      } catch (IOException e) {
        throw new RuntimeIOException(e);
      }
    }
  });
}
Example #9
Source File: KBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0
public KBPSemgrexExtractor(String semgrexdir) throws IOException {
  logger.log("Creating SemgrexRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(filename)) {
      List<SemgrexPattern> rulesforrel =
          SemgrexBatchParser.compileStream(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename));
      logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
      rules.put(rel, rulesforrel);
    }
  }
}
Example #10
Source File: IntelKBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0
public static void trainModel() throws IOException {
  forceTrack("Training data");
  List<Pair<KBPInput, String>> trainExamples = DatasetUtils.readDataset(TRAIN_FILE);
  log.info("Read " + trainExamples.size() + " examples");
  log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION);
  endTrack("Training data");

  // Featurize + create the dataset
  forceTrack("Creating dataset");
  RVFDataset<String, String> dataset = new RVFDataset<>();
  final AtomicInteger i = new AtomicInteger(0);
  long beginTime = System.currentTimeMillis();
  trainExamples.stream().parallel().forEach(example -> {
    if (i.incrementAndGet() % 1000 == 0) {
      log.info("[" + Redwood.formatTimeDifference(System.currentTimeMillis() - beginTime) + "] Featurized " + i.get() + " / " + trainExamples.size() + " examples");
    }
    Counter<String> features = features(example.first);  // This takes a while per example
    synchronized (dataset) {
      dataset.add(new RVFDatum<>(features, example.second));
    }
  });
  trainExamples.clear();  // Free up some memory
  endTrack("Creating dataset");

  // Train the classifier
  log.info("Training classifier:");
  Classifier<String, String> classifier = trainMultinomialClassifier(dataset, FEATURE_THRESHOLD, SIGMA);
  dataset.clear();  // Free up some memory

  // Save the classifier
  IOUtils.writeObjectToFile(new IntelKBPStatisticalExtractor(classifier), MODEL_FILE);
}
Example #11
Source File: IntelKBPEnsembleExtractor.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException, ClassNotFoundException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
  IntelKBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  logger.info("Read statistical model from " + STATISTICAL_MODEL);
  IntelKBPRelationExtractor extractor = new IntelKBPEnsembleExtractor(
      new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
      new IntelKBPSemgrexExtractor(SEMGREX_DIR),
      statisticalExtractor
  );
  List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));
}
Example #12
Source File: KBPModel.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  IOUtils.console("sentence> ", line -> {
    HashMap<RelationTriple, String> triple = extract(line);
    for (RelationTriple s : triple.keySet()) {
      System.out.println(s);
    }
  });
}
Example #13
Source File: IntelKBPSemgrexExtractor.java From InformationExtraction with GNU General Public License v3.0
public IntelKBPSemgrexExtractor(String semgrexdir) throws IOException {
  logger.log("Creating SemgrexRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(filename)) {
      List<SemgrexPattern> rulesforrel =
          SemgrexBatchParser.compileStream(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename));
      logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
      rules.put(rel, rulesforrel);
    }
  }
}
Example #14
Source File: IntelKBPAnnotator.java From InformationExtraction with GNU General Public License v3.0
/**
 * A debugging method to try relation extraction from the console.
 *
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
    }
  });
}
Example #15
Source File: IntelKBPModel.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  IOUtils.console("sentence> ", line -> {
    HashMap<RelationTriple, String> triple = extract(line);
    for (RelationTriple s : triple.keySet()) {
      System.out.println(s);
    }
  });
}
Example #16
Source File: RegexNerTest.java From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
  IOUtils.console("sentence> ", line -> {
    List<String> ners = extractNER(line);
    for (String ner : ners) {
      System.out.print(ner + ",");
    }
    System.out.println();
  });
}
Example #17
Source File: DocumentFrequencyCounter.java From wiseowl with MIT License
/**
 * Computes a result, or throws an exception if unable to do so.
 *
 * @return computed result
 * @throws Exception if unable to compute a result
 */
public Counter<String> call() throws Exception {
  // We need to hallucinate some overarching document tag.. because the Gigaword files don't
  // have them :/
  String fileContents = IOUtils.slurpFile(file);
  fileContents = "<docs>" + fileContents + "</docs>";
  return getIDFMapForFile(new StringReader(fileContents));
}
Example #18
Source File: CoreNlpTokenizer.java From jstarcraft-nlp with Apache License 2.0
private boolean processInput() {
  Annotation annotation = new Annotation(IOUtils.slurpReader(input));
  pipeline.annotate(annotation);
  sentences = annotation.get(SentencesAnnotation.class).iterator();
  return true;
}