opennlp.tools.util.InvalidFormatException Java Examples

Source File: LemmatizerFactory.java From ixa-pipe-pos with Apache License 2.0

6 votes

/**
 * Creates a {@link LemmatizerFactory}, either the default one or an
 * extension instantiated by name.
 *
 * @param subclassName name handed to {@code ExtensionLoader}; {@code null}
 *          selects the default factory
 * @return the default factory or the instantiated extension
 * @throws InvalidFormatException if instantiating the extension throws any
 *           exception; the original exception is attached as the cause
 */
public static LemmatizerFactory create(String subclassName)
    throws InvalidFormatException {
  if (subclassName != null) {
    try {
      return ExtensionLoader.instantiateExtension(
          LemmatizerFactory.class, subclassName);
    } catch (Exception cause) {
      // Report on stderr as well as rethrowing, so the failure is visible
      // even if a caller swallows the exception.
      final String message = "Could not instantiate the " + subclassName
          + ". The initialization throw an exception.";
      System.err.println(message);
      cause.printStackTrace();
      throw new InvalidFormatException(message, cause);
    }
  }
  // No subclass requested: fall back to the default factory.
  return new LemmatizerFactory();
}

Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0

6 votes

/**
 * Splits a passage into sentences and returns one parse per sentence.
 *
 * Each sentence is tokenized with {@code SimpleTokenizer}, its tokens are
 * re-joined with single spaces, and the first parse returned by
 * {@code ParserTool.parseLine} is kept.
 *
 * @param p the passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared for model initialisation
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized) {
		init();
	}
	final SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	// Beam size 20, advance percentage 0.95.
	final Parser parser = ParserFactory.create(this.parserModel, 20, 0.95);

	final String[] sentences = detector.sentDetect(p);
	final Parse[] topParses = new Parse[sentences.length];
	int slot = 0;
	for (String sentence : sentences) {
		// Normalise spacing by tokenizing and re-joining before parsing.
		final String spaced = StringUtils.join(SimpleTokenizer.INSTANCE.tokenize(sentence), " ");
		System.out.println("Found sentence " + spaced);
		topParses[slot++] = ParserTool.parseLine(spaced, parser, 1)[0];
	}
	return topParses;
}

Source File: JM_Scorer.java From uncc2014watsonsim with GNU General Public License v2.0

6 votes

/**
 * Scores how closely the parse of the question matches the parse of the
 * passage, but only when the passage contains the candidate answer.
 *
 * The parser model is read from {@code en-parser.bin} in the working
 * directory on every call.
 *
 * @param ca candidate answer text; the score stays 0 unless {@code passage} contains it
 * @param q question text
 * @param passage passage text
 * @param verbose unused by this method
 * @return the sum of {@code matchChildren} over paired question/passage parses, or 0
 * @throws InvalidFormatException if the parser model is malformed
 * @throws IOException if the parser model file cannot be read
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException{
	// Only the parser is needed; close its model stream once the model is loaded.
	Parser parser;
	FileInputStream parserModelIn = new FileInputStream(new File("en-parser.bin"));
	try {
		parser = ParserFactory.create(new ParserModel(parserModelIn));
	} finally {
		parserModelIn.close();
	}
	double score = 0;

	Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
	// Parse the passage itself (previously the question was parsed twice).
	Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);

	if (passage.contains(ca)) {
		// Pair parses index by index; stop at the shorter array.
		int pairs = Math.min(questionParse.length, passageParse.length);
		for (int i = 0; i < pairs; i++) {
			score += matchChildren(questionParse[i], passageParse[i]);
		}
	}

	return score;
}

Source File: POSStructureScorer.java From uncc2014watsonsim with GNU General Public License v2.0

6 votes

/**
 * Detects the sentences of a passage and parses each one, keeping the first
 * parse returned for every sentence.
 *
 * @param p the passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by the signature
 */
public static Parse[] parsePassageText(String p) throws InvalidFormatException{
	final SentenceDetectorME splitter = new SentenceDetectorME(sentenceModel);
	final int beamSize = 20;
	final double advancePercentage = 0.95;
	final Parser parser = ParserFactory.create(parserModel, beamSize, advancePercentage);

	final String[] sentences = splitter.sentDetect(p);
	final Parse[] parses = new Parse[sentences.length];
	for (int idx = 0; idx < parses.length; idx++) {
		// Tokenize and re-join with single spaces before parsing.
		final String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentences[idx]);
		final String line = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + line);
		parses[idx] = ParserTool.parseLine(line, parser, 1)[0];
	}
	return parses;
}

Source File: LemmatizerModel.java From ixa-pipe-pos with Apache License 2.0

5 votes

/**
 * Runs the superclass validation, then checks that the lemmatizer model
 * entry is present and is an {@code AbstractModel}.
 *
 * @throws InvalidFormatException if the entry is missing or has another type
 */
@Override
protected void validateArtifactMap() throws InvalidFormatException {
  super.validateArtifactMap();

  final Object lemmatizerEntry = artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME);
  if (lemmatizerEntry instanceof AbstractModel) {
    return;
  }
  throw new InvalidFormatException("Lemmatizer model is incomplete!");
}

Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Length-normalised structure score: the product of the question/passage
 * and answer/passage chunk comparisons, divided by the passage's character
 * length.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return the product of both comparison scores divided by {@code passage.length()}
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructureNorm(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	final Parse[] answerTrees = this.parsePassageText(ca);
	final Parse[] questionTrees = this.parsePassageText(q);
	final Parse[] passageTrees = this.parsePassageText(passage);

	final Parse[] answerChunks = getAllChildren(answerTrees);
	final Parse[] questionChunks = getAllChildren(questionTrees);
	final Parse[] passageChunks = getAllChildren(passageTrees);

	final double questionVsPassage = compareParseChunks(questionChunks, passageChunks, verbose);
	final double answerVsPassage = compareParseChunks(answerChunks, passageChunks, verbose);
	return questionVsPassage * answerVsPassage / passage.length();
}

Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Structure score: the product of the question/passage and answer/passage
 * chunk comparisons.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return the product of both comparison scores
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	// Parse all three texts first, then flatten, in the same order as before.
	final Parse[] parsedAnswer = this.parsePassageText(ca);
	final Parse[] parsedQuestion = this.parsePassageText(q);
	final Parse[] parsedPassage = this.parsePassageText(passage);

	final Parse[] answerKids = getAllChildren(parsedAnswer);
	final Parse[] questionKids = getAllChildren(parsedQuestion);
	final Parse[] passageKids = getAllChildren(parsedPassage);

	final double fromQuestion = compareParseChunks(questionKids, passageKids, verbose);
	final double fromAnswer = compareParseChunks(answerKids, passageKids, verbose);
	return fromQuestion * fromAnswer;
}

Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Calls {@code init()}, runs the sentence detector over the given text and
 * prints every detected sentence to standard error.
 *
 * @param testSents text to split into sentences
 * @throws InvalidFormatException declared by the signature
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	final SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	for (String sentence : detector.sentDetect(testSents)) {
		System.err.println("sent: "+sentence);
	}
}

Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Computes the structure score normalised by the passage's character length.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return (question/passage score × answer/passage score) / {@code passage.length()}
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructureNorm(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	final Parse[] caTrees = this.parsePassageText(ca);
	final Parse[] qTrees = this.parsePassageText(q);
	final Parse[] passageTrees = this.parsePassageText(passage);

	final Parse[] caNodes = getAllChildren(caTrees);
	final Parse[] qNodes = getAllChildren(qTrees);
	final Parse[] passageNodes = getAllChildren(passageTrees);

	final double questionScore = compareParseChunks(qNodes, passageNodes, verbose);
	final double answerScore = compareParseChunks(caNodes, passageNodes, verbose);
	final double product = questionScore * answerScore;
	return product / passage.length();
}

Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Multiplies the question/passage chunk comparison by the answer/passage
 * chunk comparison.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return the product of the two comparison scores
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	final Parse[] caTrees = this.parsePassageText(ca);
	final Parse[] qTrees = this.parsePassageText(q);
	final Parse[] passageTrees = this.parsePassageText(passage);

	final Parse[] caNodes = getAllChildren(caTrees);
	final Parse[] qNodes = getAllChildren(qTrees);
	final Parse[] passageNodes = getAllChildren(passageTrees);

	final double questionScore = compareParseChunks(qNodes, passageNodes, verbose);
	final double answerScore = compareParseChunks(caNodes, passageNodes, verbose);
	return questionScore * answerScore;
}

Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Splits a passage into sentences, prints the named entities found in each
 * sentence, and returns the first parse of each sentence.
 *
 * Each sentence is tokenized once. The tokens go to the name finder (every
 * detected name is printed to standard output) and are then joined with
 * single spaces to form the line given to the parser.
 *
 * @param p the passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared for model initialisation
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized)init();
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	NameFinderME nameFinder = new NameFinderME(this.nerModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); // advance percentage
	Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i=0;i<sentences.length;i++){
		// Tokenize the current sentence only; spans are character offsets into it.
		Span[] tokenSpans = tokenizer.tokenizePos(sentences[i]);
		String[] tokens = Span.spansToStrings(tokenSpans, sentences[i]);

		// Name spans index tokens; map them back to character offsets to print the name.
		Span[] names = nameFinder.find(tokens);
		for (int ni = 0; ni < names.length; ni++) {
			int nameStart = tokenSpans[names[ni].getStart()].getStart();
			int nameEnd = tokenSpans[names[ni].getEnd() - 1].getEnd();
			System.out.println(sentences[i].substring(nameStart, nameEnd));
		}

		// Join the sentence's tokens (previously the Tokenizer object itself was joined).
		String sent= StringUtils.join(tokens," ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent,parser, 1);
		results[i]=sentResults[0];
	}
	return results;
}

Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Calls {@code init()}, splits the given text into sentences and writes
 * each one to standard error prefixed with "sent: ".
 *
 * @param testSents text to split into sentences
 * @throws InvalidFormatException declared by the signature
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	final String[] detected = new SentenceDetectorME(this.sentenceModel).sentDetect(testSents);
	int pos = 0;
	while (pos < detected.length) {
		System.err.println("sent: "+detected[pos]);
		pos++;
	}
}

Source File: PassageScorerOpenNLPAda.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Length-normalised structure score based on {@code compareParseType}.
 *
 * Parsing and child extraction are delegated to the helper {@code t}.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseType}
 * @return the product of both comparison scores divided by {@code passage.length()}
 * @throws InvalidFormatException propagated from {@code t.parsePassageText}
 */
public double scoreStructureNorm(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	final Parse[] answerTrees = t.parsePassageText(ca);
	final Parse[] questionTrees = t.parsePassageText(q);
	final Parse[] passageTrees = t.parsePassageText(passage);

	final Parse[] answerParts = t.getAllChildren(answerTrees);
	final Parse[] questionParts = t.getAllChildren(questionTrees);
	final Parse[] passageParts = t.getAllChildren(passageTrees);

	final double questionMatch = this.compareParseType(questionParts, passageParts, verbose);
	final double answerMatch = this.compareParseType(answerParts, passageParts, verbose);
	return questionMatch * answerMatch / passage.length();
}

Source File: PassageScorerOpenNLPAda.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Structure score based on {@code compareParseType}: question/passage
 * comparison multiplied by answer/passage comparison.
 *
 * Parsing and child extraction are delegated to the helper {@code t}.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseType}
 * @return the product of both comparison scores
 * @throws InvalidFormatException propagated from {@code t.parsePassageText}
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	final Parse[] answerTrees = t.parsePassageText(ca);
	final Parse[] questionTrees = t.parsePassageText(q);
	final Parse[] passageTrees = t.parsePassageText(passage);

	final Parse[] answerParts = t.getAllChildren(answerTrees);
	final Parse[] questionParts = t.getAllChildren(questionTrees);
	final Parse[] passageParts = t.getAllChildren(passageTrees);

	final double questionMatch = this.compareParseType(questionParts, passageParts, verbose);
	final double answerMatch = this.compareParseType(answerParts, passageParts, verbose);
	return questionMatch * answerMatch;
}

Source File: StephensonOpenNLPScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Calls {@code init()}, detects the sentences in the given text and prints
 * them one per line to standard error.
 *
 * @param testSents text to split into sentences
 * @throws InvalidFormatException declared by the signature
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	final SentenceDetectorME splitter = new SentenceDetectorME(this.sentenceModel);
	final String[] found = splitter.sentDetect(testSents);
	for (String each : found) {
		System.err.println("sent: "+each);
	}
}

Source File: LemmatizerModel.java From ixa-pipe-pos with Apache License 2.0

4 votes

/**
 * Creates a lemmatizer model by passing {@code COMPONENT_NAME} and the given
 * URL to the superclass constructor.
 *
 * @param modelURL location of the model (presumably a serialized model
 *          resource; loading is handled by the superclass)
 * @throws IOException declared by the signature; raised by the superclass constructor
 * @throws InvalidFormatException declared by the signature; raised by the superclass constructor
 */
public LemmatizerModel(URL modelURL) throws IOException, InvalidFormatException {
  super(COMPONENT_NAME, modelURL);
}

Source File: LemmatizerModel.java From ixa-pipe-pos with Apache License 2.0

4 votes

/**
 * Creates a lemmatizer model by passing {@code COMPONENT_NAME} and the given
 * file to the superclass constructor.
 *
 * @param modelFile file holding the model (loading is handled by the superclass)
 * @throws IOException declared by the signature; raised by the superclass constructor
 * @throws InvalidFormatException declared by the signature; raised by the superclass constructor
 */
public LemmatizerModel(File modelFile) throws IOException, InvalidFormatException {
  super(COMPONENT_NAME, modelFile);
}

Source File: POSStructureScorer.java From uncc2014watsonsim with GNU General Public License v2.0

4 votes

/**
 * Demo entry point: scores each sentence of a sample passage against a
 * combined sample answer and question, and prints the accumulated score.
 *
 * A sentence's score is added to the total only when it is at most 10% of
 * that sentence's character length.
 *
 * @param args unused
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 *           (NOTE(review): possibly also from {@code init()} — confirm)
 */
public static void main(String[] args) throws InvalidFormatException {
	init();

	String sampleQuestion = "Jane Austen";
	String sampleAnswer = "Jane Austen wrote Emma";
	String samplePassage = "Jane Austen was very modest about her own genius.[7] She once famously described her work as "+
			"the little bit (two Inches wide) of Ivory, on which I work with so fine a brush, " +
			"as produces little effect after much labor [7]. " +
			// Trailing space keeps this sentence from being glued to the next one.
			"Jane Austen wrote Emma. "+
			"When she was a girl she wrote stories. Her works were printed only after much revision. " +
			"Only four of her novels were printed while she was alive. They were Sense and Sensibility (1811), " +
			"Pride and Prejudice (1813), Mansfield Park (1814) and Emma (1816). " +
			"Two other novels, Northanger Abbey and Persuasion, were printed in 1817 with " +
			"a biographical notice by her brother, Henry Austen. Persuasion was written shortly before her death. " +
			"She also wrote two earlier works, Lady Susan, and an unfinished novel, The Watsons. " +
			"She had been working on a new novel, Sanditon, but she died before she could finish it.";

	// Separate answer and question with a space so "Emma" and "Jane" stay distinct tokens.
	String sampleQACombined = sampleAnswer + " " + sampleQuestion;
	Parse[] sentences = parsePassageText(samplePassage);

	int[] scorerModelQA = POSScoreSentece(sampleQACombined);
	double finalScore = 0;
	for (int i = 0; i < sentences.length; i++) {
		String sentenceText = sentences[i].toString();
		int[] scorerModelEachSentenceInPassage = POSScoreSentece(sentenceText);
		double tempScore = AbsoluteScorerModelSubtractor(scorerModelQA, scorerModelEachSentenceInPassage);
		System.out.println("tempScore = "+tempScore);
		// Only sentences whose score is small relative to their length contribute.
		if (tempScore <= 0.1 * sentenceText.length()) {
			finalScore = finalScore + tempScore;
		}
	}

	System.out.println("Final Score is : " + finalScore);
}

Source File: LemmatizerModel.java From ixa-pipe-pos with Apache License 2.0

4 votes

/**
 * Creates a lemmatizer model by passing {@code COMPONENT_NAME} and the given
 * stream to the superclass constructor.
 *
 * @param in stream supplying the model (reading is handled by the superclass;
 *          NOTE(review): whether the stream is closed is not visible here)
 * @throws IOException declared by the signature; raised by the superclass constructor
 * @throws InvalidFormatException declared by the signature; raised by the superclass constructor
 */
public LemmatizerModel(InputStream in) throws IOException, InvalidFormatException {
  super(COMPONENT_NAME, in);
}

Source File: LemmatizerFactory.java From ixa-pipe-pos with Apache License 2.0

4 votes

/**
 * Validates the artifacts of this factory. This implementation performs no
 * checks because the factory has no additional artifacts.
 *
 * @throws InvalidFormatException declared by the signature; never thrown by this body
 */
@Override
public void validateArtifactMap() throws InvalidFormatException {
  // no additional artifacts
}

Source File: LexicalLibOpenNlpImplTest.java From SciGraph with Apache License 2.0

4 votes

/**
 * Builds a Guice injector from {@code LexicalLibModule} and
 * {@code OpenNlpModule} and stores the {@code LexicalLibOpenNlpImpl} it
 * provides in {@code lexLib} for the tests of this class.
 *
 * @throws InvalidFormatException declared by the signature
 * @throws IOException declared by the signature
 */
@BeforeClass
public static void setup() throws InvalidFormatException, IOException {
  final Injector injector =
      Guice.createInjector(new LexicalLibModule(), new OpenNlpModule());
  lexLib = injector.getInstance(LexicalLibOpenNlpImpl.class);
}

Source File: KensNLPScorer.java From uncc2014watsonsim with GNU General Public License v2.0

4 votes

/**
 * Scores a passage as the number of question/passage entry matches divided
 * by the number of passage entries.
 *
 * The entries are the strings that {@code navigateTree} collects into
 * {@code questionNPs} and {@code passageNPs} (presumably noun phrases, going
 * by the names). Every equal question/passage pair counts once.
 *
 * NOTE(review): {@code questionNPs} and {@code passageNPs} are declared
 * outside this method; confirm whether they are reset between calls.
 *
 * @param q the question; its {@code text} is parsed
 * @param a the answer; unused by this method
 * @param p the passage; split into sentences, each parsed separately
 * @return matches divided by {@code passageNPs.size()}, or 0 when it is empty
 */
@Override
public double scorePassage(Phrase q, Answer a, Passage p) {
	int matchedCount = 0;
	try {
		if (!this.modelsAreInitialized) {
			init();
		}
		final Parser parser = ParserFactory.create(this.parserModel, 20, 0.95);

		// One parse for the question as a whole.
		final Parse[] questionParse = ParserTool.parseLine(q.text, parser, 1);

		// One parse (the first returned) for each passage sentence.
		final String[] passageSentences = this.DivideIntoSentences(p);
		final Parse[] passageParses = new Parse[passageSentences.length];
		int slot = 0;
		for (String sentence : passageSentences) {
			passageParses[slot++] = ParserTool.parseLine(sentence, parser, 1)[0];
		}

		// Collect entries from the question tree, then from every passage tree.
		navigateTree(questionParse, 0, questionNPs);
		for (int idx = 0; idx < passageParses.length; idx++) {
			navigateTree(passageParses, idx, passageNPs);
		}

		// Count every (question entry, passage entry) pair that is equal.
		for (String questionNP : questionNPs) {
			for (String passageNP : passageNPs) {
				if (questionNP.equals(passageNP)) {
					matchedCount++;
				}
			}
		}
	} catch (InvalidFormatException e) {
		// Best effort: report the failure and score with whatever was collected.
		e.printStackTrace();
	}

	final int passageEntryCount = passageNPs.size();
	return passageEntryCount == 0 ? 0 : (double) matchedCount / passageEntryCount;
}

Source File: KensNLPScorer.java From uncc2014watsonsim with GNU General Public License v2.0

4 votes

/**
 * Calls {@code init()} and splits the passage's text into sentences with
 * the sentence detector.
 *
 * @param p the passage whose {@code text} is split
 * @return the detected sentences
 * @throws InvalidFormatException declared by the signature
 */
public String[] DivideIntoSentences(Passage p) throws InvalidFormatException{
	init();
	return new SentenceDetectorME(this.sentenceModel).sentDetect(p.text);
}

opennlp.tools.util.InvalidFormatException Java Examples