edu.stanford.nlp.util.PropertiesUtils Java Examples
The following examples show how to use
edu.stanford.nlp.util.PropertiesUtils.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TargetFunctionWordInsertion.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Constructor. Loads the source- and target-side function-word sets from
 * unigram count files and keeps the top {@code rankCutoff} entries of each.
 *
 * @param args named feature arguments; must include {@code sourceFile} and
 *             {@code targetFile}; optional {@code rankCutoff} (defaults to
 *             {@code DEFAULT_RANK_CUTOFF})
 * @throws RuntimeException if fewer than two arguments are supplied
 */
public TargetFunctionWordInsertion(String...args) {
  // Fail fast before doing any parsing work (the original parsed first,
  // then validated).
  if (args.length < 2) {
    throw new RuntimeException("Must specify source and target unigram counts files");
  }
  Properties options = FeatureUtils.argsToProperties(args);
  System.err.println("Loading TargetFunctionWordInsertion template...");
  String sourceFilename = options.getProperty("sourceFile");
  String targetFilename = options.getProperty("targetFile");
  // Rank cutoff on the frequency-sorted word lists — presumably words ranked
  // above the cutoff are treated as function words; see loadCountsFile.
  this.rankCutoff = PropertiesUtils.getInt(options, "rankCutoff", DEFAULT_RANK_CUTOFF);
  System.err.println("Source words:");
  sourceFunctionWordSet = loadCountsFile(sourceFilename);
  System.err.println("Target words:");
  targetFunctionWordSet = loadCountsFile(targetFilename);
}
Example #2
Source File: GermanPostprocessor.java From phrasal with GNU General Public License v3.0 | 6 votes |
/** * A main method for training and evaluating the postprocessor. * * @param args */ public static void main(String[] args) { // Strips off hyphens Properties options = StringUtils.argsToProperties(args, optionArgDefs()); if (options.containsKey("help") || args.length == 0) { System.err.println(usage(GermanPostprocessor.class.getName())); System.exit(-1); } int nThreads = PropertiesUtils.getInt(options, "nthreads", 1); GermanPreprocessor preProcessor = new GermanPreprocessor(); GermanPostprocessor postProcessor = new GermanPostprocessor(options); CRFPostprocessor.setup(postProcessor, preProcessor, options); CRFPostprocessor.execute(nThreads, preProcessor, postProcessor); }
Example #3
Source File: FrenchPostprocessor.java From phrasal with GNU General Public License v3.0 | 6 votes |
/** * A main method for training and evaluating the postprocessor. * * @param args */ public static void main(String[] args) { // Strips off hyphens Properties options = StringUtils.argsToProperties(args, optionArgDefs()); if (options.containsKey("help") || args.length == 0) { System.err.println(usage(FrenchPostprocessor.class.getName())); System.exit(-1); } int nThreads = PropertiesUtils.getInt(options, "nthreads", 1); FrenchPreprocessor preProcessor = new FrenchPreprocessor(); FrenchPostprocessor postProcessor = new FrenchPostprocessor(options); CRFPostprocessor.setup(postProcessor, preProcessor, options); CRFPostprocessor.execute(nThreads, preProcessor, postProcessor); }
Example #4
Source File: EnglishPostprocessor.java From phrasal with GNU General Public License v3.0 | 6 votes |
/** * A main method for training and evaluating the postprocessor. * * @param args */ public static void main(String[] args) { // Strips off hyphens Properties options = StringUtils.argsToProperties(args, optionArgDefs()); if (options.containsKey("help") || args.length == 0) { System.err.println(usage(EnglishPostprocessor.class.getName())); System.exit(-1); } int nThreads = PropertiesUtils.getInt(options, "nthreads", 1); EnglishPreprocessor preProcessor = new EnglishPreprocessor(); EnglishPostprocessor postProcessor = new EnglishPostprocessor(options); CRFPostprocessor.setup(postProcessor, preProcessor, options); CRFPostprocessor.execute(nThreads, preProcessor, postProcessor); }
Example #5
Source File: SpanishPostprocessor.java From phrasal with GNU General Public License v3.0 | 6 votes |
/** * A main method for training and evaluating the postprocessor. * * @param args */ public static void main(String[] args) { // Strips off hyphens Properties options = StringUtils.argsToProperties(args, optionArgDefs()); if (options.containsKey("help") || args.length == 0) { System.err.println(usage(SpanishPostprocessor.class.getName())); System.exit(-1); } int nThreads = PropertiesUtils.getInt(options, "nthreads", 1); SpanishPreprocessor preProcessor = new SpanishPreprocessor(); SpanishPostprocessor postProcessor = new SpanishPostprocessor(options); CRFPostprocessor.setup(postProcessor, preProcessor, options); CRFPostprocessor.execute(nThreads, preProcessor, postProcessor); }
Example #6
Source File: CoreNlpTokenizerTest.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Tokenization test with POS tagging and lemmatization enabled: the expected
 * token strings are lemmas ("have", "go", "be") and the expected types are
 * Penn Treebank POS tags.
 */
@Test
public void testWithLemma() throws IOException {
  AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(
      "annotators", "tokenize,ssplit,pos,lemma",
      "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz",
      "tokenize.language", "en",
      "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep"));
  CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
  String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
  tokenizer.setReader(new StringReader(str));
  assertTokenStreamContents(tokenizer,
      // Expected lemmas, one per token.
      new String[] { "Mary", "have", "a", "little", "lamb", ".",
          "and", "everywhere", "that", "Mary", "go", ",",
          "the", "lamb", "be", "sure", "to", "go", "." },
      // Start offsets into str.
      new int[] { 0, 5, 9, 11, 18, 22,
          24, 28, 39, 44, 49, 53,
          55, 59, 64, 68, 73, 76, 78 },
      // End offsets into str.
      new int[] { 4, 8, 10, 17, 22, 23,
          27, 38, 43, 48, 53, 54,
          58, 63, 67, 72, 75, 78, 79 },
      // Penn Treebank POS tags as token types.
      new String[] { "NNP", "VBD", "DT", "JJ", "NN", ".",
          "CC", "RB", "IN", "NNP", "VBD", ",",
          "DT", "NN", "VBD", "JJ", "TO", "VB", "." },
      // Position increments; the sentence boundary adds SENTENCE_GAP.
      // NOTE(review): this array has 20 entries while the token arrays have
      // 19 — confirm whether the trailing entry is intentional.
      new int[] { 1, 1, 1, 1, 1, 1,
          1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1 });
}
Example #7
Source File: PhraseViewer.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Parses the command line into the class's static configuration fields
 * (VERBOSE, SRC_FILE, OPTS_FILE, XSD_FILE, FIRST_ID, LAST_ID).
 *
 * @param args raw command-line arguments
 * @return always {@code true} — NOTE(review): despite the name, no input is
 *         actually validated and no failure path exists; confirm whether
 *         missing required files should return {@code false}.
 */
private static boolean validateCommandLine(String[] args) {
  // Command line parsing
  Properties options = StringUtils.argsToProperties(args, argDefs());
  VERBOSE = options.containsKey("v");
  SRC_FILE = options.getProperty("s", null);
  OPTS_FILE = options.getProperty("o", null);
  XSD_FILE = options.getProperty("x", null);
  // Default to the widest possible id range when -f / -l are absent.
  FIRST_ID = PropertiesUtils.getInt(options, "f", Integer.MIN_VALUE);
  LAST_ID = PropertiesUtils.getInt(options, "l", Integer.MAX_VALUE);
  return true;
}
Example #8
Source File: RuleIndicator.java From phrasal with GNU General Public License v3.0 | 5 votes |
/** * Constructor for reflection loading. * * @param args */ public RuleIndicator(String... args) { Properties options = FeatureUtils.argsToProperties(args); this.addLexicalizedRule = options.containsKey("addLexicalized"); this.addClassBasedRule = options.containsKey("addClassBased"); this.countFeatureIndex = PropertiesUtils.getInt(options, "countFeatureIndex", -1); if (addClassBasedRule) { sourceMap = SourceClassMap.getInstance(); targetMap = TargetClassMap.getInstance(); } this.lexicalCutoff = PropertiesUtils.getInt(options, "lexicalCutoff", 0); }
Example #9
Source File: LexicalReorderingFeaturizer.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Constructor for reflection loading discriminative lexicalized reordering.
 *
 * Two mutually exclusive modes, selected by the {@code dynamic} option:
 * dynamic mode precomputes feature tags from the MSD bidirectional position
 * mapping; otherwise a discriminative reordering-type set is configured from
 * the {@code classes} option (all types by default).
 *
 * @param args named feature options
 */
public LexicalReorderingFeaturizer(String...args) {
  Properties options = FeatureUtils.argsToProperties(args);
  this.dynamic = PropertiesUtils.getBool(options, "dynamic", false);
  if (dynamic) {
    // Dynamic mode: fixed feature tags, no discriminative set or table.
    this.discriminativeSet = null;
    this.mlrt = null;
    this.featureTags = Arrays.stream(LexicalReorderingTable.msdBidirectionalPositionMapping)
        .map(m -> String.format("%s:%s", FEATURE_PREFIX, m)).toArray(String[]::new);
    this.useAlignmentConstellations = false;
    this.useClasses = false;
    this.countFeatureIndex = -1;
    this.lexicalCutoff = 0;
  } else {
    // Default to extracting every reordering type.
    this.discriminativeSet = new ArrayList<>(Arrays.asList(LexicalReorderingTable.ReorderingTypes.values()));
    this.useAlignmentConstellations = options.containsKey("conditionOnConstellations");
    this.countFeatureIndex = PropertiesUtils.getInt(options, "countFeatureIndex", -1);
    // Which reordering classes to extract: "classes" is a '-'-separated list
    // of ReorderingTypes names that replaces the default full set.
    if (options.containsKey("classes")) {
      String[] typeStrings = options.getProperty("classes").split("-");
      discriminativeSet = new ArrayList<>();
      for (String type : typeStrings) {
        discriminativeSet.add(LexicalReorderingTable.ReorderingTypes.valueOf(type));
      }
    }
    // Use class-based feature representations
    this.useClasses = options.containsKey("useClasses");
    if (useClasses) {
      sourceMap = SourceClassMap.getInstance();
      targetMap = TargetClassMap.getInstance();
    }
    this.mlrt = null;
    this.featureTags = null;
    this.lexicalCutoff = PropertiesUtils.getInt(options, "lexicalCutoff", 0);
  }
}
Example #10
Source File: NGramLanguageModelFeaturizer.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Constructor called by Phrasal when NGramLanguageModelFeaturizer appears in
 * <code>Phrasal.LANGUAGE_MODEL_OPT</code>.
 *
 * The first argument is always the language model filename and the second
 * argument is always the feature name. Additional arguments are named
 * parameters: {@code classBased} and (optionally) {@code classMap}.
 *
 * @throws RuntimeException if fewer than two arguments are given
 * @throws IOException if the language model cannot be loaded
 */
public NGramLanguageModelFeaturizer(String...args) throws IOException {
  if (args.length < 2) {
    throw new RuntimeException(
        "At least two arguments are needed: LM file name and LM feature name");
  }
  // Load the LM
  this.lm = LanguageModelFactory.load(args[0]);
  this.startToken = lm.getStartToken();
  this.endToken = lm.getEndToken();
  // Set the feature name
  this.featureName = args[1];
  // Named parameters
  Properties options = FeatureUtils.argsToProperties(args);
  this.isClassBased = PropertiesUtils.getBool(options, "classBased", false);
  if (isClassBased && options.containsKey("classMap")) {
    // A local class map that differs from the one specified by
    // Phrasal.TARGET_CLASS_MAP.
    this.targetClassMap = new LocalTargetMap();
    this.targetClassMap.load(options.getProperty("classMap"));
  } else if (isClassBased) {
    // Fall back to the globally configured target class map.
    this.targetClassMap = TargetClassMap.getInstance();
  } else {
    this.targetClassMap = null;
  }
}
Example #11
Source File: SentenceLevelEvaluation.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Scores each translation read from stdin against its per-line references and
 * prints one score per line to stdout.
 *
 * @param args references plus options: -order, -no-nist, -metric
 * @throws IOException if a reference file cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.print(usage());
    System.exit(-1);
  }
  Properties options = StringUtils.argsToProperties(args, argDefs());
  int ngramOrder = PropertiesUtils.getInt(options, "order", BLEUMetric.DEFAULT_MAX_NGRAM_ORDER);
  boolean disableTokenization = PropertiesUtils.getBool(options, "no-nist", false);
  String metric = options.getProperty("metric", "bleu");
  // Positional arguments (key "") are the reference filenames.
  String[] refs = options.getProperty("").split("\\s+");
  List<List<Sequence<IString>>> referencesList = MetricUtils.readReferences(refs, ! disableTokenization);
  System.err.printf("Metric: %s with %d references%n", metric, referencesList.get(0).size());
  LineNumberReader reader = new LineNumberReader(new InputStreamReader(System.in));
  int sourceInputId = 0;
  // One hypothesis per stdin line, aligned by line number with the references.
  for (String line; (line = reader.readLine()) != null; ++sourceInputId) {
    line = disableTokenization ? line : NISTTokenizer.tokenize(line);
    Sequence<IString> translation = IStrings.tokenize(line);
    double score = getScore(translation, referencesList.get(sourceInputId), ngramOrder, metric);
    System.out.printf("%.4f%n", score);
  }
  System.err.printf("Scored %d input segments%n", sourceInputId);
}
Example #12
Source File: SerializedDependencyToCoNLL.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Loads serialized CoreNLP annotations and prints each sentence's
 * dependencies in CoNLL form.
 *
 * @param args options: -annotations (serialized annotation file),
 *             -changepreps (boolean flag passed through to printDependencies)
 */
public static void main(String[] args) {
  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  String annotations = PropertiesUtils.get(options, "annotations", null, String.class);
  boolean changepreps = PropertiesUtils.getBool(options, "changepreps", false);
  int sentenceCount = CoreNLPCache.loadSerialized(annotations);
  CoreMap sentence;
  for (int i = 0; i < sentenceCount; i++) {
    try {
      sentence = CoreNLPCache.get(i);
      if (sentence == null) {
        // Preserve sentence alignment in the output: emit a blank line.
        System.out.println();
        System.err.println("Empty sentence #" + i);
        continue;
      }
      printDependencies(sentence, changepreps);
    } catch (Exception e) {
      // Abort on the first failing sentence after reporting its index.
      System.err.println("SourceSentence #" + i);
      e.printStackTrace();
      return;
    }
  }
}
Example #13
Source File: MinimumBayesRisk.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Rescores n-best lists by minimum Bayes risk using a multicore pipeline,
 * draining results as they become available to bound memory use.
 *
 * @param args the n-best filename (positional) plus options:
 *             -s (scale), -o (orientation: "utility" or "risk"), -m (metric)
 * @throws IOException if the n-best list cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.print(usage());
    System.exit(-1);
  }
  Properties options = StringUtils.argsToProperties(args, argDefs());
  final double scale = PropertiesUtils.getDouble(options, "s", DEFAULT_SCALE);
  final String orientation = options.getProperty("o", "utility");
  // "risk" minimizes expected loss; anything else maximizes expected utility.
  final boolean risk = "risk".equals(orientation);
  final String metricName = options.getProperty("m", DEFAULT_METRIC);
  final String filename = options.getProperty("");
  BasicNBestList nbestlists = new BasicNBestList(filename);
  // 0 threads = let the wrapper pick; 'true' = return results in input order.
  MulticoreWrapper<List<BasicNBestEntry>, List<Pair<Double, String>>> wrapper =
      new MulticoreWrapper<List<BasicNBestEntry>, List<Pair<Double, String>>>(0,
          new Processor(metricName, risk, scale), true);
  for (List<BasicNBestEntry> nbestlist : nbestlists) {
    wrapper.put(nbestlist);
    // Drain any finished lists while feeding new work.
    while (wrapper.peek()) {
      DumpRescored(wrapper.poll());
    }
  }
  wrapper.join();
  // Final drain after all workers finish.
  while (wrapper.peek()) {
    DumpRescored(wrapper.poll());
  }
}
Example #14
Source File: CoreNlpTokenizerTest.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Tokenization test with NER enabled: identical input and offsets to the
 * lemma test, but recognized entities ("Mary") carry the NER label "PERSON"
 * as their token type instead of a POS tag.
 */
@Test
public void testWithNER() throws IOException {
  AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(
      "annotators", "tokenize,ssplit,pos,lemma,ner",
      "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz",
      "tokenize.language", "en",
      "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep"));
  CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
  String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
  tokenizer.setReader(new StringReader(str));
  assertTokenStreamContents(tokenizer,
      // Expected lemmas, one per token.
      new String[] { "Mary", "have", "a", "little", "lamb", ".",
          "and", "everywhere", "that", "Mary", "go", ",",
          "the", "lamb", "be", "sure", "to", "go", "." },
      // Start offsets into str.
      new int[] { 0, 5, 9, 11, 18, 22,
          24, 28, 39, 44, 49, 53,
          55, 59, 64, 68, 73, 76, 78 },
      // End offsets into str.
      new int[] { 4, 8, 10, 17, 22, 23,
          27, 38, 43, 48, 53, 54,
          58, 63, 67, 72, 75, 78, 79 },
      // Token types: NER label where recognized, POS tag otherwise.
      new String[] { "PERSON", "VBD", "DT", "JJ", "NN", ".",
          "CC", "RB", "IN", "PERSON", "VBD", ",",
          "DT", "NN", "VBD", "JJ", "TO", "VB", "." },
      // Position increments; the sentence boundary adds SENTENCE_GAP.
      // NOTE(review): this array has 20 entries while the token arrays have
      // 19 — confirm whether the trailing entry is intentional.
      new int[] { 1, 1, 1, 1, 1, 1,
          1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1 });
}
Example #15
Source File: CoreNlpTokenizerTest.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Tokenization test with only tokenize+ssplit: surface forms are preserved
 * (no lemmatization) and no token types are asserted.
 */
@Test
public void testBasic() throws IOException {
  AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(
      "annotators", "tokenize,ssplit",
      "tokenize.language", "en",
      "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep"));
  CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
  String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
  tokenizer.setReader(new StringReader(str));
  assertTokenStreamContents(tokenizer,
      // Surface token strings.
      new String[] { "Mary", "had", "a", "little", "lamb", ".",
          "And", "everywhere", "that", "Mary", "went", ",",
          "the", "lamb", "was", "sure", "to", "go", "." },
      // Start offsets into str.
      new int[] { 0, 5, 9, 11, 18, 22,
          24, 28, 39, 44, 49, 53,
          55, 59, 64, 68, 73, 76, 78 },
      // End offsets into str.
      new int[] { 4, 8, 10, 17, 22, 23,
          27, 38, 43, 48, 53, 54,
          58, 63, 67, 72, 75, 78, 79 },
      // Position increments; the sentence boundary adds SENTENCE_GAP.
      // NOTE(review): this array has 20 entries while the token arrays have
      // 19 — confirm whether the trailing entry is intentional.
      new int[] { 1, 1, 1, 1, 1, 1,
          1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1 });
}
Example #16
Source File: BuildDependencyLMData2.java From phrasal with GNU General Public License v3.0 | 4 votes |
/**
 * Builds dependency-LM training data from a CoNLL dependency file, writing
 * event data to {@code deplm.data} and non-events to {@code deplm.nonevents}
 * in the output directory.
 *
 * @param args options: -input (CoNLL dependencies), -outdir, -alignment,
 *             -sourceTokens, -targetTokens, -classMap, -headClasses
 * @throws IOException on any read/write failure
 */
public static void main(String[] args) throws IOException {
  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  String dependenciesFilename = PropertiesUtils.get(options, "input", null, String.class);
  String outdirPath = PropertiesUtils.get(options, "outdir", ".", String.class);
  String alignmentFilename = PropertiesUtils.get(options, "alignment", null, String.class);
  String sourceTokensFilename = PropertiesUtils.get(options, "sourceTokens", null, String.class);
  String targetTokensFilename = PropertiesUtils.get(options, "targetTokens", null, String.class);
  String rightDepLMFilename = outdirPath + File.separator + "deplm.nonevents";
  String leftDepLMFilename = outdirPath + File.separator + "deplm.data";
  String classMapFilename = PropertiesUtils.get(options, "classMap", null, String.class);
  useHeadClasses = PropertiesUtils.getBool(options, "headClasses", false);
  if (classMapFilename != null) {
    System.err.println("Loading word class mapping from " + classMapFilename);
    classMap = new LocalWordClassMap();
    classMap.load(classMapFilename);
  } else {
    classMap = null;
  }
  /* Include alignment information and generate a "FRAG" tuple for each
     unaligned word instead of the real one. */
  boolean includeAlignment = (alignmentFilename != null && sourceTokensFilename != null);
  LineNumberReader alignmentReader = null;
  LineNumberReader sourceTokensReader = null;
  LineNumberReader targetTokensReader = null;
  if (includeAlignment) {
    alignmentReader = IOTools.getReaderFromFile(alignmentFilename);
    sourceTokensReader = IOTools.getReaderFromFile(sourceTokensFilename);
    targetTokensReader = IOTools.getReaderFromFile(targetTokensFilename);
  }
  File leftDepLMFile = new File(leftDepLMFilename);
  if (!leftDepLMFile.exists()) leftDepLMFile.createNewFile();
  File rightDepLMFile = new File(rightDepLMFilename);
  if (!rightDepLMFile.exists()) rightDepLMFile.createNewFile();
  FileWriter leftFW = new FileWriter(leftDepLMFile.getAbsoluteFile());
  FileWriter rightFW = new FileWriter(rightDepLMFile.getAbsoluteFile());
  lmWriter = new BufferedWriter(leftFW);
  noEventWriter = new BufferedWriter(rightFW);
  LineNumberReader inputReader = IOTools.getReaderFromFile(dependenciesFilename);
  HashMap<Integer, Pair<IndexedWord, List<Integer>>> dependencies = null;
  // One CoNLL sentence per iteration; the three auxiliary readers are
  // consumed line-for-line in lockstep with the dependency file.
  while ((dependencies = DependencyUtils.getDependenciesFromCoNLLFileReader(inputReader, false, true)) != null) {
    SymmetricalWordAlignment alignment = null;
    if (includeAlignment) {
      alignment = new SymmetricalWordAlignment(sourceTokensReader.readLine(),
          targetTokensReader.readLine(), alignmentReader.readLine());
    }
    updateCounts(dependencies, alignment);
  }
  // NOTE(review): the alignment/source/target readers are never closed —
  // harmless at JVM exit, but worth tidying.
  inputReader.close();
  lmWriter.close();
  noEventWriter.close();
}
Example #17
Source File: DependencyLanguageModelScoreNBest.java From phrasal with GNU General Public License v3.0 | 4 votes |
/**
 * Scores an n-best list with a dependency language model. Source dependencies
 * are projected onto each hypothesis via its word alignment, the projected
 * tree is scored, and the n-best entry is re-emitted with DEPLM,
 * DEPLMWORDPENALTY, and DEPLMPERP features appended to its feature string.
 *
 * @param args options: -sourceTokens, -nBestList, -dependencies, -lm
 *             (all required), plus -classMap, -headClasses, -scoreFrag,
 *             -scoreStop, -transitive
 * @throws IOException on any read failure
 */
public static void main(String[] args) throws IOException {
  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  String sourceTokens = PropertiesUtils.get(options, "sourceTokens", null, String.class);
  String nBestList = PropertiesUtils.get(options, "nBestList", null, String.class);
  String dependencies = PropertiesUtils.get(options, "dependencies", null, String.class);
  String lm = PropertiesUtils.get(options, "lm", null, String.class);
  String classMapFilename = PropertiesUtils.get(options, "classMap", null, String.class);
  DependencyLanguageModelPerplexity2.useHeadClasses = PropertiesUtils.getBool(options, "headClasses", false);
  boolean scoreFrag = PropertiesUtils.getBool(options, "scoreFrag", false);
  boolean scoreStop = PropertiesUtils.getBool(options, "scoreStop", false);
  boolean transitive = PropertiesUtils.getBool(options, "transitive", false);
  if (sourceTokens == null || nBestList == null || dependencies == null || lm == null) {
    System.err.println("java " + DependencyLanguageModelScoreNBest.class.getCanonicalName()
        + " -sourceTokens file -nBestList file -dependencies file -lm file [-classMap file]");
    return;
  }
  if (classMapFilename != null) {
    System.err.println("Loading word class mapping from " + classMapFilename);
    classMap = new LocalWordClassMap();
    classMap.load(classMapFilename);
  } else {
    classMap = null;
  }
  DEPLM = LanguageModelFactory.load(lm);
  LineNumberReader sourceReader = IOTools.getReaderFromFile(sourceTokens);
  LineNumberReader nBestListReader = IOTools.getReaderFromFile(nBestList);
  LineNumberReader dependenciesReader = IOTools.getReaderFromFile(dependencies);
  // " ||| " separates n-best fields; the Expr form is regex-escaped.
  String separatorExpr = " \\|\\|\\| ";
  String separator = " ||| ";
  String sourceSentence;
  String nBestLine = nBestListReader.readLine();
  String currentId = nBestLine.split(separatorExpr)[0];
  DecimalFormat df = new DecimalFormat("0.####E0");
  // Outer loop: one source sentence (and its CoNLL dependency block) at a
  // time. Inner loop: all n-best entries sharing that sentence's id.
  while ((sourceSentence = sourceReader.readLine()) != null) {
    HashMap<Integer, Pair<IndexedWord, List<Integer>>> head2Dependents =
        DependencyUtils.getDependenciesFromCoNLLFileReader(dependenciesReader, true, true);
    Map<Integer, Integer> dependent2Head = DependencyUtils.getReverseDependencies(head2Dependents);
    while (nBestLine != null && nBestLine.split(separatorExpr)[0].equals(currentId)) {
      String nBestParts[] = nBestLine.split(separatorExpr);
      // Field layout: 0=id, 1=translation, 2=features, 3=score, 4=alignment.
      String translation = nBestParts[1];
      String alignmentString = nBestParts[4];
      SymmetricalWordAlignment alignment = new SymmetricalWordAlignment(sourceSentence, translation, alignmentString);
      Map<Integer, NavigableSet<Integer>> projectedDependencies =
          DependencyProjectorCoNLL.projectDependencies(dependent2Head, alignment, transitive);
      Pair<Double, Integer> treeScore = scoreTree(projectedDependencies, alignment.e(), scoreFrag, scoreStop);
      double score = treeScore.first;
      int deplmWordCount = treeScore.second;
      // Re-emit the entry with the three DEPLM features spliced into the
      // feature field.
      System.out.print(nBestParts[0]);
      System.out.print(separator);
      System.out.print(nBestParts[1]);
      System.out.print(separator);
      System.out.print(nBestParts[2]);
      System.out.print(" DEPLM: ");
      System.out.print(df.format(score));
      System.out.print(" DEPLMWORDPENALTY: ");
      System.out.print(-deplmWordCount);
      System.out.print(" DEPLMPERP: ");
      System.out.print(deplmWordCount > 0 ? df.format(score / deplmWordCount) : 0);
      System.out.print(separator);
      System.out.print(nBestParts[3]);
      System.out.print(separator);
      System.out.print(nBestParts[4]);
      System.out.println("");
      nBestLine = nBestListReader.readLine();
    }
    currentId = nBestLine != null ? nBestLine.split(separatorExpr)[0] : "";
  }
}
Example #18
Source File: DependencyProjector.java From phrasal with GNU General Public License v3.0 | 4 votes |
/**
 * Projects source-side dependency parses onto target sentences through word
 * alignments and writes left/right/head dependency-LM training files
 * ({@code left.deplm}, {@code right.deplm}, {@code head.deplm}) into outdir.
 *
 * @param args options: -sourceTokens, -targetTokens, -alignment,
 *             -annotations, -outdir, -annotationsSplit, -transitive, -maxDepth
 * @throws IOException on any read/write failure
 */
public static void main(String[] args) throws IOException {
  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  String sourceTokens = PropertiesUtils.get(options, "sourceTokens", null, String.class);
  String targetTokens = PropertiesUtils.get(options, "targetTokens", null, String.class);
  String alignments = PropertiesUtils.get(options, "alignment", null, String.class);
  String annotations = PropertiesUtils.get(options, "annotations", null, String.class);
  String outdirPath = PropertiesUtils.get(options, "outdir", ".", String.class);
  String leftDepLMFilename = outdirPath + File.separator + "left.deplm";
  String rightDepLMFilename = outdirPath + File.separator + "right.deplm";
  String headDepLMFilename = outdirPath + File.separator + "head.deplm";
  File leftDepLMFile = new File(leftDepLMFilename);
  if (!leftDepLMFile.exists()) leftDepLMFile.createNewFile();
  File rightDepLMFile = new File(rightDepLMFilename);
  if (!rightDepLMFile.exists()) rightDepLMFile.createNewFile();
  File headDepLMFile = new File(headDepLMFilename);
  if (!headDepLMFile.exists()) headDepLMFile.createNewFile();
  FileWriter leftFW = new FileWriter(leftDepLMFile.getAbsoluteFile());
  FileWriter rightFW = new FileWriter(rightDepLMFile.getAbsoluteFile());
  FileWriter headFW = new FileWriter(headDepLMFile.getAbsoluteFile());
  leftDepLMWriter = new BufferedWriter(leftFW);
  rightDepLMWriter = new BufferedWriter(rightFW);
  headDepLMWriter = new BufferedWriter(headFW);
  boolean annotationsSplit = PropertiesUtils.getBool(options, "annotationsSplit", false);
  boolean transitive = PropertiesUtils.getBool(options, "transitive", false);
  int maxDepth = PropertiesUtils.getInt(options, "maxDepth", 2);
  File sourceSentences = new File(sourceTokens);
  File targetSentences = new File(targetTokens);
  File alignmentFile = new File(alignments);
  BufferedReader sourceReader = new BufferedReader(new FileReader(sourceSentences));
  BufferedReader targetReader = new BufferedReader(new FileReader(targetSentences));
  BufferedReader alignmentReader = new BufferedReader(new FileReader(alignmentFile));
  String sourceSentence;
  int i = 0;
  // The three files are read in lockstep: line i of each belongs to the same
  // sentence pair, and i also indexes into the parsed annotations.
  while ((sourceSentence = sourceReader.readLine()) != null) {
    CoreMap sentence = getParsedSentence(annotations, i, annotationsSplit);
    String targetSentence = targetReader.readLine();
    String alignmentString = alignmentReader.readLine();
    SymmetricalWordAlignment alignment = new SymmetricalWordAlignment(sourceSentence, targetSentence, alignmentString);
    Map<Integer, NavigableSet<Integer>> dependencies = projectDependencies(sentence, alignment, transitive, maxDepth);
    printLeftAndRightDependencies(dependencies, alignment.e());
    i++;
  }
  sourceReader.close();
  targetReader.close();
  alignmentReader.close();
  leftDepLMWriter.close();
  rightDepLMWriter.close();
  headDepLMWriter.close();
}
Example #19
Source File: BuildDependencyLMData.java From phrasal with GNU General Public License v3.0 | 4 votes |
/**
 * Reads a CoNLL-style dependency file and writes left/right/head
 * dependency-LM training files ({@code left.deplm}, {@code right.deplm},
 * {@code head.deplm}) into the output directory. Sentences are delimited by
 * empty lines; the final sentence is flushed at EOF.
 *
 * @param args options: -input (CoNLL file), -outdir
 * @throws IOException on any read/write failure
 */
public static void main(String[] args) throws IOException {
  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  String sourceTokens = PropertiesUtils.get(options, "input", null, String.class);
  String outdirPath = PropertiesUtils.get(options, "outdir", ".", String.class);
  String leftDepLMFilename = outdirPath + File.separator + "left.deplm";
  String rightDepLMFilename = outdirPath + File.separator + "right.deplm";
  String headDepLMFilename = outdirPath + File.separator + "head.deplm";
  File leftDepLMFile = new File(leftDepLMFilename);
  if (!leftDepLMFile.exists()) leftDepLMFile.createNewFile();
  File rightDepLMFile = new File(rightDepLMFilename);
  if (!rightDepLMFile.exists()) rightDepLMFile.createNewFile();
  File headDepLMFile = new File(headDepLMFilename);
  if (!headDepLMFile.exists()) headDepLMFile.createNewFile();
  FileWriter leftFW = new FileWriter(leftDepLMFile.getAbsoluteFile());
  FileWriter rightFW = new FileWriter(rightDepLMFile.getAbsoluteFile());
  FileWriter headFW = new FileWriter(headDepLMFile.getAbsoluteFile());
  leftDepLMWriter = new BufferedWriter(leftFW);
  rightDepLMWriter = new BufferedWriter(rightFW);
  headDepLMWriter = new BufferedWriter(headFW);
  File sourceSentences = new File(sourceTokens);
  BufferedReader sourceReader = new BufferedReader(new FileReader(sourceSentences));
  String sourceSentence;
  // head index -> sorted set of dependent indices for the current sentence.
  Map<Integer, NavigableSet<Integer>> dependencies = new HashMap<>();
  List<String> tokens = new LinkedList<>();
  while (true) {
    sourceSentence = sourceReader.readLine();
    // Blank line or EOF terminates a sentence: flush and reset.
    if (sourceSentence == null || sourceSentence.equals("")) {
      printLeftAndRightDependencies(dependencies, new ArraySequence<String>(tokens));
      dependencies = new HashMap<>();
      tokens = new LinkedList<>();
      if (sourceSentence == null) {
        break;
      } else {
        continue;
      }
    }
    // CoNLL fields: 0=id (1-based), 1=token, 6=head id, 7=relation.
    String fields[] = sourceSentence.split("\t");
    int id = Integer.parseInt(fields[0]) - 1;
    // "frag" relations get the sentinel head -2 (0-based root would be -1).
    int head = fields[7].equals("frag") ? -2 : Integer.parseInt(fields[6]) - 1;
    String token = fields[1];
    tokens.add(token);
    // Non-word tokens (punctuation etc., per isWord) are kept in the token
    // sequence but excluded from the dependency map.
    if (!isWord(token)) continue;
    if (!dependencies.containsKey(head)) dependencies.put(head, new TreeSet<Integer>());
    if (!dependencies.containsKey(id)) dependencies.put(id, new TreeSet<Integer>());
    dependencies.get(head).add(id);
  }
  sourceReader.close();
  leftDepLMWriter.close();
  rightDepLMWriter.close();
  headDepLMWriter.close();
}
Example #20
Source File: Evaluate.java From phrasal with GNU General Public License v3.0 | 4 votes |
/**
 * Corpus-level evaluation: reads hypotheses from stdin, scores them against
 * the given references with the named metric, and prints the overall score
 * and per-metric details.
 *
 * @param args positional: metric name followed by reference files;
 *             options: -no-nist (disable NIST tokenization), -cased
 * @throws IOException if a reference file cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 2) {
    System.err.print(usage());
    System.exit(-1);
  }
  Properties options = StringUtils.argsToProperties(args, argDefs());
  boolean disableTokenization = PropertiesUtils.getBool(options, "no-nist", false);
  boolean doCased = PropertiesUtils.getBool(options, "cased", false);
  // Setup the metric tokenization scheme. Applies to both the references and
  // hypotheses.
  if (doCased) NISTTokenizer.lowercase(false);
  NISTTokenizer.normalize( ! disableTokenization);
  // Load the references; positional args are metric name then reference files.
  String[] parsedArgs = options.getProperty("").split("\\s+");
  final String evalMetric = parsedArgs[0];
  String[] refs = Arrays.copyOfRange(parsedArgs, 1, parsedArgs.length);
  final List<List<Sequence<IString>>> references = MetricUtils.readReferences(refs, true);
  System.out.printf("Metric: %s with %d references%n", evalMetric, refs.length);
  EvaluationMetric<IString,String> metric = CorpusLevelMetricFactory.newMetric(evalMetric, references);
  IncrementalEvaluationMetric<IString,String> incMetric = metric.getIncrementalMetric();
  LineNumberReader reader = new LineNumberReader(new InputStreamReader(System.in));
  for (String line; (line = reader.readLine()) != null; ) {
    line = NISTTokenizer.tokenize(line);
    Sequence<IString> translation = IStrings.tokenize(line);
    ScoredFeaturizedTranslation<IString, String> tran =
        new ScoredFeaturizedTranslation<>(translation, null, 0);
    incMetric.add(tran);
  }
  // Check for an incomplete set of translations.
  if (reader.getLineNumber() < references.size()) {
    System.err.printf("WARNING: Translation candidate file is shorter than references (%d/%d)%n",
        reader.getLineNumber(), references.size());
  }
  reader.close();
  System.out.printf("%s = %.3f%n", evalMetric, 100 * Math.abs(incMetric.score()));
  System.out.printf("Details:%n%s%n", incMetric.scoreDetails());
}
Example #21
Source File: BLEUGenreEvaluator.java From phrasal with GNU General Public License v3.0 | 4 votes |
/**
 * Per-genre BLEU evaluation: hypotheses from stdin are bucketed by the genre
 * recorded in an InputProperties file and a separate incremental BLEU score
 * with n-gram precision details is printed for each genre.
 *
 * @param args positional: InputProperties file followed by reference files;
 *             options: -order (max n-gram order), -cased
 * @throws IOException if an input file cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 2) {
    System.err.print(usage());
    System.exit(-1);
  }
  Properties options = StringUtils.argsToProperties(args, argDefs());
  int BLEUOrder = PropertiesUtils.getInt(options, "order", BLEUMetric.DEFAULT_MAX_NGRAM_ORDER);
  boolean doCased = PropertiesUtils.getBool(options, "cased", false);
  // Setup the metric tokenization scheme. Applies to both the references and
  // hypotheses.
  if (doCased) NISTTokenizer.lowercase(false);
  // Load the references; positional args are properties file then references.
  String[] parameters = options.getProperty("").split("\\s+");
  String[] refs = new String[parameters.length - 1];
  System.arraycopy(parameters, 1, refs, 0, refs.length);
  List<InputProperties> inputProperties = InputProperties.parse(new File(parameters[0]));
  List<List<Sequence<IString>>> referencesList = MetricUtils.readReferences(refs, true);
  Map<String,BLEUMetric<IString, String>.BLEUIncrementalMetric> metrics =
      BLEUGenreEvaluator.run(referencesList, inputProperties, BLEUOrder, System.in);
  for (Map.Entry<String,BLEUMetric<IString, String>.BLEUIncrementalMetric> entry : metrics.entrySet()) {
    String genre = entry.getKey();
    BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric = entry.getValue();
    System.out.printf("Genre: %s%n", genre);
    double[] ngramPrecisions = incMetric.ngramPrecisions();
    System.out.printf("BLEU = %.3f, ", 100 * incMetric.score());
    // Slash-separated per-order precisions, e.g. "70.1/45.2/...".
    for (int i = 0; i < ngramPrecisions.length; i++) {
      if (i != 0) {
        System.out.print("/");
      }
      System.out.printf("%.3f", ngramPrecisions[i] * 100);
    }
    System.out.printf(" (BP=%.3f, ratio=%.3f %d/%d)%n", incMetric
        .brevityPenalty(), ((1.0 * incMetric.candidateLength()) / incMetric
        .effectiveReferenceLength()), incMetric.candidateLength(), incMetric
        .effectiveReferenceLength());
    System.out.printf("%nPrecision Details:%n");
    double[][] precCounts = incMetric.ngramPrecisionCounts();
    // Matched/total counts per n-gram order.
    for (int i = 0; i < ngramPrecisions.length; i++) {
      System.out.printf("\t%d:%d/%d%n", i, (int) precCounts[i][0], (int) precCounts[i][1]);
    }
    System.out.println();
  }
}
Example #22
Source File: BLEUMetric.java From phrasal with GNU General Public License v3.0 | 4 votes |
public static void main(String[] args) throws IOException { if (args.length < 1) { System.err.print(usage()); System.exit(-1); } Properties options = StringUtils.argsToProperties(args, argDefs()); int BLEUOrder = PropertiesUtils.getInt(options, "order", BLEUMetric.DEFAULT_MAX_NGRAM_ORDER); boolean doSmooth = PropertiesUtils.getBool(options, "smooth", false); boolean disableTokenization = PropertiesUtils.getBool(options, "no-nist", false); boolean doCased = PropertiesUtils.getBool(options, "cased", false); // Setup the metric tokenization scheme. Applies to both the references and // hypotheses if (doCased) NISTTokenizer.lowercase(false); NISTTokenizer.normalize( ! disableTokenization); // Load the references String[] refs = options.getProperty("").split("\\s+"); System.out.printf("Metric: BLEU-%d with %d references%n", BLEUOrder, refs.length); List<List<Sequence<IString>>> referencesList = MetricUtils.readReferences(refs, true); // For backwards compatibility doSmooth |= System.getProperty("smoothBLEU") != null; BLEUMetric<IString, String> bleu = new BLEUMetric<IString, String>(referencesList, BLEUOrder, doSmooth); BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric = bleu .getIncrementalMetric(); LineNumberReader reader = new LineNumberReader(new InputStreamReader( System.in)); for (String line; (line = reader.readLine()) != null; ) { line = NISTTokenizer.tokenize(line); Sequence<IString> translation = IStrings.tokenize(line); ScoredFeaturizedTranslation<IString, String> tran = new ScoredFeaturizedTranslation<IString, String>( translation, null, 0); incMetric.add(tran); } // Check for an incomplete set of translations if (reader.getLineNumber() < referencesList.size()) { System.err.printf("WARNING: Translation candidate file is shorter than references (%d/%d)%n", reader.getLineNumber(), referencesList.size()); } reader.close(); double[] ngramPrecisions = incMetric.ngramPrecisions(); System.out.printf("BLEU = %.3f, ", 100 * incMetric.score()); for (int i = 
0; i < ngramPrecisions.length; i++) { if (i != 0) { System.out.print("/"); } System.out.printf("%.3f", ngramPrecisions[i] * 100); } System.out.printf(" (BP=%.3f, ratio=%.3f %d/%d)%n", incMetric .brevityPenalty(), ((1.0 * incMetric.candidateLength()) / incMetric .effectiveReferenceLength()), incMetric.candidateLength(), incMetric .effectiveReferenceLength()); System.out.printf("%nPrecision Details:%n"); double[][] precCounts = incMetric.ngramPrecisionCounts(); for (int i = 0; i < ngramPrecisions.length; i++) { System.out.printf("\t%d:%d/%d%n", i, (int) precCounts[i][0], (int) precCounts[i][1]); } }
Example #23
Source File: LogicAnalysisTool.java From Criteria2Query with Apache License 2.0 | 4 votes |
/**
 * Builds the CoreNLP pipeline used for logic analysis with the
 * tokenize, ssplit, pos, lemma, and depparse annotators.
 */
public LogicAnalysisTool() {
  Properties config = new Properties();
  config.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
  pipeline = new StanfordCoreNLP(config);
}
Example #24
Source File: PhrasalService.java From phrasal with GNU General Public License v3.0 | 4 votes |
/** * Start the service. * * @param args */ public static void main(String[] args) { Properties options = StringUtils.argsToProperties(args, optionArgDefs()); int port = PropertiesUtils.getInt(options, "p", DEFAULT_HTTP_PORT); boolean loadMockServlet = PropertiesUtils.getBool(options, "m", false); boolean localHost = PropertiesUtils.getBool(options, "l", false); String uiFile = options.getProperty("u", "debug.html"); String resourcePath = options.getProperty("r", "."); // Parse arguments String argList = options.getProperty("",null); String[] parsedArgs = argList == null ? null : argList.split("\\s+"); if (parsedArgs == null || parsedArgs.length != 1) { System.out.println(usage()); System.exit(-1); } String phrasalIniFile = parsedArgs[0]; // Setup the jetty server Server server = new Server(); // Jetty 8 way of configuring the server // Connector connector = new SelectChannelConnector(); // connector.setPort(port); // server.addConnector(connector); // Jetty9 way of configuring the server ServerConnector connector = new ServerConnector(server); connector.setPort(port); server.addConnector(connector); if (localHost) { connector.setHost(DEBUG_URL); } // Setup the servlet context ServletContextHandler context = new ServletContextHandler(ServletContextHandler.SESSIONS); context.setContextPath("/"); // Add Phrasal servlet PhrasalServlet servlet = loadMockServlet ? new PhrasalServlet() : new PhrasalServlet(phrasalIniFile); context.addServlet(new ServletHolder(servlet), SERVLET_ROOT); // TODO(spenceg): gzip compression causes an encoding problem for unicode characters // on the client. Not sure if the compression or decompression is the problem. 
// EnumSet<DispatcherType> dispatches = EnumSet.of(DispatcherType.REQUEST, DispatcherType.ASYNC); // context.addFilter(new FilterHolder(new IncludableGzipFilter()), "/t", dispatches); // Add debugging web-page ResourceHandler resourceHandler = new ResourceHandler(); resourceHandler.setWelcomeFiles(new String[]{ uiFile }); resourceHandler.setResourceBase(resourcePath); HandlerList handlers = new HandlerList(); handlers.setHandlers(new Handler[] { resourceHandler, context }); server.setHandler(handlers); // Start the service try { logger.info("Starting PhrasalService on port: " + String.valueOf(port)); server.start(); server.join(); } catch (Exception e) { logger.error("Servlet crashed. Service shutting down."); e.printStackTrace(); } }