edu.stanford.nlp.stats.Counter Java Examples
The following examples show how to use
edu.stanford.nlp.stats.Counter.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConvertWeights.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Command-line entry point: rewrites a serialized weights file in the new format.
 *
 * <p>The weights are deserialized first; only after that succeeds is the original
 * file renamed to {@code <filename>.old} and the weights written back under the
 * original name. Exits with status -1 on bad usage, on an unreadable weights
 * file, or when the rename fails.
 *
 * @param args exactly one element: the path of the old weights file
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) {
  if (args.length != 1) {
    System.err.printf("Usage: java %s old_wts%n", ConvertWeights.class.getName());
    System.exit(-1);
  }
  String filename = args[0];
  Counter<String> oldWeights = IOTools.deserialize(filename, ClassicCounter.class, SerializationMode.DEFAULT);
  // NOTE(review): deserialize appears able to return null on failure -- confirm.
  // Bail out BEFORE touching the file system so the original file is never
  // moved aside without a replacement being written.
  if (oldWeights == null) {
    System.err.printf("Could not read weights from %s; file left untouched%n", filename);
    System.exit(-1);
  }

  Path oldFilename = Paths.get(filename + ".old");
  try {
    Files.move(Paths.get(filename), oldFilename);
  } catch (IOException e) {
    e.printStackTrace();
    System.exit(-1);
  }

  IOTools.writeWeights(filename, oldWeights);
  System.out.printf("Converted %s to new format (old file moved to %s)%n",
      filename, oldFilename.toString());
}
Example #2
Source File: MakeWordClasses.java From phrasal with GNU General Public License v3.0 | 6 votes |
private int updateCountsWith(PartialStateUpdate result) { // Update counts Counters.addInPlace(classCount, result.deltaClassCount); Set<Integer> classes = result.deltaClassHistoryCount.firstKeySet(); for (Integer classId : classes) { Counter<NgramHistory> counter = this.classHistoryCount.getCounter(classId); Counter<NgramHistory> delta = result.deltaClassHistoryCount.getCounter(classId); Counters.addInPlace(counter, delta); } // Update assignments int numUpdates = 0; for (Map.Entry<IString, Integer> assignment : result.wordToClass.entrySet()) { int oldAssignment = wordToClass.get(assignment.getKey()); int newAssignment = assignment.getValue(); if (oldAssignment != newAssignment) { ++numUpdates; wordToClass.put(assignment.getKey(), assignment.getValue()); } } return numUpdates; }
Example #3
Source File: OnlineTuner.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Reads extra feature weights from a plain-text file and merges them into the
 * weight accumulator. A feature that the accumulator already contains is left
 * as is; every decision is echoed to stderr.
 *
 * <p>An {@link IOException} while reading is not propagated: the stack trace is
 * printed and a fatal log entry is written.
 *
 * @param additionalFeatureWeightsFile path of the plain-text weights file
 */
private void addAdditionalFeatureWeights(String additionalFeatureWeightsFile) {
  try {
    Counter<String> extraWeights = IOTools.readWeightsPlain(additionalFeatureWeightsFile);
    System.err.println("read weights: ");
    for (Entry<String,Double> feature : extraWeights.entrySet()) {
      if (wtsAccumulator.containsKey(feature.getKey())) {
        System.err.println("skipping feature: " + feature.getKey());
        continue;
      }
      wtsAccumulator.setCount(feature.getKey(), feature.getValue());
      System.err.println("setting feature: " + feature.getKey() + " = " + feature.getValue());
    }
  } catch (IOException e) {
    e.printStackTrace();
    logger.fatal("Could not load additional weights from : {}", additionalFeatureWeightsFile);
  }
}
Example #4
Source File: KBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
/** * Score the given input, returning both the classification decision and the * probability of that decision. * Note that this method will not return a relation which does not type check. * * * @param input The input to classify. * @return A pair with the relation we classified into, along with its confidence. */ public Pair<String,Double> classify(KBPInput input) { RVFDatum<String, String> datum = new RVFDatum<>(features(input)); Counter<String> scores = classifier.scoresOf(datum); Counters.expInPlace(scores); Counters.normalize(scores); String best = Counters.argmax(scores); // While it doesn't type check, continue going down the list. // NO_RELATION is always an option somewhere in there, so safe to keep going... while (!NO_RELATION.equals(best) && (!edu.stanford.nlp.ie.KBPRelationExtractor.RelationType.fromString(best).get().validNamedEntityLabels.contains(input.objectType) || RelationType.fromString(best).get().entityType != input.subjectType) ) { scores.remove(best); Counters.normalize(scores); best = Counters.argmax(scores); } return Pair.makePair(best, scores.getCount(best)); }
Example #5
Source File: KBPStatisticalExtractor.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
public static Counter<String> features(KBPInput input) { // Ensure RegexNER Tags! input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASED, false); input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASELESS, true); // Get useful variables ClassicCounter<String> feats = new ClassicCounter<>(); if (Span.overlaps(input.subjectSpan, input.objectSpan) || input.subjectSpan.size() == 0 || input.objectSpan.size() == 0) { return new ClassicCounter<>(); } // Actually featurize denseFeatures(input, input.sentence, feats); surfaceFeatures(input, input.sentence, feats); dependencyFeatures(input, input.sentence, feats); relationSpecificFeatures(input, input.sentence, feats); return feats; }
Example #6
Source File: OptimizerUtils.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Selects features by the number of n-best lists (segments) they occur in.
 * Each feature is counted at most once per n-best list; the resulting counts
 * are filtered with {@code Counters.keysAbove} using {@code minSegmentCount - 1}
 * as the threshold.
 *
 * @param nbest the n-best lists to scan
 * @param minSegmentCount the segment-count cutoff
 * @return the feature names selected by {@code Counters.keysAbove}
 */
public static Set<String> featureWhiteList(FlatNBestList nbest, int minSegmentCount) {
  Counter<String> segmentsPerFeature = new ClassicCounter<String>();
  for (List<ScoredFeaturizedTranslation<IString, String>> segment : nbest.nbestLists()) {
    // Deduplicate within the segment so a feature counts once per segment.
    Set<String> seenInSegment = new HashSet<String>();
    for (ScoredFeaturizedTranslation<IString, String> translation : segment) {
      for (FeatureValue<String> fv : translation.features) {
        seenInSegment.add(fv.name);
      }
    }
    for (String name : seenInSegment) {
      segmentsPerFeature.incrementCount(name);
    }
  }
  return Counters.keysAbove(segmentsPerFeature, minSegmentCount - 1);
}
Example #7
Source File: DependencyBnBPreorderer.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Reads whitespace-separated tokens from {@code reader} until end of input and
 * returns the tokens kept by {@code Counters.retainTop(counts, k)}.
 *
 * <p>Empty strings produced by {@link String#split(String)} (blank lines, or
 * lines that begin with whitespace) are not tokens and are skipped, so they
 * cannot occupy one of the {@code k} slots.
 *
 * @param reader source of the text, read line by line
 * @param k number of top entries to retain
 * @return a new set holding the retained tokens
 * @throws IOException if reading from {@code reader} fails
 */
private static Set<String> getMostFrequentTokens(LineNumberReader reader, int k) throws IOException {
  Counter<String> tokenCounts = new ClassicCounter<String>();

  String line;
  while ((line = reader.readLine()) != null) {
    for (String token : line.split("\\s+")) {
      // split() yields "" for a blank line and as the first element of a line
      // with leading whitespace; that is not a real token.
      if (token.isEmpty()) {
        continue;
      }
      tokenCounts.incrementCount(token);
    }
  }

  Counters.retainTop(tokenCounts, k);
  return new HashSet<String>(tokenCounts.keySet());
}
Example #8
Source File: MetricUtils.java From phrasal with GNU General Public License v3.0 | 6 votes |
/** * Calculates the "informativeness" of each ngram, which is used by the NIST * metric. In Matlab notation, the informativeness of the ngram w_1:n is * defined as -log2(count(w_1:n)/count(w_1:n-1)). * * @param ngramCounts * ngram counts according to references * @param totWords * total number of words, which is used to compute the * informativeness of unigrams. */ static public <TK> Counter<Sequence<TK>> getNGramInfo( Counter<Sequence<TK>> ngramCounts, int totWords) { Counter<Sequence<TK>> ngramInfo = new ClassicCounter<Sequence<TK>>(); for (Sequence<TK> ngram : ngramCounts.keySet()) { double num = ngramCounts.getCount(ngram); double denom = totWords; if (ngram.size() > 1) { Sequence<TK> ngramPrefix = ngram.subsequence(0, ngram.size() - 1); denom = ngramCounts.getCount(ngramPrefix); } double inf = -Math.log(num / denom) / LOG2; ngramInfo.setCount(ngram, inf); // System.err.printf("ngram info: %s %.3f\n", ngram.toString(), inf); } return ngramInfo; }
Example #9
Source File: ScorerFactory.java From phrasal with GNU General Public License v3.0 | 6 votes |
/**
 * Creates the scorer registered under {@code scorerName}.
 *
 * @param scorerName one of the scorer-name constants handled below
 * @param config passed to the dense and sparse scorers
 * @param featureIndex passed to the dense and sparse scorers
 * @return the newly created scorer
 * @throws IOException declared by this factory's signature
 * @throws RuntimeException if {@code scorerName} matches no known scorer
 */
public static Scorer<String> factory(String scorerName, Counter<String> config, Index<String> featureIndex)
    throws IOException {
  switch (scorerName) {
    case UNIFORM_SCORER:
      return new UniformScorer<String>();
    case DENSE_SCORER:
      return new DenseScorer(config, featureIndex);
    case SPARSE_SCORER:
      return new SparseScorer(config, featureIndex);
    default:
      throw new RuntimeException(String.format("Unknown scorer \"%s\"", scorerName));
  }
}
Example #10
Source File: PairwiseRankingOptimizerSGD.java From phrasal with GNU General Public License v3.0 | 6 votes |
/** * True online learning, one example at a time. */ @Override public Counter<String> getGradient(Counter<String> weights, Sequence<IString> source, int sourceId, List<RichTranslation<IString, String>> translations, List<Sequence<IString>> references, double[] referenceWeights, SentenceLevelMetric<IString, String> scoreMetric) { Objects.requireNonNull(weights); Objects.requireNonNull(scoreMetric); assert sourceId >= 0; assert translations.size() > 0 : "No translations for source id: " + String.valueOf(sourceId); assert references.size() > 0; // Sample from the n-best list List<Datum> dataset = sampleNbestList(sourceId, source, scoreMetric, translations, references); Counter<String> gradient = computeGradient(dataset, weights, 1); if (dataset.isEmpty()) { logger.warn("Null gradient for sourceId: {}", sourceId); } if (VERBOSE) { System.err.printf("True online gradient"); displayGradient(gradient); } return gradient; }
Example #11
Source File: DocumentFrequencyCounter.java From wiseowl with MIT License | 6 votes |
/** * Get an IDF map for all the documents in the given file. * @param file * @return */ private static Counter<String> getIDFMapForFile(Reader file) throws SAXException, IOException, TransformerException { DocumentBuilder parser = XMLUtils.getXmlParser(); Document xml = parser.parse(new ReaderInputStream(file)); NodeList docNodes = xml.getDocumentElement().getElementsByTagName(TAG_DOCUMENT); Element doc; Counter<String> idfMap = new ClassicCounter<String>(); for (int i = 0; i < docNodes.getLength(); i++) { doc = (Element) docNodes.item(i); NodeList texts = doc.getElementsByTagName(TAG_TEXT); assert texts.getLength() == 1; Element text = (Element) texts.item(0); String textContent = getFullTextContent(text); idfMap.addAll(getIDFMapForDocument(textContent)); // Increment magic counter idfMap.incrementCount("__all__"); } return idfMap; }
Example #12
Source File: DocumentFrequencyCounter.java From wiseowl with MIT License | 6 votes |
/** * Get an IDF map for the given document string. * * @param document * @return */ private static Counter<String> getIDFMapForDocument(String document) { // Clean up -- remove some Gigaword patterns that slow things down // / don't help anything document = headingSeparator.matcher(document).replaceAll(""); DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(document)); preprocessor.setTokenizerFactory(tokenizerFactory); Counter<String> idfMap = new ClassicCounter<String>(); for (List<HasWord> sentence : preprocessor) { if (sentence.size() > MAX_SENTENCE_LENGTH) continue; List<TaggedWord> tagged = tagger.tagSentence(sentence); for (TaggedWord w : tagged) { if (w.tag().startsWith("n")) idfMap.incrementCount(w.word()); } } return idfMap; }
Example #13
Source File: MERT.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Creates a weight vector with one value drawn from {@code globalRandom.nextDouble()}
 * for each given feature name, and echoes the vector to stderr.
 *
 * @param keySet the feature names to assign weights to
 * @return the freshly generated weight vector
 */
static Counter<String> randomWts(Set<String> keySet) {
  final Counter<String> point = new ClassicCounter<String>();
  for (String featureName : keySet) {
    double value = globalRandom.nextDouble();
    point.setCount(featureName, value);
  }
  System.err.printf("random Wts: %s%n", point);
  return point;
}
Example #14
Source File: OverrideBinwts.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Command-line entry point: applies plain-text overrides to a weights file.
 * An override value of zero removes the feature; any other value replaces the
 * feature's weight. The merged weights are written to the output path.
 *
 * @param args {@code input overrides output}; anything else prints usage and
 *             exits with status -1
 */
public static void main(String[] args) {
  if (args.length != 3) {
    usage();
    System.exit(-1);
  }
  final String input = args[0];
  final String overrides = args[1];
  final String output = args[2];

  System.err.println("reading weights from " + input);
  Counter<String> weights = IOTools.readWeights(input);

  try {
    Counter<String> overridesW = IOTools.readWeightsPlain(overrides);
    System.err.println("read weights from " + overrides + ":");
    for (Entry<String,Double> override : overridesW.entrySet()) {
      String feature = override.getKey();
      if (override.getValue() != 0) {
        weights.setCount(feature, override.getValue());
      } else {
        weights.remove(feature);
      }
      System.err.println("setting feature: " + feature + " = " + override.getValue());
    }
  } catch (IOException e) {
    e.printStackTrace();
    System.exit(-1);
  }

  System.err.println("writing weights to " + output);
  IOTools.writeWeights(output, weights);
}
Example #15
Source File: BLEUMetric.java From phrasal with GNU General Public License v3.0 | 5 votes |
public BLEUMetric(List<List<Sequence<TK>>> referencesList, int order) { this.order = order; maxReferenceCounts = new ArrayList<Counter<Sequence<TK>>>( referencesList.size()); refLengths = new int[referencesList.size()][]; multiplier = 1; init(referencesList); smooth = referencesList.size() == 1; }
Example #16
Source File: MERT.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Sums feature values across translations: for each feature name, the result
 * holds the total of that feature's values over all given translations.
 *
 * @param trans the translations whose features are summed
 * @return a new counter of per-feature totals
 */
public static Counter<String> summarizedAllFeaturesVector(
    List<ScoredFeaturizedTranslation<IString, String>> trans) {
  final Counter<String> totals = new ClassicCounter<String>();
  for (ScoredFeaturizedTranslation<IString, String> translation : trans) {
    for (FeatureValue<String> feature : translation.features) {
      totals.incrementCount(feature.name, feature.value);
    }
  }
  return totals;
}
Example #17
Source File: AdaGradFOBOSUpdater.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Applies one update step, dispatching on the configured norm:
 * {@code Norm.LASSO} uses the L1 update, {@code Norm.aeLASSO} the elitist-lasso
 * update.
 *
 * @throws UnsupportedOperationException for any other norm
 */
@Override
public void update(Counter<String> weights, Counter<String> gradient, int timeStep, boolean endOfEpoch) {
  if (norm == Norm.LASSO) {
    updateL1(weights, gradient, timeStep);
    return;
  }
  if (norm == Norm.aeLASSO) {
    updateElitistLasso(weights, gradient, timeStep);
    return;
  }
  throw new UnsupportedOperationException("norm type " + norm + " cannot be recognized in AdaGradFOBOSUpdater");
}
Example #18
Source File: OptimizerUtils.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Pairs each weight name with the value at the same index of {@code wtsArr}.
 *
 * @param weightNames the feature names; determines how many entries are read
 * @param wtsArr the values, indexed in parallel with {@code weightNames}
 * @return a new counter mapping each name to its value
 */
public static Counter<String> getWeightCounterFromArray(String[] weightNames, double[] wtsArr) {
  final Counter<String> result = new ClassicCounter<String>();
  int position = 0;
  for (String name : weightNames) {
    result.setCount(name, wtsArr[position]);
    position++;
  }
  return result;
}
Example #19
Source File: PairwiseRankingOptimizer.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Trains a logistic classifier on PRO samples and converts its weights into
 * decoder weights.
 *
 * <p>The initial weights are used only to derive the random seed for sampling.
 * The first call that finds {@code updatedBestOnce} unset also evaluates the
 * new weights and pushes them to {@code MERT.updateBest}, under the
 * {@code MERT.bestWts} lock.
 *
 * @param initialWts starting weights; copied, not modified
 * @return the decoder weights derived from the trained classifier
 * @throws RuntimeException if a classifier weight name starts with neither
 *         {@code "1 / "} nor {@code "0 / "}
 */
@Override
public Counter<String> optimize(Counter<String> initialWts) {
  // Work on a copy so the caller's counter is untouched by normalize().
  Counter<String> wts = new ClassicCounter<String>(initialWts);
  Counters.normalize(wts);
  // Seed = |max normalized weight| * Long.MAX_VALUE, computed in log space.
  // A max of 0 gives log(0) = -Infinity and therefore a seed of 0.
  double seedSeed = Math.abs(Counters.max(wts));
  long seed = (long)Math.exp(Math.log(seedSeed) + Math.log(Long.MAX_VALUE));
  System.err.printf("PRO thread using random seed: %d\n", seed);
  RVFDataset<String, String> proSamples = getSamples(new Random(seed));
  LogPrior lprior = new LogPrior();
  lprior.setSigma(l2sigma);
  LogisticClassifierFactory<String,String> lcf = new LogisticClassifierFactory<String,String>();
  LogisticClassifier<String, String> lc = lcf.trainClassifier(proSamples, lprior, false);
  Counter<String> decoderWeights = new ClassicCounter<String>();
  Counter<String> lcWeights = lc.weightsAsCounter();
  // Classifier weight names carry a "1 / " or "0 / " prefix; weights under
  // "1 / " are added and weights under "0 / " are subtracted, both onto the
  // un-prefixed feature name.
  for (String key : lcWeights.keySet()) {
    double mul;
    if (key.startsWith("1 / ")) {
      mul = 1.0;
    } else if (key.startsWith("0 / ")) {
      mul = -1.0;
    } else {
      throw new RuntimeException("Unparsable weight name produced by logistic classifier: "+key);
    }
    String decoderKey = key.replaceFirst("^[10] / ", "");
    decoderWeights.incrementCount(decoderKey, mul*lcWeights.getCount(key));
  }

  // One-time forced update of the MERT best weights, guarded by the
  // MERT.bestWts monitor.
  synchronized (MERT.bestWts) {
    if (!updatedBestOnce) {
      System.err.println("Force updating weights (once)");
      double metricEval = MERT.evalAtPoint(nbest, decoderWeights, emetric);
      MERT.updateBest(decoderWeights, metricEval, true);
      updatedBestOnce = true;
    }
  }
  return decoderWeights;
}
Example #20
Source File: AdaGradFOBOSUpdater.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Creates an updater with the given configuration.
 *
 * @param initialRate stored as the rate
 * @param expectedNumFeatures passed to the constructor of the squared-gradient counter
 * @param lambda stored as the regularization strength
 * @param norm the norm this updater dispatches on
 * @param customL1 stored as given
 * @param fixedFeatures stored as given
 */
public AdaGradFOBOSUpdater(double initialRate, int expectedNumFeatures, double lambda, Norm norm,
    Counter<String> customL1, Set<String> fixedFeatures) {
  // Accumulator state.
  this.sumGradSquare = new ClassicCounter<String>(expectedNumFeatures);

  // Hyper-parameters and configuration, stored by reference.
  this.rate = initialRate;
  this.lambda = lambda;
  this.norm = norm;
  this.customL1 = customL1;
  this.fixedFeatures = fixedFeatures;
}
Example #21
Source File: IOTools.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Reads a weight vector from a file by deserializing it with
 * {@code SerializationMode.BIN_GZ}. When deserialization yields {@code null},
 * an empty counter is used instead. If a feature index is supplied, every
 * feature name in the result is added to it.
 *
 * TODO(spenceg) Replace ClassicCounter with our own SparseVector implementation.
 *
 * @param filename path of the weights file
 * @param featureIndex index to populate with the feature names; may be {@code null}
 * @return a counter of weights, never {@code null}
 */
@SuppressWarnings("unchecked")
public static Counter<String> readWeights(String filename, Index<String> featureIndex) {
  Counter<String> loaded = (Counter<String>) deserialize(filename, ClassicCounter.class, SerializationMode.BIN_GZ);
  Counter<String> wts = (loaded != null) ? loaded : new ClassicCounter<String>();

  if (featureIndex == null) {
    return wts;
  }
  for (String featureName : wts.keySet()) {
    featureIndex.addToIndex(featureName);
  }
  return wts;
}
Example #22
Source File: CoverageChecker.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Counts every n-gram of length 1 to {@code order} in one line of text.
 * The line is split on single whitespace characters; each n-gram is rendered
 * with {@code Sentence.listToString} and counted if {@code limitSet} is
 * {@code null} or contains it.
 *
 * @param line the text to scan
 * @param ngramCounts counter that receives the increments
 * @param limitSet if non-null, only n-grams in this set are counted
 * @param order maximum n-gram length
 */
static public void countNgrams(String line, Counter<String> ngramCounts, Set<String> limitSet, int order) {
  String[] toks = line.split("\\s");
  for (int start = 0; start < toks.length; start++) {
    // Grow the window one token at a time until it hits the order or the end.
    for (int len = 1; len <= order && start + len <= toks.length; len++) {
      String[] window = Arrays.copyOfRange(toks, start, start + len);
      String ngram = Sentence.listToString(Arrays.asList(window));
      boolean allowed = (limitSet == null) || limitSet.contains(ngram);
      if (allowed) {
        ngramCounts.incrementCount(ngram);
      }
    }
  }
}
Example #23
Source File: NISTMetric.java From phrasal with GNU General Public License v3.0 | 5 votes |
private void incCounts(Counter<Sequence<TK>> clippedCounts, Sequence<TK> sequence, int mul) { int seqSz = sequence.size(); for (int i = 0; i < order; i++) { possibleMatchCounts[i] += mul * possibleMatchCounts(i, seqSz); } double[] localCounts = localMatchCounts(clippedCounts); for (int i = 0; i < order; i++) { // System.err.printf("local Counts[%d]: %d\n", i, localCounts[i]); matchCounts[i] += mul * localCounts[i]; } }
Example #24
Source File: RandomNBestPoint.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Repeatedly line-searches along the summed feature vector of a random set of
 * n-best translations. The loop ends once the squared weight change has stayed
 * below {@code MERT.NO_PROGRESS_SSD} for {@code MERT.NO_PROGRESS_LIMIT}
 * consecutive iterations.
 *
 * @param initialWts starting weights
 * @return the weights after the last line search
 */
@Override
public Counter<String> optimize(Counter<String> initialWts) {
  Counter<String> wts = initialWts;
  int noProgress = 0;
  while (noProgress < MERT.NO_PROGRESS_LIMIT) {
    // Pick the random translations that define the search direction.
    List<ScoredFeaturizedTranslation<IString, String>> rTrans;
    if (better) {
      rTrans = mert.randomBetterTranslations(nbest, wts, emetric);
    } else {
      rTrans = mert.randomTranslations(nbest);
    }
    Counter<String> dir = MERT.summarizedAllFeaturesVector(rTrans);
    System.err.printf("Random n-best point score: %.5f\n", emetric.score(rTrans));

    Counter<String> newWts = mert.lineSearch(nbest, wts, dir, emetric);
    double eval = MERT.evalAtPoint(nbest, newWts, emetric);
    double ssd = MERT.wtSsd(wts, newWts);

    // Any real movement resets the streak.
    noProgress = (ssd < MERT.NO_PROGRESS_SSD) ? noProgress + 1 : 0;
    System.err.printf("Eval: %.5f SSD: %e (no progress: %d)\n", eval, ssd, noProgress);
    wts = newWts;
  }
  return wts;
}
Example #25
Source File: AbstractOnlineOptimizer.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Single-example gradient, expressed as a batch of size one: every per-example
 * argument is wrapped in a one-element list or array and handed to
 * {@code getBatchGradient}.
 *
 * @return whatever {@code getBatchGradient} returns for the one-element batch
 */
@Override
public Counter<String> getGradient(Counter<String> weights, Sequence<IString> source, int sourceId,
    List<RichTranslation<IString, String>> translations, List<Sequence<IString>> references,
    double[] referenceWeights, SentenceLevelMetric<IString, String> scoreMetric) {
  List<Sequence<IString>> sources = Arrays.asList(source);
  int[] sourceIds = new int[]{sourceId};
  List<List<RichTranslation<IString, String>>> translationBatch = Arrays.asList(translations);
  List<List<Sequence<IString>>> referenceBatch = Arrays.asList(references);

  return getBatchGradient(weights, sources, sourceIds, translationBatch, referenceBatch,
      referenceWeights, scoreMetric);
}
Example #26
Source File: MIRA1BestHopeFearOptimizer.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Not supported by this optimizer.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Counter<String> getBatchGradient(Counter<String> weights,
    List<Sequence<IString>> sources, int[] sourceIds,
    List<List<RichTranslation<IString, String>>> translations,
    List<List<Sequence<IString>>> references,
    double[] referenceWeights,
    SentenceLevelMetric<IString, String> scoreMetric) {
  final String reason = "1-best MIRA does not support mini-batch learning";
  throw new UnsupportedOperationException(reason);
}
Example #27
Source File: DownhillSimplexOptimizer.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Converts a dense vector into a named weight counter, using
 * {@code weightNames[i]} as the name of {@code x[i]}.
 *
 * @param x values indexed in parallel with {@code weightNames}
 * @return a new counter mapping each weight name to its value
 */
private Counter<String> vectorToWeights(double[] x) {
  final Counter<String> named = new ClassicCounter<String>();
  int dim = 0;
  while (dim < weightNames.length) {
    named.setCount(weightNames[dim], x[dim]);
    dim++;
  }
  return named;
}
Example #28
Source File: SequenceOptimizer.java From phrasal with GNU General Public License v3.0 | 5 votes |
@Override public Counter<String> optimize(Counter<String> initialWts) { Counter<String> wts = initialWts; for (BatchOptimizer opt : opts) { boolean done = false; while (!done) { Counter<String> newWts = opt.optimize(wts); double wtSsd = MERT.wtSsd(newWts, wts); double oldE = MERT.evalAtPoint(nbest, wts, emetric); double newE = MERT.evalAtPoint(nbest, newWts, emetric); // MERT.updateBest(newWts, -newE); boolean worse = oldE > newE; done = Math.abs(oldE - newE) <= MIN_OBJECTIVE_CHANGE || !loop || worse; System.err.printf( "seq optimizer: %s -> %s (%s) ssd: %f done: %s opt: %s\n", oldE, newE, newE - oldE, wtSsd, done, opt.toString()); if (worse) System.err.printf("WARNING: negative objective change!"); else wts = newWts; } } return wts; }
Example #29
Source File: AdaGradFastFOBOSUpdater.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Creates an updater with the given configuration.
 *
 * @param initialRate stored as the rate
 * @param expectedNumFeatures passed to the constructors of the two internal counters
 * @param L1lambda stored as the L1 regularization strength
 * @param customL1 stored as given
 * @param fixedFeatures stored as given
 */
public AdaGradFastFOBOSUpdater(double initialRate, int expectedNumFeatures, double L1lambda,
    Counter<String> customL1, Set<String> fixedFeatures) {
  // Configuration, stored by reference.
  this.rate = initialRate;
  this.L1lambda = L1lambda;
  this.customL1 = customL1;
  this.fixedFeatures = fixedFeatures;

  // Per-feature bookkeeping.
  this.sumGradSquare = new ClassicCounter<>(expectedNumFeatures);
  this.lastUpdated = new ClassicCounter<>(expectedNumFeatures);
}
Example #30
Source File: AdaGradFastFOBOSUpdater.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Captures an updater state; all arguments are stored by reference.
 *
 * @param h stored as the gradient history
 * @param r stored as the custom regularization counter
 * @param f stored as the fixed-feature set
 * @param u stored as the last-update counter
 * @param t stored as the time step
 */
public AdaGradFastFOBOSState(Counter<String> h, Counter<String> r, Set<String> f,
    Counter<String> u, int t) {
  this.timeStep = t;
  this.gradHistory = h;
  this.lastUp = u;
  this.customReg = r;
  this.fixedFeatures = f;
}