com.google.common.collect.Multiset#entrySet

Source File: SpecValidations.java From deploymentmanager-autogen with Apache License 2.0

6 votes

/**
 * Verifies that no metadata key is used more than once across the spec's passwords and its
 * GCE metadata items.
 *
 * @param spec the spec whose password metadata keys and GCE metadata item keys are checked
 * @throws IllegalArgumentException if any key occurs more than once
 */
private static void validateMetadataKeyUniqueness(SingleVmDeploymentPackageSpec spec) {
  // Tally every key from both sources; any tally above one is a collision.
  Multiset<String> keyTally = HashMultiset.create();
  for (PasswordSpec passwordSpec : spec.getPasswordsList()) {
    keyTally.add(passwordSpec.getMetadataKey());
  }
  for (GceMetadataItem item : spec.getGceMetadataItemsList()) {
    keyTally.add(item.getKey());
  }

  for (Multiset.Entry<String> tally : keyTally.entrySet()) {
    if (tally.getCount() <= 1) {
      continue;
    }
    throw new IllegalArgumentException(
        String.format("Metadata key '%s' is not unique", tally.getElement()));
  }
}

Source File: AbstractIdentifierRenamings.java From naturalize with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Builds a sorted set of {@link Renaming}s, one per candidate name in {@code alternatives}.
 *
 * <p>For each candidate, the wildcard token of every n-gram is replaced by the candidate, the
 * resulting n-gram is scored, and {@code log2(score) * count} is accumulated. The renaming's
 * score is the scope prior minus that sum, divided by {@code ngrams.size()}.
 *
 * @param ngrams the n-grams (with occurrence counts) containing the wildcard token
 * @param alternatives the candidate identifier names to score
 * @param scope the scope passed to the prior computation and stored in each renaming
 * @return the renamings, ordered by the set's natural ordering
 */
@Override
public SortedSet<Renaming> calculateScores(
		final Multiset<NGram<String>> ngrams,
		final Set<String> alternatives, final Scope scope) {
	final SortedSet<Renaming> renamings = Sets.newTreeSet();

	for (final String candidate : alternatives) {
		double weightedLogSum = 0;
		for (final Entry<NGram<String>> ngramEntry : ngrams.entrySet()) {
			try {
				final NGram<String> substituted = NGram.substituteTokenWith(
						ngramEntry.getElement(), WILDCARD_TOKEN, candidate);
				weightedLogSum += DoubleMath.log2(scoreNgram(substituted))
						* ngramEntry.getCount();
			} catch (final Throwable e) {
				// Best effort: a failing n-gram is logged and contributes nothing to the sum.
				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
			}
		}

		final double normalizedScore =
				(addScopePriors(candidate, scope) - weightedLogSum) / ngrams.size();
		renamings.add(new Renaming(candidate, normalizedScore,
				ngrams.size() / ngramLM.getN(), scope));
	}

	return renamings;
}

Source File: CorpusAnalysis.java From tac-kbp-eal with MIT License

6 votes

/**
 * Renders a bar chart with one bar per distinct element of {@code counts} and writes it to
 * {@code outFile} through the given renderer.
 *
 * @param counts the elements to plot; each bar's value is the element's count and its label
 *     is the element's {@code toString()}
 * @param outFile the file handed to the renderer
 * @param renderer the renderer that produces the output
 * @param chartTitle the chart title
 * @param xAxisLabel the label of the X axis (rendered with rotated labels)
 * @param yAxisLabel the label of the Y axis
 * @throws IOException declared by this method; presumably raised by the renderer
 */
static void writeToChart(final Multiset<Symbol> counts, final File outFile,
    final GnuPlotRenderer renderer,
    final String chartTitle, final String xAxisLabel, final String yAxisLabel)
    throws IOException {

  final Axis xAxis = Axis.xAxis().setLabel(xAxisLabel).rotateLabels().build();
  final Axis yAxis = Axis.yAxis().setLabel(yAxisLabel).build();

  final BarChart.Builder chart = BarChart.builder()
      .setTitle(chartTitle)
      .setXAxis(xAxis)
      .setYAxis(yAxis)
      .hideKey();

  for (final Multiset.Entry<Symbol> countEntry : counts.entrySet()) {
    final int barValue = countEntry.getCount();
    final String barLabel = countEntry.getElement().toString();
    chart.addBar(BarChart.Bar.builder(barValue).setLabel(barLabel).build());
  }

  renderer.renderTo(chart.build(), outFile);
}

Source File: ValueTypeComposer.java From immutables with Apache License 2.0

6 votes

/**
 * Reports an error on {@code protoclass} when two attributes of {@code type} share a name.
 * Lazily generated attributes are compared only against each other.
 *
 * @param type the value type whose attributes are checked
 * @param protoclass the protoclass used to report the error
 */
private void checkAttributeNamesForDuplicates(ValueType type, Protoclass protoclass) {
  if (type.attributes.isEmpty()) {
    return;
  }

  Multiset<String> nameTally = HashMultiset.create(type.attributes.size());
  for (ValueAttribute attribute : type.attributes) {
    // The "$lazy" suffix keeps lazy attributes in a separate comparison scope.
    String scopedName = attribute.isGenerateLazy
        ? attribute.name() + "$lazy"
        : attribute.name();
    nameTally.add(scopedName);
  }

  List<String> duplicates = Lists.newArrayList();
  for (Multiset.Entry<String> tally : nameTally.entrySet()) {
    if (tally.getCount() <= 1) {
      continue;
    }
    // Strip the scoping suffix again so the message shows the declared name.
    duplicates.add(tally.getElement().replace("$lazy", ""));
  }

  if (duplicates.isEmpty()) {
    return;
  }
  protoclass.report()
      .error("Duplicate attribute names %s. You should check if correct @Value.Style applied",
          duplicates);
}

Source File: TagDict.java From EasySRL with Apache License 2.0

6 votes

/**
 * Builds the word-to-categories dictionary from precomputed counts.
 *
 * <p>Words occurring more than {@code MIN_OCCURENCES_OF_WORD} times get their own entry; the
 * category counts of all remaining words are pooled and stored under {@code OTHER_WORDS}.
 *
 * @param wordCounts how often each word occurs
 * @param wordToCatToCount for each word, how often it occurs with each category
 * @return an immutable copy of the resulting dictionary
 */
private static Map<String, Collection<Category>> makeDict(final Multiset<String> wordCounts,
		final Map<String, Multiset<Category>> wordToCatToCount) {
	final Multiset<Category> pooledRareCounts = HashMultiset.create();
	final Map<String, Collection<Category>> dict = new HashMap<>();

	for (final Entry<String> wordEntry : wordCounts.entrySet()) {
		final Multiset<Category> categoryCounts = wordToCatToCount.get(wordEntry.getElement());

		if (wordEntry.getCount() <= MIN_OCCURENCES_OF_WORD) {
			// Rare word: fold its category statistics into the shared pool.
			for (final Entry<Category> categoryEntry : categoryCounts.entrySet()) {
				pooledRareCounts.add(categoryEntry.getElement(), categoryEntry.getCount());
			}
			continue;
		}

		// Frequent word: gets a dictionary entry of its own.
		addEntryForWord(categoryCounts, dict, wordEntry.getElement());
	}

	addEntryForWord(pooledRareCounts, dict, OTHER_WORDS);
	return ImmutableMap.copyOf(dict);
}

Source File: EntityScorer.java From entity-fishing with Apache License 2.0

6 votes

/**
 * Builds a scorer context from a list of words.
 *
 * <p>The words are counted in a sorted multiset; for every distinct word that has an embedding
 * in the knowledge base, its vector is appended to a flat float array and its count is
 * recorded. Words without an embedding are skipped.
 *
 * @param words the words to build the context from
 * @return the context created from the concatenated vectors and the per-word counts
 */
public ScorerContext context(List<String> words) {
    Multiset<String> wordTally = TreeMultiset.create();
    wordTally.addAll(words);

    int dim = kb.getEmbeddingsSize();
    // Flat buffer holding the word vectors back to back. It is sized from wordTally.size(),
    // i.e. the total number of occurrences, so it can be longer than the part filled below.
    // NOTE(review): whether create_context depends on this length is not visible here.
    float[] flatVectors = new float[wordTally.size() * dim];
    IntArrayList counts = new IntArrayList();
    int stored = 0;

    for (Multiset.Entry<String> wordEntry : wordTally.entrySet()) {
        short[] embedding = kb.getWordEmbeddings(wordEntry.getElement());
        if (embedding == null) {
            continue;
        }
        counts.add(wordEntry.getCount());
        int offset = stored * dim;
        for (int k = 0; k < kb.getEmbeddingsSize(); k++) {
            flatVectors[offset + k] = embedding[k];
        }
        stored++;
    }
    counts.trim();

    return create_context(flatVectors, counts.elements());
}

Source File: NgramEnumerator.java From pyramid with Apache License 2.0

6 votes

/**
 * Gathers n-grams matching {@code template} from the given documents and keeps only those
 * whose accumulated count is at least {@code minDF}.
 *
 * @param index the index the term vectors are read from
 * @param ids the ids of the documents to scan
 * @param template the n-gram template; its field selects which term vector is read
 * @param minDF the minimum accumulated count an n-gram needs in order to be kept
 * @return a new multiset holding the retained n-grams with their counts
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF){
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    // Documents are processed in parallel, all adding into the same concurrent multiset.
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer,String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector,multiset,template);
    });
    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    // Use the parameterized entry type so the element is an Ngram without a raw type or cast.
    for (Multiset.Entry<Ngram> entry : multiset.entrySet()){
        int count = entry.getCount();
        if (count>=minDF){
            filtered.add(entry.getElement(),count);
        }
    }
    return filtered;
}

Source File: ApplicationMasterService.java From twill with Apache License 2.0

6 votes

/**
 * Processes the statuses of completed containers: logs each one, hands it to the running
 * containers tracker, and re-requests containers for every runnable the tracker marked for
 * restart.
 *
 * @param completedContainersStatuses the statuses of the containers that completed
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
  // Filled by runningContainers.handleCompleted: runnable name -> instances to restart.
  Multiset<String> runnablesToRestart = HashMultiset.create();
  for (YarnContainerStatus completed : completedContainersStatuses) {
    LOG.info("Container {} completed with {}:{}.",
             completed.getContainerId(), completed.getState(), completed.getDiagnostics());
    runningContainers.handleCompleted(completed, runnablesToRestart);
  }

  for (Multiset.Entry<String> restart : runnablesToRestart.entrySet()) {
    String runnableName = restart.getElement();
    int instanceCount = restart.getCount();
    LOG.info("Re-request container for {} with {} instances.", runnableName, instanceCount);
    runnableContainerRequests.add(createRunnableContainerRequest(runnableName, instanceCount));
  }

  // Refresh the request time of every runnable being re-requested so that the EventHandler
  // is triggered with the right expiration timestamp.
  expectedContainers.updateRequestTime(runnablesToRestart.elementSet());
}

Source File: TagDict.java From easyccg with MIT License

6 votes

/**
 * Stores the categories of {@code countForCategory} under {@code word} in {@code result},
 * ordered by the file-level {@code comparator} applied to the multiset entries.
 *
 * @param countForCategory the categories with their counts
 * @param result the dictionary that receives the entry
 * @param word the key under which the ordered category list is stored
 */
private static void addEntryForWord(Multiset<Category> countForCategory,
    Map<String, Collection<Category>> result, String word)
{
  // Snapshot the entries into a list so they can be sorted.
  List<Entry<Category>> sortedEntries =
      new ArrayList<Entry<Category>>(countForCategory.entrySet());
  Collections.sort(sortedEntries, comparator);

  // Keep only the categories, in sorted order; the counts are no longer needed.
  List<Category> orderedCategories = new ArrayList<Category>(sortedEntries.size());
  for (Entry<Category> sortedEntry : sortedEntries) {
    orderedCategories.add(sortedEntry.getElement());
  }

  result.put(word, orderedCategories);
}

Source File: JavaInputAstVisitor.java From java-n-IDE-for-Android with Apache License 2.0

6 votes

/**
 * Tells whether at least {@code atLeastM} expressions in the given column share the same
 * {@link Tree.Kind}. Rows that are too short to have that column are ignored.
 *
 * @param rows the rows of expressions
 * @param column the zero-based column to inspect
 * @param atLeastM the number of same-kind expressions required
 * @return {@code true} if some kind occurs at least {@code atLeastM} times in the column
 */
private static boolean expressionsAreParallel(
        List<List<ExpressionTree>> rows, int column, int atLeastM) {
    Multiset<Tree.Kind> kindTally = HashMultiset.create();
    for (List<? extends ExpressionTree> row : rows) {
        if (column < row.size()) {
            kindTally.add(row.get(column).getKind());
        }
    }

    for (Multiset.Entry<Tree.Kind> tally : kindTally.entrySet()) {
        if (tally.getCount() < atLeastM) {
            continue;
        }
        return true;
    }
    return false;
}

Source File: JsonUtil.java From tac2015-event-detection with GNU General Public License v3.0

5 votes

/**
 * Converts a multiset into a JSON object with one field per distinct element: the field name
 * is the element's {@code toString()} and the field value is its count.
 *
 * @param counts the multiset to convert
 * @return the populated JSON object
 */
public static <T> ObjectNode toJson(Multiset<T> counts) {
	ObjectNode json = newObject();
	for (Multiset.Entry<T> countEntry : counts.entrySet()) {
		String fieldName = countEntry.getElement().toString();
		int occurrences = countEntry.getCount();
		json.put(fieldName, occurrences);
	}
	return json;
}

Source File: TypeConformanceComputer.java From xtext-extras with Eclipse Public License 2.0

5 votes

/**
 * Accumulates, in {@code cumulatedDistance}, the distances of the raw super types that are
 * common to all given references. Interfaces that are more directly implemented end up with
 * a lower total count than more general interfaces.
 *
 * <p>For each reference its own distances are computed, {@code cumulatedDistance} is
 * restricted to the types that reference also has, and the reference's distance is added for
 * every type that is still present.
 *
 * @param references the references to process
 * @param all passed through to {@code initializeDistance}
 * @param cumulatedDistance the multiset that is updated in place
 */
protected void cumulateDistance(final List<LightweightTypeReference> references, Multimap<JvmType, LightweightTypeReference> all,
		Multiset<JvmType> cumulatedDistance) {
	for (LightweightTypeReference reference : references) {
		Multiset<JvmType> referenceDistance = LinkedHashMultiset.create();
		initializeDistance(reference, all, referenceDistance);

		// Drop every type this reference does not have before adding its distances.
		cumulatedDistance.retainAll(referenceDistance);

		for (Multiset.Entry<JvmType> distanceEntry : referenceDistance.entrySet()) {
			JvmType type = distanceEntry.getElement();
			if (!cumulatedDistance.contains(type)) {
				continue;
			}
			cumulatedDistance.add(type, distanceEntry.getCount());
		}
	}
}

Source File: RedisBungeeListener.java From RedisBungee with Eclipse Public License 1.0

5 votes

/**
 * Writes a multiset to {@code output}: first the number of distinct elements, then for each
 * distinct element its string (UTF) followed by its count (int).
 *
 * @param collection the multiset to serialize
 * @param output the sink the data is written to
 */
private void serializeMultiset(Multiset<String> collection, ByteArrayDataOutput output) {
    int distinctElements = collection.elementSet().size();
    output.writeInt(distinctElements);

    for (Multiset.Entry<String> countEntry : collection.entrySet()) {
        String element = countEntry.getElement();
        int occurrences = countEntry.getCount();
        output.writeUTF(element);
        output.writeInt(occurrences);
    }
}

Source File: TableShardingTest.java From mango with Apache License 2.0

5 votes

/**
 * Asserts that the messages stored for the uids of {@code msgs} are exactly {@code msgs},
 * in any order. Each distinct uid is queried once.
 *
 * @param msgs the expected messages
 */
private void check(List<Msg> msgs) {
  // Collect the uids in a multiset so every distinct uid is looked up only once.
  Multiset<Integer> uidTally = HashMultiset.create();
  for (Msg expected : msgs) {
    uidTally.add(expected.getUid());
  }

  List<Msg> stored = new ArrayList<Msg>();
  for (Multiset.Entry<Integer> uidEntry : uidTally.entrySet()) {
    stored.addAll(dao.getMsgs(uidEntry.getElement()));
  }

  assertThat(stored, hasSize(msgs.size()));
  assertThat(stored, containsInAnyOrder(msgs.toArray()));
}

Source File: SampleUtils.java From api-mining with GNU General Public License v3.0

5 votes

/**
 * Picks a random element from a multiset, weighted by occurrence count.
 *
 * <p>A random position below {@code set.size()} (the total number of occurrences) is drawn,
 * and the element whose cumulative count range covers that position is returned. An element
 * with a higher count is therefore more likely to be chosen.
 *
 * @param set
 *            the multiset to draw from; must not be null
 * @return the selected element, or {@code null} if no entry covers the drawn position
 */
public static <T> T getRandomElement(final Multiset<T> set) {
	final int drawnPosition = RandomUtils.nextInt(checkNotNull(set).size());

	int cumulativeCount = 0;
	for (final Multiset.Entry<T> entry : set.entrySet()) {
		cumulativeCount += entry.getCount();
		if (drawnPosition < cumulativeCount) {
			return entry.getElement();
		}
	}
	return null;
}

Source File: MostFrequentCharInString.java From levelup-java-examples with Apache License 2.0

5 votes

/**
 * Counts the single-character pieces of the lower-cased sentence with a Guava multiset,
 * prints every piece with its count, and checks that "e" occurs seven times.
 */
@Test
public void most_frequent_char_guava() throws IOException {

	// Split the lower-cased sentence into pieces of length one.
	Iterable<String> singleCharacters = Splitter.fixedLength(1).split(
			sentence.toLowerCase());
	Multiset<String> characterCounts = HashMultiset.create(singleCharacters);

	for (Entry<String> characterCount : characterCounts.entrySet()) {
		System.out.println(characterCount.getElement() + ":" + characterCount.getCount());
	}

	assertEquals(7, characterCounts.count("e"), 0);
}

Source File: SampleUtils.java From tassal with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Selects a random element of a multiset, with each element's chance proportional to its
 * occurrence count.
 *
 * <p>The method draws a position below {@code set.size()} (the total number of occurrences)
 * and walks the entries, summing their counts, until the running sum exceeds that position.
 *
 * @param set
 *            the multiset to sample; must not be null
 * @return the element found, or {@code null} if the walk ends without exceeding the position
 */
public static <T> T getRandomElement(final Multiset<T> set) {
	final int position = RandomUtils.nextInt(checkNotNull(set).size());

	int runningTotal = 0;
	for (final Multiset.Entry<T> candidate : set.entrySet()) {
		runningTotal += candidate.getCount();
		if (position < runningTotal) {
			return candidate.getElement();
		}
	}
	return null;
}

Source File: MultisetExpression.java From gef with Eclipse Public License 2.0

4 votes

/**
 * Returns the entry set of the current multiset value, or the entry set of
 * {@code EMPTY_MULTISET} when the current value is {@code null}.
 */
@Override
public Set<com.google.common.collect.Multiset.Entry<E>> entrySet() {
	final Multiset<E> current = get();
	if (current != null) {
		return current.entrySet();
	}
	return EMPTY_MULTISET.entrySet();
}

Source File: App1.java From pyramid with Apache License 2.0

4 votes

/**
 * Gathers n-grams for every configured combination of field, n and slop and returns the
 * distinct n-grams that were accepted.
 *
 * <p>The count threshold passed to the enumerator is
 * {@code floor(ids.length * train.feature.ngram.minDf)}. A gathered n-gram is accepted when
 * {@code interesting} approves it and, unless duplicate words are allowed, it has no duplicate.
 *
 * @param config the configuration supplying the output folder and the n-gram settings
 * @param index the index the n-grams are gathered from
 * @param ids the ids of the documents to scan
 * @param logger receives progress messages
 * @return the distinct accepted n-grams
 * @throws Exception declared by this method
 */
static Set<Ngram> gather(Config config, ESIndex index,
                         String[] ids, Logger logger) throws Exception{

    File metaDataFolder = new File(config.getString("output.folder"),"meta_data");
    metaDataFolder.mkdirs();

    Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
    List<Integer> ns = config.getIntegers("train.feature.ngram.n");
    double minDf = config.getDouble("train.feature.ngram.minDf");
    // Turn the relative threshold into an absolute count over the given documents.
    int minDFrequency = (int)Math.floor(ids.length*minDf);
    List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
    List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
    boolean inorder = config.getBoolean("train.feature.ngram.inOrder");
    boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");

    for (String field: fields){
        for (int n: ns){
            for (int slop:slops){
                logger.info("gathering "+n+ "-grams from field "+field+" with slop "+slop+" and minDf "+minDf+ ", (actual frequency threshold = "+minDFrequency+")");
                NgramTemplate template = new NgramTemplate(field,n,slop);
                Multiset<Ngram> gathered = NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
                logger.info("gathered "+gathered.elementSet().size()+ " ngrams");

                int accepted = 0;
                for (Multiset.Entry<Ngram> gatheredEntry: gathered.entrySet()){
                    Ngram ngram = gatheredEntry.getElement();
                    ngram.setInOrder(inorder);
                    int count = gatheredEntry.getCount();
                    // Same check order as before: interesting first, then the duplicate policy.
                    if (interesting(allNgrams,ngram,count)
                            && (allowDuplicates || !ngram.hasDuplicate())){
                        allNgrams.add(ngram, count);
                        accepted += 1;
                    }
                }
                logger.info(accepted+" are really new");
            }
        }
    }
    logger.info("there are "+allNgrams.elementSet().size()+" ngrams in total");
    // Disabled: dump of all n-grams to meta_data/all_ngrams.txt plus serialization.
//    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File(metaDataFolder,"all_ngrams.txt")));
//    for (Multiset.Entry<Ngram> ngramEntry: allNgrams.entrySet()){
//        bufferedWriter.write(ngramEntry.getElement().toString());
//        bufferedWriter.write("\t");
//        bufferedWriter.write(""+ngramEntry.getCount());
//        bufferedWriter.newLine();
//    }
//
//    bufferedWriter.close();
//
//    //for serialization
//    Set<Ngram> uniques = new HashSet<>();
//    uniques.addAll(allNgrams.elementSet());
//    Serialization.serialize(uniques, new File(metaDataFolder, "all_ngrams.ser"));
    return allNgrams.elementSet();
}

Source File: TagDict.java From easyccg with MIT License

4 votes

/**
 * Builds a dictionary from each word of a corpus to the categories it was seen with.
 *
 * <p>Words occurring more than {@code MIN_OCCURENCES_OF_WORD} times get their own entry; the
 * category counts of all other words are pooled into a single entry stored under
 * {@code OTHER_WORDS}.
 *
 * @param input the sentences, each providing input words and gold categories by position
 * @return an immutable copy of the dictionary
 */
public static Map<String, Collection<Category>> makeDict(Iterable<InputToParser> input) {
  Multiset<String> wordTally = HashMultiset.create();
  Map<String, Multiset<Category>> categoriesByWord = new HashMap<String, Multiset<Category>>();

  // Pass 1: count each word, and each (word, category) pairing.
  for (InputToParser sentence : input) {
    for (int position = 0; position < sentence.getInputWords().size(); position++) {
      String word = sentence.getInputWords().get(position).word;
      Category goldCategory = sentence.getGoldCategories().get(position);
      wordTally.add(word);

      Multiset<Category> categoryTally = categoriesByWord.get(word);
      if (categoryTally == null) {
        categoryTally = HashMultiset.create();
        categoriesByWord.put(word, categoryTally);
      }
      categoryTally.add(goldCategory);
    }
  }

  // Pass 2: give frequent words their own entry and pool the statistics of rare ones.
  Multiset<Category> pooledRareCounts = HashMultiset.create();
  Map<String, Collection<Category>> dict = new HashMap<String, Collection<Category>>();

  for (Entry<String> wordEntry : wordTally.entrySet()) {
    Multiset<Category> categoryTally = categoriesByWord.get(wordEntry.getElement());

    if (wordEntry.getCount() <= MIN_OCCURENCES_OF_WORD) {
      // Rare word: fold its category counts into the shared pool.
      for (Entry<Category> categoryEntry : categoryTally.entrySet()) {
        pooledRareCounts.add(categoryEntry.getElement(), categoryEntry.getCount());
      }
      continue;
    }

    // Frequent word.
    addEntryForWord(categoryTally, dict, wordEntry.getElement());
  }
  addEntryForWord(pooledRareCounts, dict, OTHER_WORDS);

  return ImmutableMap.copyOf(dict);
}

Java Code Examples for com.google.common.collect.Multiset#entrySet()