Java Code Examples for com.google.common.collect.ConcurrentHashMultiset#create()

The following examples show how to use com.google.common.collect.ConcurrentHashMultiset#create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GuavaConcurrentHashMultisetTest.java    From java_in_examples with Apache License 2.0 6 votes vote down vote up
/**
 * Demonstrates counting words with a {@code ConcurrentHashMultiset}: builds the multiset from a
 * space-separated sentence and prints the multiset, its distinct elements, individual counts
 * and the total/distinct sizes.
 */
public static void main(String[] args) {
    // Sample sentence; splitting on single spaces yields six tokens.
    String text = "Hello World! Hello All! Hi World!";
    String[] tokens = text.split(" ");

    // Load every token into the multiset so that repeated words are counted.
    Multiset<String> wordCounts = ConcurrentHashMultiset.create(Arrays.asList(tokens));

    // Whole multiset with multiplicities, e.g. [Hi, Hello x 2, World! x 2, All!] (order is not fixed)
    System.out.println(wordCounts);
    // Distinct words only, e.g. [Hi, Hello, World!, All!] (order is not fixed)
    System.out.println(wordCounts.elementSet());

    // Each row is {label to print, word to look up}; expected counts are 2, 2, 1, 1, 0.
    String[][] lookups = {
        {"Hello", "Hello"},
        {"World", "World!"},
        {"All", "All!"},
        {"Hi", "Hi"},
        {"Empty", "Empty"},
    };
    for (String[] lookup : lookups) {
        System.out.println(lookup[0] + " = " + wordCounts.count(lookup[1]));
    }

    // Total number of words, duplicates included: prints 6
    System.out.println(wordCounts.size());

    // Number of distinct words: prints 4
    System.out.println(wordCounts.elementSet().size());
}
 
Example 2
Source File: GuavaConcurrentHashMultisetTest.java    From java_in_examples with Apache License 2.0 6 votes vote down vote up
/**
 * Word-count demo: splits a sentence on spaces, stores the words in a
 * {@code ConcurrentHashMultiset} and prints the collected statistics.
 */
public static void main(String[] args) {
    // Split the text into words
    final String sentence = "Hello World! Hello All! Hi World!";
    final String[] words = sentence.split(" ");
    // Create the Multiset from the words
    final Multiset<String> multiset = ConcurrentHashMultiset.create(Arrays.asList(words));

    // Print the words with their occurrence counts
    // prints [Hi, Hello x 2, World! x 2, All!] - in arbitrary order
    System.out.println(multiset);
    // Print all unique words
    // prints [Hi, Hello, World!, All!] - in arbitrary order
    System.out.println(multiset.elementSet());

    // Print the count for each individual word
    final int helloCount = multiset.count("Hello");    // 2
    System.out.println("Hello = " + helloCount);
    final int worldCount = multiset.count("World!");   // 2
    System.out.println("World = " + worldCount);
    final int allCount = multiset.count("All!");       // 1
    System.out.println("All = " + allCount);
    final int hiCount = multiset.count("Hi");          // 1
    System.out.println("Hi = " + hiCount);
    final int emptyCount = multiset.count("Empty");    // 0
    System.out.println("Empty = " + emptyCount);

    // Print the total number of words in the text (prints 6)
    final int totalWords = multiset.size();
    System.out.println(totalWords);

    // Print the number of unique words (prints 4)
    final int uniqueWords = multiset.elementSet().size();
    System.out.println(uniqueWords);
}
 
Example 3
Source File: NgramEnumerator.java    From pyramid with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers n-grams for the given documents in parallel and keeps only those whose
 * accumulated count is at least {@code minDF}.
 *
 * @param index    index from which the term vector of each document is fetched
 * @param ids      ids of the documents to process
 * @param template template supplying the field to read; also passed on to {@code add}
 * @param minDF    minimum count an n-gram must reach to be kept
 *                 (presumably a document frequency, judging by the name - confirm against {@code add})
 * @return a new multiset holding every n-gram whose count is {@code >= minDF}, with that count
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF){
    // Concurrent multiset because it is filled from the parallel stream below.
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer,String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector,multiset,template);
    });
    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    // Typed entries: no raw Multiset.Entry and no unchecked cast of the element.
    for (Multiset.Entry<Ngram> entry: multiset.entrySet()){
        int count = entry.getCount();
        if (count>=minDF){
            filtered.add(entry.getElement(),count);
        }
    }
    return filtered;
}
 
Example 4
Source File: NgramEnumerator.java    From pyramid with Apache License 2.0 6 votes vote down vote up
/**
 * Enumerates the n-grams of one token sequence and merges them into {@code multiset}.
 *
 * <p>For every start position at which the last template offset still falls inside
 * {@code source}, the tokens at {@code start + offset} (for each offset in {@code template})
 * form one n-gram. Each distinct n-gram is recorded once for this sequence before the
 * per-sequence result is added to {@code multiset}.
 *
 * @param source   tokens of the sequence
 * @param multiset accumulator that receives the n-grams found in {@code source}
 * @param field    field name stored on every created n-gram
 * @param slop     slop value stored on every created n-gram
 * @param template offsets, relative to the start position, of the tokens forming an n-gram
 */
private static void add(List<String> source, Multiset<Ngram> multiset, String field, int slop, List<Integer> template){
    // Per-sequence accumulator; setCount(..., 1) caps every n-gram at one occurrence.
    Multiset<Ngram> perSequence = ConcurrentHashMultiset.create();
    for (int start = 0; start < source.size(); start++){
        int lastOffset = template.get(template.size()-1);
        if (start + lastOffset >= source.size()){
            // The n-gram would run past the end of the sequence.
            continue;
        }
        List<String> tokens = new ArrayList<>();
        for (int offset: template){
            tokens.add(source.get(start + offset));
        }
        Ngram candidate = new Ngram();
        candidate.setNgram(Ngram.toNgramString(tokens));
        candidate.setSlop(slop);
        candidate.setField(field);
        candidate.setInOrder(true);
        perSequence.setCount(candidate, 1);
    }
    multiset.addAll(perSequence);
}
 
Example 5
Source File: GeneralF1Predictor.java    From pyramid with Apache License 2.0 6 votes vote down vote up
/**
 * Predicts a multi-label from sampled multi-labels by first estimating the empirical
 * probability of each distinct sample and then delegating to
 * {@code predict(numClasses, uniqueOnes, probs)}.
 *
 * @param numClasses passed through unchanged to the delegate overload
 * @param samples sampled multi-labels; can have duplicates; their empirical probabilities will be estimated
 * @return the result of the delegate overload for the distinct samples and their probabilities
 */
public MultiLabel predict(int numClasses, List<MultiLabel> samples){
    // Tally how often each distinct multi-label was sampled.
    Multiset<MultiLabel> tally = ConcurrentHashMultiset.create();
    tally.addAll(samples);

    final int total = samples.size();
    List<MultiLabel> distinctLabels = new ArrayList<>();
    List<Double> empiricalProbs = new ArrayList<>();
    for (Multiset.Entry<MultiLabel> occurrence: tally.entrySet()){
        distinctLabels.add(occurrence.getElement());
        // Relative frequency of this multi-label among all samples.
        empiricalProbs.add(occurrence.getCount() / (double) total);
    }
    return predict(numClasses, distinctLabels, empiricalProbs);
}
 
Example 6
Source File: ActionRewindStrategy.java    From bazel with Apache License 2.0 6 votes vote down vote up
/**
 * Log the top N action rewind events and clear the history of failed actions' lost inputs and
 * rewind plans.
 *
 * <p>The statistics are posted to {@code eventHandler} as a single {@code ActionRewindingStats}
 * object before the two history fields are replaced with fresh, empty collections.
 *
 * @param eventHandler handler that receives the posted {@code ActionRewindingStats}
 */
void reset(ExtendedEventHandler eventHandler) {
  // Select up to MAX_ACTION_REWIND_EVENTS rewind plans, ranked by invalidatedNodesCount, and
  // convert each one to its event proto.
  // NOTE(review): `greatest` is presumably Guava's Comparators.greatest collector - confirm.
  ImmutableList<ActionRewindEvent> topActionRewindEvents =
      rewindPlansStats.stream()
          .collect(
              greatest(
                  MAX_ACTION_REWIND_EVENTS, comparing(RewindPlanStats::invalidatedNodesCount)))
          .stream()
          .map(ActionRewindingStats::toActionRewindEventProto)
          .collect(toImmutableList());
  // The lost-input count is read here, before lostInputRecords is replaced below.
  ActionRewindingStats rewindingStats =
      new ActionRewindingStats(lostInputRecords.size(), topActionRewindEvents);
  eventHandler.post(rewindingStats);
  // Start over with empty history collections instead of clearing the existing ones in place.
  lostInputRecords = ConcurrentHashMultiset.create();
  rewindPlansStats = new ConcurrentLinkedQueue<>();
}
 
Example 7
Source File: NgramEnumerator.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Gathers the n-grams of the given documents in parallel.
 *
 * @param index    index from which the term vector of each document is fetched
 * @param ids      ids of the documents to process
 * @param template template supplying the field to read; also passed on to {@code add}
 * @return the multiset filled by {@code add} for every document id
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template){
    final String fieldName = template.getField();
    // Concurrent multiset because the parallel stream below adds to it from several threads.
    final Multiset<Ngram> collected = ConcurrentHashMultiset.create();
    Arrays.stream(ids)
            .parallel()
            .forEach(docId -> {
                Map<Integer,String> docTermVector = index.getTermVectorFromIndex(fieldName, docId);
                add(docTermVector, collected, template);
            });
    return collected;
}
 
Example 8
Source File: NgramEnumeratorTest.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Manual check: enumerates n-grams over the tokens "0" to "9" with the template
 * {@code new NgramTemplate("body",3,1)} and prints the n-gram strings that were collected.
 */
private static void test8(){
    // Token sequence "0", "1", ..., "9".
    List<String> tokens = new ArrayList<>();
    for (int value = 0; value < 10; value++){
        tokens.add(String.valueOf(value));
    }

    Multiset<Ngram> collected = ConcurrentHashMultiset.create();
    NgramTemplate template = new NgramTemplate("body",3,1);
    NgramEnumerator.add(tokens, collected, template);

    // Print the string form of every distinct n-gram.
    System.out.println(
            collected.elementSet()
                    .stream()
                    .map(ngram -> ngram.getNgram())
                    .collect(Collectors.toList()));
}
 
Example 9
Source File: App1.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Gathers n-grams for every configured combination of extraction field, n-gram length and slop,
 * and returns the distinct n-grams that were accepted.
 *
 * <p>An n-gram returned by {@code NgramEnumerator.gatherNgram} is accepted when
 * {@code interesting(...)} approves it and, unless duplicate words are allowed by the
 * configuration, it reports no duplicate.
 *
 * @param config configuration providing the output folder and the {@code train.feature.ngram.*} settings
 * @param index  index handed to {@code NgramEnumerator.gatherNgram}
 * @param ids    ids of the documents to scan; their number scales the minimum frequency threshold
 * @param logger receives progress messages
 * @return the distinct accepted n-grams
 * @throws Exception declared by the original signature
 */
static Set<Ngram> gather(Config config, ESIndex index, String[] ids, Logger logger) throws Exception{

    // The meta_data folder is still created here, although the code that used to dump the
    // gathered n-grams into it (all_ngrams.txt / all_ngrams.ser) is no longer active.
    File metaDataFolder = new File(config.getString("output.folder"),"meta_data");
    metaDataFolder.mkdirs();

    Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
    List<Integer> ngramLengths = config.getIntegers("train.feature.ngram.n");
    double minDfRatio = config.getDouble("train.feature.ngram.minDf");
    // Absolute threshold: the configured ratio applied to the number of documents, rounded down.
    int minCount = (int)Math.floor(ids.length*minDfRatio);
    List<String> extractionFields = config.getStrings("train.feature.ngram.extractionFields");
    List<Integer> slopValues = config.getIntegers("train.feature.ngram.slop");
    boolean inOrder = config.getBoolean("train.feature.ngram.inOrder");
    boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");

    for (String field: extractionFields){
        for (int n: ngramLengths){
            for (int slop: slopValues){
                logger.info("gathering "+n+ "-grams from field "+field+" with slop "+slop+" and minDf "+minDfRatio+ ", (actual frequency threshold = "+minCount+")");
                NgramTemplate template = new NgramTemplate(field,n,slop);
                Multiset<Ngram> ngrams = NgramEnumerator.gatherNgram(index, ids, template, minCount);
                logger.info("gathered "+ngrams.elementSet().size()+ " ngrams");

                int accepted = 0;
                for (Multiset.Entry<Ngram> entry: ngrams.entrySet()){
                    Ngram ngram = entry.getElement();
                    ngram.setInOrder(inOrder);
                    int count = entry.getCount();
                    if (!interesting(allNgrams,ngram,count)){
                        continue;
                    }
                    // The duplicate check is only consulted when duplicates are not allowed.
                    if (allowDuplicates || !ngram.hasDuplicate()){
                        allNgrams.add(ngram, count);
                        accepted += 1;
                    }
                }
                logger.info(accepted+" are really new");
            }
        }
    }
    logger.info("there are "+allNgrams.elementSet().size()+" ngrams in total");
    return allNgrams.elementSet();
}
 
Example 10
Source File: MultisetSemaphoreTest.java    From bazel with Apache License 2.0 4 votes vote down vote up
/**
 * Races one task per size-k combination of n distinct values against a {@code MultisetSemaphore}
 * limited to k unique values.
 *
 * <p>Each task acquires permits for its combination, records the values in a shared
 * {@code ConcurrentHashMultiset}, asserts that no more than k distinct values are recorded at
 * that moment, removes its values again and releases the permits. The test passes when every
 * task finishes and none of them threw.
 */
@Test
public void testConcurrentRace_AllSameSizedCombinations() throws Exception {
  // When we have n values
  int n = 10;
  ImmutableSet.Builder<String> valsBuilder = ImmutableSet.builder();
  for (int i = 0; i < n; i++) {
    valsBuilder.add("val-" + i);
  }
  ImmutableSet<String> vals = valsBuilder.build();
  int k = 5;
  // And we have all combinations of size k of these n values
  // (with n = 10 and k = 5 that is 10 choose 5 = 252 combinations)
  Set<Set<String>> combinations = Sets.combinations(vals, k);
  int numCombinations = combinations.size();
  // And we have a MultisetSemaphore
  final MultisetSemaphore<String> multisetSemaphore = MultisetSemaphore.newBuilder()
      // with K max num unique values,
      .maxNumUniqueValues(k)
      .build();
  // And a ExecutorService with nCk threads,
  // (one thread per submitted task, so every task can run at the same time)
  ExecutorService executorService = Executors.newFixedThreadPool(numCombinations);
  // And a recorder for thrown exceptions,
  ThrowableRecordingRunnableWrapper wrapper =
      new ThrowableRecordingRunnableWrapper("testConcurrentRace_AllSameSizedCombinations");
  // And a ConcurrentHashMultiset for counting the multiplicities of the values ourselves,
  ConcurrentHashMultiset<String> counts = ConcurrentHashMultiset.create();
  for (Set<String> combination : combinations) {
    // And, for each of the nCk combinations, we submit a Runnable, that
    // (the returned Future is deliberately not inspected; failures are presumably captured by
    // the wrapper and checked via getFirstThrownError() below - confirm)
    @SuppressWarnings("unused")
    Future<?> possiblyIgnoredError =
        executorService.submit(
            wrapper.wrap(
                new Runnable() {
                  @Override
                  public void run() {
                    try {
                      // Tries to acquire permits for its set of k values,
                      multisetSemaphore.acquireAll(combination);
                      // And then verifies that the multiplicities are as expected,
                      // i.e. while the permits are held, at most k distinct values are recorded.
                      combination.forEach(counts::add);
                      assertThat(counts.entrySet().size()).isAtMost(k);
                      combination.forEach(counts::remove);
                      // And then releases the permits.
                      multisetSemaphore.releaseAll(combination);
                    } catch (InterruptedException e) {
                      // run() cannot throw a checked exception, so rethrow unchecked.
                      throw new IllegalStateException(e);
                    }
                  }
                }));
  }
  // Then all of our Runnables completed (without deadlock!), as expected,
  boolean interrupted = ExecutorUtil.interruptibleShutdown(executorService);
  // And also none of them threw any Exceptions.
  assertThat(wrapper.getFirstThrownError()).isNull();
  // If the shutdown reported an interrupt, restore the interrupt flag and propagate it.
  if (interrupted) {
    Thread.currentThread().interrupt();
    throw new InterruptedException();
  }
}