it.unimi.dsi.fastutil.objects.Object2IntMap#put

Source File: TableReader.java From fastjgame with Apache License 2.0

6 votes

/**
 * Reads the row that declares the column (property) names.
 *
 * @param fileName file name, used only to make the error message more specific
 * @param rowIndex index of the row
 * @param row      content of the row
 * @return the column-name row
 * @throws IllegalArgumentException if the same (trimmed) column name appears more than once
 */
private ColNameRow readColNameRow(String fileName, int rowIndex, T row) {
    final int columnCount = getTotalColNum(row);
    // A linked map is used so the names keep the order in which they were read.
    final Object2IntMap<String> indexByName = new Object2IntLinkedOpenHashMap<>(columnCount + 1);
    for (int i = 0; i < columnCount; i++) {
        final String rawName = getNullableCell(row, i);
        // A missing cell and a cell holding only padding are treated alike: no column is declared.
        final String name = (rawName == null) ? "" : rawName.trim();
        if (name.isEmpty()) {
            continue;
        }
        // Column names must be unique within the row.
        if (indexByName.containsKey(name)) {
            throw new IllegalArgumentException("file " + fileName
                    + " propertyNameRow has duplicate column " + name);
        }
        indexByName.put(name, i);
    }
    return new ColNameRow(rowIndex, indexByName);
}

Source File: Utils.java From AffectiveTweets with GNU General Public License v3.0

6 votes

/**
 * Calculates a vector of attributes from a list of tokens.
 *
 * Each attribute is named {@code prefix + token}. With frequency weights the
 * value is the number of times the token occurs in the list; with boolean
 * weights the value is 1 for every token that occurs at least once.
 *
 * @param tokens the input tokens
 * @param prefix the prefix of each vector attribute
 * @param freqWeights true for considering term-frequency weights (boolean weights are used otherwise)
 * @return an Object2IntMap object mapping the attributes to their values
 */
public static Object2IntMap<String> calculateTermFreq(List<String> tokens, String prefix, boolean freqWeights) {
	Object2IntMap<String> termFreq = new Object2IntOpenHashMap<String>();

	for (String token : tokens) {
		// Build the attribute name once so that look-ups and insertions always
		// agree on the key (the membership test used to look at the bare token).
		String attribute = prefix + token;
		if (freqWeights) {
			// getInt falls back to the map's default return value for attributes
			// not seen yet; the map is created above and that default is left untouched
			termFreq.put(attribute, termFreq.getInt(attribute) + 1);
		} else if (!termFreq.containsKey(attribute)) {
			// boolean weights: record the attribute the first time it appears
			termFreq.put(attribute, 1);
		}
	}

	return termFreq;
}

Source File: ReadCountCollection.java From gatk-protected with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Rearrange the targets so that they are in a particular order.
 *
 * @param targetsInOrder the targets to keep, in the order they must have in the result.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 *     <li>{@code targetsInOrder} is {@code null},</li>
 *     <li>is empty,</li>
 *     <li>it contains {@code null},</li>
 *     <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());
    // Position of every target currently held by this collection.
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }
    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Reject null elements explicitly: the "not present" message below dereferences
        // the target, so a null element would otherwise fail while building that message
        // instead of being reported as an invalid argument.
        Utils.validateArg(target != null, () -> "the input targets list cannot contain null targets");
        Utils.validateArg(targetToIndex.containsKey(target), () -> String.format("target '%s' is not present in the collection", target.getName()));
        // Copy the row of the requested target into its new position.
        counts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}

Source File: AlleleLikelihoods.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Swaps pieces of evidence for their replacements, in place, across all samples.
 *
 * Evidence without an entry (or with a {@code null} entry) in the replacement map is left
 * untouched. Where a sample already has an evidence-to-position index, the index is kept
 * in step with the list.
 *
 * @param evidenceReplacements maps evidence currently held to the evidence that takes its place.
 */
public void changeEvidence(final Map<EVIDENCE, EVIDENCE> evidenceReplacements) {
    final int numberOfSamples = samples.numberOfSamples();
    for (int sampleIdx = 0; sampleIdx < numberOfSamples; sampleIdx++) {
        final List<EVIDENCE> evidenceList = evidenceBySampleIndex.get(sampleIdx);
        final Object2IntMap<EVIDENCE> positionIndex = evidenceIndexBySampleIndex.get(sampleIdx);
        final boolean indexAvailable = positionIndex != null;
        final int evidenceCount = evidenceList.size();
        for (int position = 0; position < evidenceCount; position++) {
            final EVIDENCE current = evidenceList.get(position);
            final EVIDENCE substitute = evidenceReplacements.get(current);
            if (substitute != null) {
                evidenceList.set(position, substitute);
                if (indexAvailable) {
                    // the substitute inherits the position of the evidence it replaces
                    positionIndex.remove(current);
                    positionIndex.put(substitute, position);
                }
            }
        }
    }
}

Source File: AlleleLikelihoods.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Appends to a sample the pieces of evidence it does not hold yet and refreshes
 * the evidence count recorded for that sample.
 *
 * @param newSampleEvidence the candidate evidence to append.
 * @param sampleIndex index of the sample that receives the evidence.
 */
private void appendEvidence(final List<EVIDENCE> newSampleEvidence, final int sampleIndex) {
    final List<EVIDENCE> existing = evidenceBySampleIndex.get(sampleIndex);
    final Object2IntMap<EVIDENCE> positionIndex = evidenceIndexBySampleIndex(sampleIndex);

    for (final EVIDENCE candidate : newSampleEvidence) {
        // Tentatively register the candidate at the next free position; put() hands back
        // whatever was registered for it before.
        // NOTE(review): presumably the index reports MISSING_INDEX for unknown keys - confirm
        // against evidenceIndexBySampleIndex(int).
        final int displaced = positionIndex.put(candidate, existing.size());
        if (displaced != MISSING_INDEX) {
            // already known: restore the position it had
            positionIndex.put(candidate, displaced);
            continue;
        }
        existing.add(candidate);
    }

    numberOfEvidences[sampleIndex] = existing.size();
}

Source File: SmartDictionarySerializer.java From mph-table with Apache License 2.0

5 votes

/**
 * Builds the reverse look-up of a word array: every word mapped to its position in the array.
 * When a word occurs more than once, the later position overwrites the earlier one.
 *
 * @param words the words, in dictionary order
 * @return a map from word to array position
 */
private Object2IntMap<String> indexToDictionary(final String[] words) throws IOException {
    final Object2IntMap<String> positionByWord = new Object2IntOpenHashMap<>();
    int position = 0;
    for (final String word : words) {
        positionByWord.put(word, position);
        position++;
    }
    return positionByWord;
}

Source File: CategoricalColumn.java From tablesaw with Apache License 2.0

5 votes

/**
 * Counts how often each distinct value occurs in this column.
 *
 * The result is a table named {@code "Column: " + name()} with a "Category" column holding
 * the distinct values and a "Count" column holding their frequencies. When the column has
 * missing values, one extra row with a missing category and the number of missing values
 * is appended last.
 *
 * @return the table of categories and counts
 */
default Table countByCategory() {

    final Table t = new Table("Column: " + name());
    final CategoricalColumn<?> categories = (CategoricalColumn<?>) type().create("Category");
    final IntColumn counts = IntColumn.create("Count");

    final Object2IntMap<String> valueToCount = new Object2IntOpenHashMap<>();

    for (int i = 0; i < size(); i++) {
        if (!isMissing(i)) {
            final String next = getString(i);
            // The map is created above with its default return value (0) untouched, so an
            // unseen value starts from 0 and no separate containsKey look-up is needed.
            valueToCount.put(next, valueToCount.getInt(next) + 1);
        }
    }
    // Iterate the primitive entry view so counts are read as int rather than boxed Integer.
    for (Object2IntMap.Entry<String> entry : valueToCount.object2IntEntrySet()) {
        categories.appendCell(entry.getKey());
        counts.append(entry.getIntValue());
    }
    // Ask for the missing count once; it is needed both for the test and for the row.
    final int missingCount = countMissing();
    if (missingCount > 0) {
        categories.appendMissing();
        counts.append(missingCount);
    }
    t.addColumns(categories);
    t.addColumns(counts);
    return t;
}

Source File: CategoricalColumn.java From tablesaw with Apache License 2.0

5 votes

/**
 * Builds a frequency table for this column: one row per distinct value with the number of
 * times it occurs, followed by a row for missing values when there are any.
 *
 * @return a table named {@code "Column: " + name()} with a "Category" and a "Count" column
 */
default Table countByCategory() {
    final Table result = new Table("Column: " + name());
    final CategoricalColumn<?> categoryColumn = (CategoricalColumn<?>) type().create("Category");
    final IntColumn countColumn = IntColumn.create("Count");

    // Tally the non-missing values.
    final Object2IntMap<String> tally = new Object2IntOpenHashMap<>();
    for (int row = 0; row < size(); row++) {
        if (isMissing(row)) {
            continue;
        }
        final String value = getString(row);
        final int seenSoFar = tally.containsKey(value) ? tally.getInt(value) : 0;
        tally.put(value, seenSoFar + 1);
    }

    // One output row per distinct value.
    for (Map.Entry<String, Integer> tallied : tally.object2IntEntrySet()) {
        categoryColumn.appendCell(tallied.getKey());
        countColumn.append(tallied.getValue());
    }

    // Missing values, if any, get a row of their own at the end.
    if (countMissing() > 0) {
        categoryColumn.appendMissing();
        countColumn.append(countMissing());
    }

    result.addColumns(categoryColumn);
    result.addColumns(countColumn);
    return result;
}

Source File: AlleleLikelihoods.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Builds the evidence-to-position index of one sample, stores it in the per-sample
 * index cache and returns it.
 *
 * @param sampleIndex index of the sample whose evidence is indexed.
 * @return the freshly built index.
 */
private Object2IntMap<EVIDENCE> fillEvidenceToIndexCache(int sampleIndex) {
    final List<EVIDENCE> evidenceList = evidenceBySampleIndex.get(sampleIndex);
    final Object2IntMap<EVIDENCE> positionByEvidence = new Object2IntOpenHashMap<>(evidenceList.size());
    // look-ups of evidence that is not indexed report MISSING_INDEX
    positionByEvidence.defaultReturnValue(MISSING_INDEX);
    int position = 0;
    for (final EVIDENCE evidence : evidenceList) {
        positionByEvidence.put(evidence, position);
        position++;
    }
    evidenceIndexBySampleIndex.set(sampleIndex, positionByEvidence);
    return positionByEvidence;
}

Source File: SymmetricImmutablePairTest.java From liblevenshtein-java with MIT License

5 votes

/**
 * Checks that two pairs supplied as equivalent agree under comparison, equality and
 * hashing, and that they act as one and the same key in a hash-based and in a
 * tree-based map.
 */
@Test(dataProvider = "equivalentPairs")
public void testEquivalentPairs(
    final SymmetricImmutablePair<String> lhs,
    final SymmetricImmutablePair<String> rhs) {

  // ordering
  assertThat(lhs).isEqualByComparingTo(lhs);
  assertThat(rhs).isEqualByComparingTo(rhs);
  assertThat(lhs).isEqualByComparingTo(rhs);
  assertThat(rhs).isEqualByComparingTo(lhs);

  // equality
  assertThat(lhs).isEqualTo(lhs);
  assertThat(rhs).isEqualTo(rhs);
  assertThat(lhs).isEqualTo(rhs);
  assertThat(rhs).isEqualTo(lhs);

  // hashing
  assertThat(lhs.hashCode()).isEqualTo(rhs.hashCode());

  // behaviour as map keys: hash-based first, then comparison-based
  assertInterchangeableAsKeys(new Object2IntOpenHashMap<SymmetricImmutablePair<String>>(2), lhs, rhs);
  assertInterchangeableAsKeys(new Object2IntRBTreeMap<SymmetricImmutablePair<String>>(), lhs, rhs);
}

/**
 * Asserts that a value stored under either pair is visible under both, and that storing
 * under the second pair overwrites the value stored under the first.
 */
private static void assertInterchangeableAsKeys(
    final Object2IntMap<SymmetricImmutablePair<String>> map,
    final SymmetricImmutablePair<String> first,
    final SymmetricImmutablePair<String> second) {

  map.put(first, 1);
  assertThat(map).containsEntry(first, 1);
  assertThat(map).containsEntry(second, 1);

  map.put(second, 2);
  assertThat(map).containsEntry(second, 2);
  assertThat(map).containsEntry(first, 2);
}

Source File: NoDictionarySingleColumnGroupKeyGenerator.java From incubator-pinot with Apache License 2.0

5 votes

/**
 * Looks up the group id of a string value, handing out the next unused id the first time
 * a value is seen. Once {@code _numGroups} has reached {@code _globalGroupIdUpperBound}
 * no new ids are handed out and the result of the look-up is returned unchanged.
 * NOTE(review): presumably the map reports INVALID_ID for unknown values - confirm where
 * _groupKeyMap is created.
 */
@SuppressWarnings("unchecked")
private int getKeyForValue(String value) {
  final Object2IntMap<String> groupKeys = (Object2IntMap<String>) _groupKeyMap;
  final int knownId = groupKeys.getInt(value);
  if (knownId != INVALID_ID || _numGroups >= _globalGroupIdUpperBound) {
    // either the value already has an id, or the group limit forbids creating one
    return knownId;
  }
  final int assignedId = _numGroups++;
  groupKeys.put(value, assignedId);
  return assignedId;
}

Source File: HivePageSink.java From presto with Apache License 2.0

4 votes

/**
 * Creates a page sink.
 *
 * Besides storing its collaborators, the constructor splits the input columns into
 * partition columns and data columns (remembering the input index of each) and, when a
 * bucket property is present, resolves the bucketed-by column names to input indexes and
 * Hive types in order to build the bucket function.
 *
 * All object arguments are null-checked; {@code maxOpenWriters} is stored as given.
 */
public HivePageSink(
        HiveWriterFactory writerFactory,
        List<HiveColumnHandle> inputColumns,
        Optional<HiveBucketProperty> bucketProperty,
        PageIndexerFactory pageIndexerFactory,
        HdfsEnvironment hdfsEnvironment,
        int maxOpenWriters,
        ListeningExecutorService writeVerificationExecutor,
        JsonCodec<PartitionUpdate> partitionUpdateCodec,
        ConnectorSession session)
{
    this.writerFactory = requireNonNull(writerFactory, "writerFactory is null");

    // inputColumns and pageIndexerFactory are only validated here; they are handed to the
    // page partitioner below rather than kept in fields by this constructor
    requireNonNull(inputColumns, "inputColumns is null");

    requireNonNull(pageIndexerFactory, "pageIndexerFactory is null");

    this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
    this.maxOpenWriters = maxOpenWriters;
    this.writeVerificationExecutor = requireNonNull(writeVerificationExecutor, "writeVerificationExecutor is null");
    this.partitionUpdateCodec = requireNonNull(partitionUpdateCodec, "partitionUpdateCodec is null");

    requireNonNull(bucketProperty, "bucketProperty is null");
    // the partitioner is told whether the table is bucketed
    this.pagePartitioner = new HiveWriterPagePartitioner(
            inputColumns,
            bucketProperty.isPresent(),
            pageIndexerFactory);

    // determine the input index of the partition columns and data columns
    // and determine the input index and type of bucketing columns
    ImmutableList.Builder<Integer> partitionColumns = ImmutableList.builder();
    ImmutableList.Builder<Integer> dataColumnsInputIndex = ImmutableList.builder();
    // data column name -> input index, and data column name -> Hive type
    Object2IntMap<String> dataColumnNameToIdMap = new Object2IntOpenHashMap<>();
    Map<String, HiveType> dataColumnNameToTypeMap = new HashMap<>();
    // sample weight column is passed separately, so index must be calculated without this column
    for (int inputIndex = 0; inputIndex < inputColumns.size(); inputIndex++) {
        HiveColumnHandle column = inputColumns.get(inputIndex);
        if (column.isPartitionKey()) {
            partitionColumns.add(inputIndex);
        }
        else {
            dataColumnsInputIndex.add(inputIndex);
            dataColumnNameToIdMap.put(column.getName(), inputIndex);
            dataColumnNameToTypeMap.put(column.getName(), column.getHiveType());
        }
    }
    this.partitionColumnsInputIndex = Ints.toArray(partitionColumns.build());
    this.dataColumnInputIndex = Ints.toArray(dataColumnsInputIndex.build());

    if (bucketProperty.isPresent()) {
        BucketingVersion bucketingVersion = bucketProperty.get().getBucketingVersion();
        int bucketCount = bucketProperty.get().getBucketCount();
        // input index of every bucketed-by column, in bucketed-by order
        // NOTE(review): the look-up goes through the boxed get(Object); confirm what is
        // intended for a bucketed-by name that is not among the data columns
        bucketColumns = bucketProperty.get().getBucketedBy().stream()
                .mapToInt(dataColumnNameToIdMap::get)
                .toArray();
        // Hive type of every bucketed-by column, in the same order
        List<HiveType> bucketColumnTypes = bucketProperty.get().getBucketedBy().stream()
                .map(dataColumnNameToTypeMap::get)
                .collect(toList());
        bucketFunction = new HiveBucketFunction(bucketingVersion, bucketCount, bucketColumnTypes);
    }
    else {
        // not bucketed: no bucket columns and no bucket function
        bucketColumns = null;
        bucketFunction = null;
    }

    this.session = requireNonNull(session, "session is null");
}

Source File: LanguageDetector.java From jstarcraft-nlp with Apache License 2.0

4 votes

/**
 * Detects the languages a text may be written in.
 *
 * The script whose pattern matches the text most often is determined first. If no trigram
 * dictionaries are registered for that script, the script name itself is reported as the
 * only language (score 1). Otherwise every dictionary of the script that passes the
 * whitelist/blacklist check is scored against the character trigrams of the text and the
 * scores are normalized.
 *
 * @param text    the text to inspect; only its first {@code maximum} characters are used
 * @param options language filter: keys mapped to {@code true} form the whitelist, keys mapped
 *                to {@code false} form the blacklist; an empty map means no filter entries
 * @return the detected languages with their scores; empty when the text is shorter than
 *         {@code minimum}, when no script pattern matches, or when every candidate is
 *         rejected by the filter
 */
public SortedSet<DetectionLanguage> detectLanguages(String text, Object2BooleanMap<String> options) {
    SortedSet<DetectionLanguage> locales = new TreeSet<>();

    // Lower length limit.
    if (text.length() < minimum) {
        return locales;
    }
    // Upper length limit: only the leading part of a long text is analysed.
    if (text.length() > maximum) {
        text = text.substring(0, maximum);
    }

    // Whitelist and blacklist. With no options the loop below never runs, so the shared
    // immutable empty set is sufficient (typed, to avoid a raw-type unchecked assignment).
    boolean noOptions = options.size() == 0;
    Set<String> whites = noOptions ? Collections.<String>emptySet() : new HashSet<String>();
    Set<String> blacks = noOptions ? Collections.<String>emptySet() : new HashSet<String>();
    for (Object2BooleanMap.Entry<String> option : options.object2BooleanEntrySet()) {
        if (option.getBooleanValue()) {
            whites.add(option.getKey());
        } else {
            blacks.add(option.getKey());
        }
    }

    /*
     * Get the script whose characters occur the most in the text.
     */
    int count = -1;
    String script = null;
    for (DetectionPattern regulation : patterns.values()) {
        Matcher matcher = regulation.getPattern().matcher(text);
        int match = 0;
        while (matcher.find()) {
            match++;
        }
        if (match > count) {
            count = match;
            script = regulation.getName();
        }
    }
    // No pattern at all, or the best pattern did not match a single time.
    if (script == null || count <= 0) {
        return locales;
    }

    /* Only one language exists for the most-used script. */
    Set<DetectionTrie> dictionaries = tires.get(script);
    if (dictionaries == null) {
        /*
         * No dictionaries are registered for this script, so the script name itself is
         * used as the language tag, unless the filter rejects it.
         */
        if (!checkLanguage(script, whites, blacks)) {
            return locales;
        }
        locales.add(new DetectionLanguage(Locale.forLanguageTag(script), 1D));
        return locales;
    }

    /*
     * Get all distances for a given script, and normalize the distance values.
     */
    // A space is added on both sides for the n-gram processing. Lower-casing uses
    // Locale.ROOT so that the trigrams do not depend on the JVM's default locale.
    text = StringUtility.SPACE + REPLACE.matcher(text).replaceAll(StringUtility.SPACE).toLowerCase(Locale.ROOT) + StringUtility.SPACE;
    CharacterNgram ngram = new CharacterNgram(3, text);
    // Frequency of every trigram of the text.
    Object2IntMap<CharSequence> tuples = new Object2IntOpenHashMap<>();
    for (CharSequence character : ngram) {
        int frequency = tuples.getInt(character);
        tuples.put(character, frequency + 1);
    }
    for (DetectionTrie dictionary : dictionaries) {
        String language = dictionary.getName();
        if (checkLanguage(language, whites, blacks)) {
            double score = getScore(tuples, dictionary.getTrie());
            DetectionLanguage locale = new DetectionLanguage(Locale.forLanguageTag(language), score);
            locales.add(locale);
        }
    }
    if (!locales.isEmpty()) {
        normalizeScores(text, locales);
    }
    return locales;
}

Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0

4 votes

/**
 * Turns the annotations of the configured layer that are covered by a sentence into
 * token-based spans.
 *
 * Annotations that do not start and end on token boundaries are skipped with a warning,
 * annotations starting before the end of the last accepted span are skipped as
 * overlapping, and annotations with a blank label produce no span.
 *
 * @param aCas      the CAS the annotation type is looked up in
 * @param aSentence the sentence whose covered annotations are converted
 * @param aTokens   the tokens used to translate character offsets into token positions
 * @return the spans, with an exclusive end position
 */
private Span[] extractAnnotatedSpans(CAS aCas, AnnotationFS aSentence,
                                     Collection<AnnotationFS> aTokens) {
    // Index the tokens: character offset (begin and end) -> token, and token -> position
    Int2ObjectMap<AnnotationFS> tokenByOffset = new Int2ObjectOpenHashMap<>();
    Object2IntMap<AnnotationFS> positionByToken = new Object2IntOpenHashMap<>();
    int position = 0;
    for (AnnotationFS token : aTokens) {
        tokenByOffset.put(token.getBegin(), token);
        tokenByOffset.put(token.getEnd(), token);
        positionByToken.put(token, position);
        position++;
    }

    // Create spans from target annotations
    Type annotationType = getType(aCas, layerName);
    Feature feature = annotationType.getFeatureByBaseName(featureName);
    List<Span> spans = new ArrayList<>();

    int firstFreePosition = 0;
    for (AnnotationFS annotation : selectCovered(annotationType, aSentence)) {
        String label = annotation.getFeatureValueAsString(feature);

        AnnotationFS firstToken = tokenByOffset.get(annotation.getBegin());
        AnnotationFS lastToken = tokenByOffset.get(annotation.getEnd());
        if (firstToken == null || lastToken == null) {
            LOG.warn("Skipping annotation not starting/ending at token boundaries: [{}-{}, {}]",
                    annotation.getBegin(), annotation.getEnd(), label);
            continue;
        }

        int begin = positionByToken.get(firstToken);
        int end = positionByToken.get(lastToken);
        int endExclusive = end + 1;

        // An annotation that begins before the end of the last accepted span overlaps an
        // annotation seen earlier. OpenNLP NER does not support overlapping annotations,
        // so such an annotation is dropped.
        if (begin < firstFreePosition) {
            LOG.debug("Skipping overlapping annotation: [{}-{}, {}]", begin, endExclusive, label);
            continue;
        }

        // Only labelled annotations become spans and move the overlap boundary forward.
        if (isNotBlank(label)) {
            spans.add(new Span(begin, endExclusive, label));
            firstFreePosition = endExclusive;
        }
    }
    return spans.toArray(new Span[spans.size()]);
}

Java Code Examples for it.unimi.dsi.fastutil.objects.Object2IntMap#put()