Java Code Examples for it.unimi.dsi.fastutil.objects.Object2IntMap#put()
The following examples show how to use
it.unimi.dsi.fastutil.objects.Object2IntMap#put().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TableReader.java From fastjgame with Apache License 2.0 | 6 votes |
/** * 读取属性名行 * * @param fileName 文件名,用于打印更详细的错误原因 * @param rowIndex 行索引 * @param row 行内容 * @return 命名行 */ private ColNameRow readColNameRow(String fileName, int rowIndex, T row) { // 使用LinkedHashMap以保持读入顺序 int totalColNum = getTotalColNum(row); Object2IntMap<String> colName2Index = new Object2IntLinkedOpenHashMap<>(totalColNum + 1); for (int colIndex = 0; colIndex < totalColNum; colIndex++) { String originalColName = getNullableCell(row, colIndex); // 属性名称行,空白属性跳过 if (null == originalColName) { continue; } // 去掉空白填充 String realColName = originalColName.trim(); if (realColName.length() == 0) { continue; } // 属性名不可以有重复 if (colName2Index.containsKey(realColName)) { throw new IllegalArgumentException("file " + fileName + " propertyNameRow has duplicate column " + realColName); } colName2Index.put(realColName, colIndex); } return new ColNameRow(rowIndex, colName2Index); }
Example 2
Source File: Utils.java From AffectiveTweets with GNU General Public License v3.0 | 6 votes |
/** * Calculates a vector of attributes from a list of tokens * * @param tokens the input tokens * @param prefix the prefix of each vector attribute * @param freqWeights true for considering term-frequency weights (booleans weights are used otherwise) * @return an Object2IntMap object mapping the attributes to their values */ public static Object2IntMap<String> calculateTermFreq(List<String> tokens, String prefix, boolean freqWeights) { Object2IntMap<String> termFreq = new Object2IntOpenHashMap<String>(); // Traverse the strings and increments the counter when the token was // already seen before for (String token : tokens) { // add frequency weights if the flat is set if(freqWeights) termFreq.put(prefix+token, termFreq.getInt(prefix+token) + 1); // otherwise, just consider boolean weights else{ if(!termFreq.containsKey(token)) termFreq.put(prefix+token, 1); } } return termFreq; }
Example 3
Source File: ReadCountCollection.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Rearrange the targets so that they are in a particular order.
 *
 * @param targetsInOrder the targets, in the order in which they must appear in the result.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 *     <li>{@code targetsInOrder} is {@code null},</li>
 *     <li>is empty,</li>
 *     <li>it contains {@code null},</li>
 *     <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");

    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());

    // Row index of every target currently held by this collection.
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }

    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Reject null elements explicitly: the "not present" message below dereferences
        // the target, so a null would otherwise fail while building that message
        // instead of through the same argument validation as the other checks.
        Utils.validateArg(target != null, () -> "the input targets list cannot contain null");
        Utils.validateArg(targetToIndex.containsKey(target),
                () -> String.format("target '%s' is not present in the collection", target.getName()));
        counts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }

    // Copy the list so that later changes by the caller cannot affect the new collection.
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}
Example 4
Source File: AlleleLikelihoods.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Replaces evidence objects in place, sample by sample.
 * <p>
 * Every evidence that has a non-null mapping in {@code evidenceReplacements} is
 * swapped for its replacement at the same position. When an evidence-to-index map
 * exists for the sample, it is updated to match.
 *
 * @param evidenceReplacements maps an evidence to the object that takes its place;
 *                             evidence without a mapping is left untouched.
 */
public void changeEvidence(final Map<EVIDENCE, EVIDENCE> evidenceReplacements) {
    final int numberOfSamples = samples.numberOfSamples();
    for (int sampleIndex = 0; sampleIndex < numberOfSamples; sampleIndex++) {
        final List<EVIDENCE> evidenceList = evidenceBySampleIndex.get(sampleIndex);
        // May be null, in which case there is no index to keep in sync.
        final Object2IntMap<EVIDENCE> indexCache = evidenceIndexBySampleIndex.get(sampleIndex);
        final boolean hasIndexCache = indexCache != null;
        final int evidenceCount = evidenceList.size();

        for (int position = 0; position < evidenceCount; position++) {
            final EVIDENCE original = evidenceList.get(position);
            final EVIDENCE substitute = evidenceReplacements.get(original);
            if (substitute != null) {
                evidenceList.set(position, substitute);
                if (hasIndexCache) {
                    indexCache.remove(original);
                    indexCache.put(substitute, position);
                }
            }
        }
    }
}
Example 5
Source File: AlleleLikelihoods.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void appendEvidence(final List<EVIDENCE> newSampleEvidence, final int sampleIndex) { final List<EVIDENCE> sampleEvidence = evidenceBySampleIndex.get(sampleIndex); final Object2IntMap<EVIDENCE> sampleEvidenceIndex = evidenceIndexBySampleIndex(sampleIndex); for (final EVIDENCE newEvidence : newSampleEvidence) { final int previousValue = sampleEvidenceIndex.put(newEvidence, sampleEvidence.size()); if (previousValue == MISSING_INDEX) { sampleEvidence.add(newEvidence); } else { sampleEvidenceIndex.put(newEvidence, previousValue); // revert } } numberOfEvidences[sampleIndex] = sampleEvidence.size(); }
Example 6
Source File: SmartDictionarySerializer.java From mph-table with Apache License 2.0 | 5 votes |
/**
 * Builds a lookup from each word to its position in the given array.
 * When a word occurs more than once, its last position is the one retained.
 *
 * @param words the words, in dictionary order
 * @return a map from word to array index
 * @throws IOException declared by the signature; nothing in this body throws it
 */
private Object2IntMap<String> indexToDictionary(final String[] words) throws IOException {
    final Object2IntMap<String> positionByWord = new Object2IntOpenHashMap<>();
    int position = 0;
    for (final String word : words) {
        positionByWord.put(word, position);
        position++;
    }
    return positionByWord;
}
Example 7
Source File: CategoricalColumn.java From tablesaw with Apache License 2.0 | 5 votes |
/**
 * Builds a two-column table ("Category", "Count") with the number of occurrences of
 * each distinct value of this column, as rendered by {@code getString}.
 * When the column has missing values, a final row with a missing category and the
 * missing-value count is appended.
 *
 * @return the table of counts, named {@code "Column: " + name()}
 */
default Table countByCategory() {
    final Table t = new Table("Column: " + name());
    final CategoricalColumn<?> categories = (CategoricalColumn<?>) type().create("Category");
    final IntColumn counts = IntColumn.create("Count");

    final Object2IntMap<String> valueToCount = new Object2IntOpenHashMap<>();
    for (int i = 0; i < size(); i++) {
        if (!isMissing(i)) {
            final String next = getString(i);
            // getInt yields the default return value (0 for this fresh map) for an
            // unseen key, so no separate containsKey lookup is needed.
            valueToCount.put(next, valueToCount.getInt(next) + 1);
        }
    }

    // Iterate the primitive entry view so the counts are read without boxing.
    for (Object2IntMap.Entry<String> entry : valueToCount.object2IntEntrySet()) {
        categories.appendCell(entry.getKey());
        counts.append(entry.getIntValue());
    }

    if (countMissing() > 0) {
        categories.appendMissing();
        counts.append(countMissing());
    }

    t.addColumns(categories);
    t.addColumns(counts);
    return t;
}
Example 8
Source File: CategoricalColumn.java From tablesaw with Apache License 2.0 | 5 votes |
/**
 * Builds a two-column table ("Category", "Count") with the number of occurrences of
 * each distinct value of this column, as rendered by {@code getString}.
 * When the column has missing values, a final row with a missing category and the
 * missing-value count is appended.
 *
 * @return the table of counts, named {@code "Column: " + name()}
 */
default Table countByCategory() {
    final Table t = new Table("Column: " + name());
    final CategoricalColumn<?> categories = (CategoricalColumn<?>) type().create("Category");
    final IntColumn counts = IntColumn.create("Count");

    final Object2IntMap<String> valueToCount = new Object2IntOpenHashMap<>();
    for (int i = 0; i < size(); i++) {
        if (!isMissing(i)) {
            final String next = getString(i);
            // getInt yields the default return value (0 for this fresh map) for an
            // unseen key, so no separate containsKey lookup is needed.
            valueToCount.put(next, valueToCount.getInt(next) + 1);
        }
    }

    // Iterate the primitive entry view so the counts are read without boxing.
    for (Object2IntMap.Entry<String> entry : valueToCount.object2IntEntrySet()) {
        categories.appendCell(entry.getKey());
        counts.append(entry.getIntValue());
    }

    if (countMissing() > 0) {
        categories.appendMissing();
        counts.append(countMissing());
    }

    t.addColumns(categories);
    t.addColumns(counts);
    return t;
}
Example 9
Source File: AlleleLikelihoods.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Builds the evidence-to-position map for one sample, stores it in
 * {@code evidenceIndexBySampleIndex} and returns it.
 * Lookups of evidence absent from the map return {@code MISSING_INDEX}.
 *
 * @param sampleIndex index of the sample whose map is built.
 * @return the newly built map.
 */
private Object2IntMap<EVIDENCE> fillEvidenceToIndexCache(int sampleIndex) {
    final List<EVIDENCE> evidenceList = evidenceBySampleIndex.get(sampleIndex);
    final Object2IntMap<EVIDENCE> cache = new Object2IntOpenHashMap<>(evidenceList.size());
    cache.defaultReturnValue(MISSING_INDEX);

    int position = 0;
    for (final EVIDENCE evidence : evidenceList) {
        cache.put(evidence, position);
        position++;
    }

    evidenceIndexBySampleIndex.set(sampleIndex, cache);
    return cache;
}
Example 10
Source File: SymmetricImmutablePairTest.java From liblevenshtein-java with MIT License | 5 votes |
/**
 * Checks that two equivalent pairs are interchangeable: they compare as equal,
 * are {@code equals} to each other, share a hash code, and address the same entry
 * when used as keys of a hash-based and of a tree-based map.
 *
 * @param lhs one pair
 * @param rhs a pair expected to be equivalent to {@code lhs}
 */
@Test(dataProvider = "equivalentPairs")
public void testEquivalentPairs(
        final SymmetricImmutablePair<String> lhs,
        final SymmetricImmutablePair<String> rhs) {

    // Ordering: each pair compares equal to itself and to the other.
    assertThat(lhs).isEqualByComparingTo(lhs);
    assertThat(rhs).isEqualByComparingTo(rhs);
    assertThat(lhs).isEqualByComparingTo(rhs);
    assertThat(rhs).isEqualByComparingTo(lhs);

    // Equality, in both directions.
    assertThat(lhs).isEqualTo(lhs);
    assertThat(rhs).isEqualTo(rhs);
    assertThat(lhs).isEqualTo(rhs);
    assertThat(rhs).isEqualTo(lhs);

    // Equal objects must hash alike.
    assertThat(lhs.hashCode()).isEqualTo(rhs.hashCode());

    // Hash-based map: writing through one pair is visible through the other.
    final Object2IntMap<SymmetricImmutablePair<String>> hashed = new Object2IntOpenHashMap<>(2);
    hashed.put(lhs, 1);
    assertThat(hashed).containsEntry(lhs, 1);
    assertThat(hashed).containsEntry(rhs, 1);
    hashed.put(rhs, 2);
    assertThat(hashed).containsEntry(rhs, 2);
    assertThat(hashed).containsEntry(lhs, 2);

    // Tree-based map: same expectations.
    final Object2IntMap<SymmetricImmutablePair<String>> sorted = new Object2IntRBTreeMap<>();
    sorted.put(lhs, 1);
    assertThat(sorted).containsEntry(lhs, 1);
    assertThat(sorted).containsEntry(rhs, 1);
    sorted.put(rhs, 2);
    assertThat(sorted).containsEntry(rhs, 2);
    assertThat(sorted).containsEntry(lhs, 2);
}
Example 11
Source File: NoDictionarySingleColumnGroupKeyGenerator.java From incubator-pinot with Apache License 2.0 | 5 votes |
/**
 * Returns the group id of a value, assigning the next free id to a value that has
 * none yet as long as the number of groups is below the upper bound.
 *
 * @param value the group-by value
 * @return the group id, or the result of the failed lookup when the value has no id
 *         and the upper bound has been reached
 */
@SuppressWarnings("unchecked")
private int getKeyForValue(String value) {
    final Object2IntMap<String> groupIds = (Object2IntMap<String>) _groupKeyMap;

    // NOTE(review): assumes the map's default return value is INVALID_ID, so that a
    // value without an id reads as INVALID_ID - confirm where the map is created.
    final int knownId = groupIds.getInt(value);
    if (knownId != INVALID_ID) {
        return knownId;
    }
    if (_numGroups >= _globalGroupIdUpperBound) {
        // No room for another group: hand back the failed lookup result unchanged.
        return knownId;
    }

    final int assignedId = _numGroups++;
    groupIds.put(value, assignedId);
    return assignedId;
}
Example 12
Source File: HivePageSink.java From presto with Apache License 2.0 | 4 votes |
public HivePageSink( HiveWriterFactory writerFactory, List<HiveColumnHandle> inputColumns, Optional<HiveBucketProperty> bucketProperty, PageIndexerFactory pageIndexerFactory, HdfsEnvironment hdfsEnvironment, int maxOpenWriters, ListeningExecutorService writeVerificationExecutor, JsonCodec<PartitionUpdate> partitionUpdateCodec, ConnectorSession session) { this.writerFactory = requireNonNull(writerFactory, "writerFactory is null"); requireNonNull(inputColumns, "inputColumns is null"); requireNonNull(pageIndexerFactory, "pageIndexerFactory is null"); this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); this.maxOpenWriters = maxOpenWriters; this.writeVerificationExecutor = requireNonNull(writeVerificationExecutor, "writeVerificationExecutor is null"); this.partitionUpdateCodec = requireNonNull(partitionUpdateCodec, "partitionUpdateCodec is null"); requireNonNull(bucketProperty, "bucketProperty is null"); this.pagePartitioner = new HiveWriterPagePartitioner( inputColumns, bucketProperty.isPresent(), pageIndexerFactory); // determine the input index of the partition columns and data columns // and determine the input index and type of bucketing columns ImmutableList.Builder<Integer> partitionColumns = ImmutableList.builder(); ImmutableList.Builder<Integer> dataColumnsInputIndex = ImmutableList.builder(); Object2IntMap<String> dataColumnNameToIdMap = new Object2IntOpenHashMap<>(); Map<String, HiveType> dataColumnNameToTypeMap = new HashMap<>(); // sample weight column is passed separately, so index must be calculated without this column for (int inputIndex = 0; inputIndex < inputColumns.size(); inputIndex++) { HiveColumnHandle column = inputColumns.get(inputIndex); if (column.isPartitionKey()) { partitionColumns.add(inputIndex); } else { dataColumnsInputIndex.add(inputIndex); dataColumnNameToIdMap.put(column.getName(), inputIndex); dataColumnNameToTypeMap.put(column.getName(), column.getHiveType()); } } this.partitionColumnsInputIndex = 
Ints.toArray(partitionColumns.build()); this.dataColumnInputIndex = Ints.toArray(dataColumnsInputIndex.build()); if (bucketProperty.isPresent()) { BucketingVersion bucketingVersion = bucketProperty.get().getBucketingVersion(); int bucketCount = bucketProperty.get().getBucketCount(); bucketColumns = bucketProperty.get().getBucketedBy().stream() .mapToInt(dataColumnNameToIdMap::get) .toArray(); List<HiveType> bucketColumnTypes = bucketProperty.get().getBucketedBy().stream() .map(dataColumnNameToTypeMap::get) .collect(toList()); bucketFunction = new HiveBucketFunction(bucketingVersion, bucketCount, bucketColumnTypes); } else { bucketColumns = null; bucketFunction = null; } this.session = requireNonNull(session, "session is null"); }
Example 13
Source File: LanguageDetector.java From jstarcraft-nlp with Apache License 2.0 | 4 votes |
/** * 检测语言 * * @param text * @param options * @return */ public SortedSet<DetectionLanguage> detectLanguages(String text, Object2BooleanMap<String> options) { SortedSet<DetectionLanguage> locales = new TreeSet<>(); // 最小长度限制 int size = text.length(); if (size < minimum) { return locales; } // 最大长度限制 if (size > maximum) { text = text.substring(0, maximum); size = maximum; } // 白名单,黑名单 Set<String> writes = options.size() == 0 ? Collections.EMPTY_SET : new HashSet<>(); Set<String> blacks = options.size() == 0 ? Collections.EMPTY_SET : new HashSet<>(); for (Object2BooleanMap.Entry<String> option : options.object2BooleanEntrySet()) { if (option.getBooleanValue()) { writes.add(option.getKey()); } else { blacks.add(option.getKey()); } } /* * Get the script which characters occur the most in `value`. */ int count = -1; String script = null; for (DetectionPattern regulation : patterns.values()) { Pattern pattern = regulation.getPattern(); Matcher matcher = pattern.matcher(text); int match = 0; while (matcher.find()) { match++; } if (match > count) { count = match; script = regulation.getName(); } } if (script == null || count <= 0) { return locales; } /* One languages exists for the most-used script. */ Set<DetectionTrie> dictionaries = tires.get(script); if (dictionaries == null) { /* * If no matches occured, such as a digit only string, or because the language is ignored, exit with `und`. */ if (!checkLanguage(script, writes, blacks)) { return locales; } locales.add(new DetectionLanguage(Locale.forLanguageTag(script), 1D)); return locales; } /* * Get all distances for a given script, and normalize the distance values. 
*/ // 前后补空格是为了N-Gram处理 text = StringUtility.SPACE + REPLACE.matcher(text).replaceAll(StringUtility.SPACE).toLowerCase() + StringUtility.SPACE; CharacterNgram ngram = new CharacterNgram(3, text); Object2IntMap<CharSequence> tuples = new Object2IntOpenHashMap<>(); for (CharSequence character : ngram) { count = tuples.getInt(character); tuples.put(character, count + 1); } for (DetectionTrie dictionary : dictionaries) { String language = dictionary.getName(); if (checkLanguage(language, writes, blacks)) { double score = getScore(tuples, dictionary.getTrie()); DetectionLanguage locale = new DetectionLanguage(Locale.forLanguageTag(language), score); locales.add(locale); } } if (!locales.isEmpty()) { normalizeScores(text, locales); } return locales; }
Example 14
Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0 | 4 votes |
private Span[] extractAnnotatedSpans(CAS aCas, AnnotationFS aSentence, Collection<AnnotationFS> aTokens) { // Convert character offsets to token indices Int2ObjectMap<AnnotationFS> idxTokenOffset = new Int2ObjectOpenHashMap<>(); Object2IntMap<AnnotationFS> idxToken = new Object2IntOpenHashMap<>(); int idx = 0; for (AnnotationFS t : aTokens) { idxTokenOffset.put(t.getBegin(), t); idxTokenOffset.put(t.getEnd(), t); idxToken.put(t, idx); idx++; } // Create spans from target annotations Type annotationType = getType(aCas, layerName); Feature feature = annotationType.getFeatureByBaseName(featureName); List<AnnotationFS> annotations = selectCovered(annotationType, aSentence); int numberOfAnnotations = annotations.size(); List<Span> result = new ArrayList<>(); int highestEndTokenPositionObserved = 0; for (int i = 0; i < numberOfAnnotations; i++) { AnnotationFS annotation = annotations.get(i); String label = annotation.getFeatureValueAsString(feature); AnnotationFS beginToken = idxTokenOffset.get(annotation.getBegin()); AnnotationFS endToken = idxTokenOffset.get(annotation.getEnd()); if (beginToken == null || endToken == null) { LOG.warn("Skipping annotation not starting/ending at token boundaries: [{}-{}, {}]", annotation.getBegin(), annotation.getEnd(), label); continue; } int begin = idxToken.get(beginToken); int end = idxToken.get(endToken); // If the begin offset of the current annotation is lower than the highest offset so far // observed, then it is overlapping with some annotation that we have seen before. // Because OpenNLP NER does not support overlapping annotations, we skip it. if (begin < highestEndTokenPositionObserved) { LOG.debug("Skipping overlapping annotation: [{}-{}, {}]", begin, end + 1, label); continue; } if (isNotBlank(label)) { result.add(new Span(begin, end + 1, label)); highestEndTokenPositionObserved = end + 1; } } return result.toArray(new Span[result.size()]); }