com.carrotsearch.hppc.IntIntOpenHashMap Java Examples
The following examples show how to use
com.carrotsearch.hppc.IntIntOpenHashMap.
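com.carrotsearch.hppc.IntIntOpenHashMap is the primitive int-to-int hash map of the HPPC (High Performance Primitive Collections) library. It keeps keys and values in plain int[] arrays with open addressing, which avoids the per-entry boxing of java.util.HashMap<Integer, Integer>. As a primer for the examples below, here is a minimal usage sketch; it assumes HPPC 0.5/0.6, the versions these examples target (HPPC 0.7 renamed the class to IntIntHashMap and removed some of the methods used here, such as lget()).

import com.carrotsearch.hppc.IntIntOpenHashMap;

public class BasicUsage {
    public static void main(String[] args) {
        IntIntOpenHashMap map = new IntIntOpenHashMap();
        map.put(1, 100);                // insert or overwrite
        map.putOrAdd(1, 100, 25);       // key present: 100 + 25 = 125
        map.putOrAdd(2, 7, 7);          // key absent: inserted with value 7

        System.out.println(map.get(1));         // 125
        System.out.println(map.get(42));        // 0 (default for missing keys)
        System.out.println(map.containsKey(2)); // true
        System.out.println(map.size());         // 2

        map.remove(2);
        map.clear(); // empties the map but keeps its internal buffers
    }
}

Note that missing keys read as 0, so code that must distinguish "absent" from "mapped to 0" has to call containsKey() first, as several of the examples below do.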
Example #1
Source File: SpanMergingEvaluatorDecorator.java From gerbil with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings("unchecked")
protected List<T> merge(List<T> spans) {
    Span spanArray[] = spans.toArray(new Span[spans.size()]);
    Arrays.sort(spanArray, this);
    IntIntOpenHashMap enclosedByMap = new IntIntOpenHashMap();
    boolean isEnclosed;
    for (int i = 0; i < spanArray.length; ++i) {
        isEnclosed = false;
        for (int j = spanArray.length - 1; (j > i) && (!isEnclosed); --j) {
            // if spanArray[i] is enclosed by spanArray[j]
            if ((spanArray[i].getStartPosition() >= spanArray[j].getStartPosition())
                    && ((spanArray[i].getStartPosition() + spanArray[i].getLength()) <= (spanArray[j]
                            .getStartPosition() + spanArray[j].getLength()))) {
                enclosedByMap.put(i, j);
                isEnclosed = true;
            }
        }
    }
    // if no match could be found
    if (enclosedByMap.size() == 0) {
        return spans;
    }
    List<T> mergedMarkings = new ArrayList<T>(spans.size());
    // starting with the smallest span, check if a span is enclosed by
    // another
    int largerSpanId;
    for (int i = 0; i < spanArray.length; ++i) {
        if (enclosedByMap.containsKey(i)) {
            largerSpanId = enclosedByMap.lget();
            spanArray[largerSpanId] = merge(spanArray[i], spanArray[largerSpanId]);
        } else {
            mergedMarkings.add((T) spanArray[i]);
        }
    }
    return mergedMarkings;
}
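The containsKey(i) followed by lget() in the second loop is an HPPC-specific idiom: lget() returns the value for the key most recently probed via containsKey(), so each lookup hits the hash table only once instead of twice. A minimal sketch of the idiom, assuming HPPC 0.5/0.6 (lget() no longer exists in HPPC 0.7+):

import com.carrotsearch.hppc.IntIntOpenHashMap;

public class LgetIdiom {
    public static void main(String[] args) {
        IntIntOpenHashMap map = new IntIntOpenHashMap();
        map.put(3, 42);

        // Single probe: containsKey() remembers the slot it found,
        // and lget() reads the value from that slot without re-hashing.
        if (map.containsKey(3)) {
            System.out.println(map.lget()); // 42
        }

        // Equivalent two-probe version; it searches the same slot twice.
        if (map.containsKey(3)) {
            System.out.println(map.get(3));
        }
    }
}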
Example #2
Source File: WindowSupportingLuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = new IntObjectOpenHashMap<IntArrayList[]>();
    for (int i = 0; i < words.length; ++i) {
        requestDocumentsWithWord(words[i], positionsInDocs, docLengths, i, words.length);
    }
    return positionsInDocs;
}
Example #3
Source File: AbstractWindowBasedFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
protected int[] determineCounts(String wordset[]) {
    int counts[] = new int[(1 << wordset.length)];
    IntArrayList positions[];
    IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = corpusAdapter
            .requestWordPositionsInDocuments(wordset, docLengths);
    for (int i = 0; i < positionsInDocs.keys.length; ++i) {
        if (positionsInDocs.allocated[i]) {
            positions = ((IntArrayList[]) ((Object[]) positionsInDocs.values)[i]);
            addCountsFromDocument(positions, counts, docLengths.get(positionsInDocs.keys[i]));
        }
    }
    return counts;
}
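Notice that the loop above uses no iterator at all: it scans the map's internal parallel arrays directly, where allocated[i] marks which slots of keys[] and values[] are occupied. Several examples below use the same direct-buffer style. A sketch contrasting it with the cursor-based alternative, assuming HPPC 0.5/0.6:

import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;

public class IterationStyles {
    public static void main(String[] args) {
        IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
        docLengths.put(0, 120);
        docLengths.put(7, 344);

        // Style 1: scan the internal buffers; allocated[i] marks the
        // occupied slots of the parallel keys[]/values[] arrays.
        for (int i = 0; i < docLengths.keys.length; ++i) {
            if (docLengths.allocated[i]) {
                System.out.println(docLengths.keys[i] + " -> " + docLengths.values[i]);
            }
        }

        // Style 2: cursor iteration; one cursor object is reused for
        // every entry, so nothing is boxed per element.
        for (IntIntCursor c : docLengths) {
            System.out.println(c.key + " -> " + c.value);
        }
    }
}

In the generic IntObjectOpenHashMap the values buffer is an Object[], which is why Example #3 needs the double cast before indexing into it.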
Example #4
Source File: BooleanSlidingWindowProbabilitySupplierTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if (positions[i].length > 0) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
Example #5
Source File: ContextWindowFrequencyDeterminerCountingTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if ((positions[i] != null) && (positions[i].length > 0)) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
Example #6
Source File: BooleanSlidingWindowFrequencyDeterminerCountingTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if ((positions[i] != null) && (positions[i].length > 0)) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
Example #7
Source File: GeneralizationHierarchy.java From arx with Apache License 2.0 | 5 votes |
/**
 * Throws an exception if the hierarchy is not monotonic.
 *
 * @param manager
 */
public void checkMonotonicity(DataManager manager) {

    // Obtain dictionary
    String[] dictionary = null;
    String[] header = manager.getDataGeneralized().getHeader();
    for (int i = 0; i < header.length; i++) {
        if (header[i].equals(attribute)) {
            dictionary = manager.getDataGeneralized().getDictionary().getMapping()[i];
        }
    }

    // Check
    if (dictionary == null) {
        throw new IllegalStateException("Cannot obtain dictionary for attribute (" + attribute + ")");
    }

    // Level value -> level+1 value
    final IntIntOpenHashMap hMap = new IntIntOpenHashMap();

    // Input->level->output.
    for (int level = 0; level < (map[0].length - 1); level++) {
        hMap.clear();
        for (int i = 0; i < map.length; i++) {
            final int outputCurrentLevel = map[i][level];
            final int outputNextLevel = map[i][level + 1];
            if (hMap.containsKey(outputCurrentLevel)) {
                final int compare = hMap.get(outputCurrentLevel);
                if (compare != outputNextLevel) {
                    String in = dictionary[outputCurrentLevel];
                    String out1 = dictionary[compare];
                    String out2 = dictionary[outputNextLevel];
                    throw new IllegalArgumentException("The transformation rule for the attribute '" + attribute
                            + "' is not a hierarchy. (" + in + ") can either be transformed to (" + out1
                            + ") or to (" + out2 + ")");
                }
            } else {
                hMap.put(outputCurrentLevel, outputNextLevel);
            }
        }
    }
}
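The map here acts as a witness that each transformation level is a function: every level value must always map to the same level+1 value, and clear() recycles a single map across all levels instead of allocating a new one per level. The core check in isolation, with hypothetical data:

import com.carrotsearch.hppc.IntIntOpenHashMap;

public class FunctionalCheck {
    /** Returns true iff every left value is always paired with the same right value. */
    static boolean isFunctional(int[][] pairs) {
        IntIntOpenHashMap seen = new IntIntOpenHashMap();
        for (int[] pair : pairs) {
            if (seen.containsKey(pair[0])) {
                if (seen.get(pair[0]) != pair[1]) {
                    return false; // pair[0] maps to two different values
                }
            } else {
                seen.put(pair[0], pair[1]);
            }
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(isFunctional(new int[][] { { 1, 2 }, { 1, 2 }, { 3, 4 } })); // true
        System.out.println(isFunctional(new int[][] { { 1, 2 }, { 1, 3 } }));           // false
    }
}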
Example #8
Source File: WindowSupportingLuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 4 votes |
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
        IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
    DocsAndPositionsEnum docPosEnum = null;
    Term term = new Term(fieldName, word);
    int localDocId, globalDocId, baseDocId;
    IntArrayList positions[];
    try {
        for (int i = 0; i < reader.length; i++) {
            docPosEnum = reader[i].termPositionsEnum(term);
            baseDocId = contexts[i].docBase;
            if (docPosEnum != null) {
                while (docPosEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    localDocId = docPosEnum.docID();
                    globalDocId = localDocId + baseDocId;
                    // if this is the first word and we found a new document
                    if (!positionsInDocs.containsKey(globalDocId)) {
                        positions = new IntArrayList[numberOfWords];
                        positionsInDocs.put(globalDocId, positions);
                    } else {
                        positions = positionsInDocs.get(globalDocId);
                    }
                    if (positions[wordId] == null) {
                        positions[wordId] = new IntArrayList();
                    }
                    // Go through the positions inside this document
                    for (int p = 0; p < docPosEnum.freq(); ++p) {
                        positions[wordId].add(docPosEnum.nextPosition());
                    }
                    if (!docLengths.containsKey(globalDocId)) {
                        // Get the length of the document
                        docLengths.put(globalDocId,
                                reader[i].document(localDocId).getField(docLengthFieldName)
                                        .numericValue().intValue());
                    }
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
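Here docLengths also serves as a memo: the stored-field lookup of a document's length runs only the first time a globalDocId is seen, however many query words hit that document. The guard pattern in isolation, with a hypothetical expensive lookup standing in for the Lucene call:

import com.carrotsearch.hppc.IntIntOpenHashMap;

public class MemoPattern {
    // Placeholder for an expensive operation, e.g. reading a stored field.
    static int expensiveLookup(int docId) {
        return docId * 10;
    }

    public static void main(String[] args) {
        IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
        int[] hits = { 3, 5, 3, 3, 5 };
        for (int docId : hits) {
            if (!docLengths.containsKey(docId)) {
                docLengths.put(docId, expensiveLookup(docId)); // first hit only
            }
        }
        System.out.println(docLengths.size()); // 2
    }
}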
Example #9
Source File: PositionStoringLuceneIndexCreatorTest.java From Palmetto with GNU Affero General Public License v3.0 | 4 votes |
@Test
public void test() throws CorruptIndexException, IOException {
    File indexDir = new File(FileUtils.getTempDirectoryPath() + File.separator + "temp_index"
            + Long.toString(System.nanoTime()));
    Assert.assertTrue(indexDir.mkdir());
    Iterator<IndexableDocument> docIterator = Arrays.asList(DOCUMENTS).iterator();
    // create the index
    PositionStoringLuceneIndexCreator creator = new PositionStoringLuceneIndexCreator(
            Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    Assert.assertTrue(creator.createIndex(indexDir, docIterator));
    LuceneIndexHistogramCreator hCreator = new LuceneIndexHistogramCreator(
            Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    hCreator.createLuceneIndexHistogram(indexDir.getAbsolutePath());

    // test the created index
    // create an adapter
    WindowSupportingLuceneCorpusAdapter adapter = null;
    try {
        adapter = WindowSupportingLuceneCorpusAdapter.create(indexDir.getAbsolutePath(),
                Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
        // query the test words
        IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
        IntObjectOpenHashMap<IntArrayList[]> wordPositions = adapter.requestWordPositionsInDocuments(TEST_WORDS,
                docLengths);
        // compare the result with the expected counts
        int positionInDoc;
        IntArrayList[] positionsInDocs;
        for (int i = 0; i < EXPECTED_WORD_POSITIONS.length; ++i) {
            positionsInDocs = wordPositions.get(i);
            for (int j = 0; j < positionsInDocs.length; ++j) {
                if (EXPECTED_WORD_POSITIONS[i][j] < 0) {
                    Assert.assertNull("Expected null because the word \"" + TEST_WORDS[j]
                            + "\" shouldn't be found inside document " + i + ". But got a position list instead.",
                            positionsInDocs[j]);
                } else {
                    Assert.assertEquals(1, positionsInDocs[j].elementsCount);
                    positionInDoc = positionsInDocs[j].buffer[0];
                    Assert.assertEquals("Expected the word \"" + TEST_WORDS[j] + "\" in document " + i
                            + " at position " + EXPECTED_WORD_POSITIONS[i][j] + " but got position "
                            + positionInDoc + " from the index.", EXPECTED_WORD_POSITIONS[i][j], positionInDoc);
                }
            }
        }
        // test the window based counting
        BooleanSlidingWindowFrequencyDeterminer determiner = new BooleanSlidingWindowFrequencyDeterminer(adapter,
                WINDOW_SIZE);
        CountedSubsets subsets = determiner.determineCounts(new String[][] { TEST_WORDS },
                new SegmentationDefinition[] { new SegmentationDefinition(new int[0], new int[0][0], null) })[0];
        Assert.assertArrayEquals(EXPECTED_COUNTS, subsets.counts);
    } finally {
        if (adapter != null) {
            adapter.close();
        }
    }
}
Example #10
Source File: BooleanSlidingWindowFrequencyDeterminerSumCreationTest.java From Palmetto with GNU Affero General Public License v3.0 | 4 votes |
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    return null;
}
Example #11
Source File: MetricMDNUEntropyPrecomputed.java From arx with Apache License 2.0 | 4 votes |
/**
 * Implements the score function described in Section 5.3 of the article
 *
 * Bild R, Kuhn KA, Prasser F. SafePub: A Truthful Data Anonymization Algorithm With Strong Privacy Guarantees.
 * Proceedings on Privacy Enhancing Technologies. 2018(1):67-87.
 */
@Override
public ILScore getScore(final Transformation<?> node, final HashGroupify groupify) {
    if (k < 0) {
        throw new RuntimeException("Parameters required for differential privacy have not been initialized yet");
    }

    // Prepare
    int dimensionsGeneralized = getDimensionsGeneralized();
    IntIntOpenHashMap[] nonSuppressedValueToCount = new IntIntOpenHashMap[dimensionsGeneralized];
    for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
        nonSuppressedValueToCount[dimension] = new IntIntOpenHashMap();
    }

    // Compute score. The casts to long are required to avoid integer overflows
    // when large numbers are being multiplied.
    BigFraction score = BigFraction.ZERO;
    HashGroupifyEntry m = groupify.getFirstEquivalenceClass();
    while (m != null) {
        m.read();
        for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
            int value = m.next();
            // Process values of records which have not been suppressed by sampling
            if (m.isNotOutlier && (rootValues[dimension] == -1 || value != rootValues[dimension])) {
                // The attribute value has neither been suppressed because of record
                // suppression nor because of generalization
                nonSuppressedValueToCount[dimension].putOrAdd(value, m.count, m.count);
            } else {
                // The attribute value has been suppressed because of record suppression
                // or because of generalization
                score = score.add(new BigFraction((long) m.count * (long) rows));
            }
            // Add values for records which have been suppressed by sampling
            score = score.add(new BigFraction((long) (m.pcount - m.count) * (long) rows));
        }
        m = m.nextOrdered;
    }

    // Add values for all attribute values which were not suppressed
    for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
        final boolean[] states = nonSuppressedValueToCount[dimension].allocated;
        final int[] counts = nonSuppressedValueToCount[dimension].values;
        for (int i = 0; i < states.length; i++) {
            if (states[i]) {
                score = score.add(new BigFraction((long) counts[i] * (long) counts[i]));
            }
        }
    }

    // Adjust sensitivity and multiply with -1 so that higher values are better
    score = score.multiply(BigFraction.MINUS_ONE.divide(new BigFraction((long) rows * (long) dimensionsGeneralized)));
    score = score.divide((k == 1) ? new BigFraction(5)
            : new BigFraction(k * k).divide(new BigFraction(k - 1)).add(BigFraction.ONE));

    // Return score
    return new ILScore(score);
}
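putOrAdd(key, putValue, incrementValue) is HPPC's single-probe upsert: an absent key is inserted with putValue, an existing key gets incrementValue added to its stored value. Passing m.count as both arguments, as above, therefore accumulates per-value record counts. A minimal counting sketch, assuming HPPC 0.5/0.6:

import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;

public class PutOrAddCounting {
    public static void main(String[] args) {
        int[] observations = { 4, 7, 4, 4, 9, 7 };
        IntIntOpenHashMap histogram = new IntIntOpenHashMap();
        for (int value : observations) {
            // insert 1 if absent, otherwise add 1 to the existing count
            histogram.putOrAdd(value, 1, 1);
        }
        for (IntIntCursor c : histogram) {
            System.out.println("value " + c.key + " occurred " + c.value + " times");
        }
        // prints (in hash order): 4 -> 3, 7 -> 2, 9 -> 1
    }
}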
Example #12
Source File: RiskModelHistogram.java From arx with Apache License 2.0 | 4 votes |
/**
 * Convert and analyze
 *
 * @param grouped
 * @param stop
 * @param progress
 */
private void convertAndAnalyze(IntIntOpenHashMap grouped, final WrappedBoolean stop,
        final WrappedInteger progress) {

    // Convert
    int[][] temp = new int[grouped.size()][2];
    int idx = 0;
    final int[] values2 = grouped.values;
    final int[] keys2 = grouped.keys;
    final boolean[] states2 = grouped.allocated;
    for (int i = 0; i < states2.length; i++) {
        if (states2[i]) {
            temp[idx++] = new int[] { keys2[i], values2[i] };
        }
        if (stop.value) {
            throw new ComputationInterruptedException();
        }
    }
    grouped = null;

    // Sort ascending by size
    Arrays.sort(temp, new Comparator<int[]>() {
        public int compare(int[] o1, int[] o2) {
            if (stop.value) {
                throw new ComputationInterruptedException();
            }
            return Integer.compare(o1[0], o2[0]);
        }
    });

    // Convert and analyze
    int numClasses = 0;
    int numTuples = 0;
    this.equivalenceClasses = new int[temp.length * 2];
    idx = 0;
    for (int[] entry : temp) {
        this.equivalenceClasses[idx++] = entry[0];
        this.equivalenceClasses[idx++] = entry[1];
        numClasses += entry[1];
        numTuples += entry[0] * entry[1];
        if (stop.value) {
            throw new ComputationInterruptedException();
        }
    }
    this.numRecords = numTuples;
    this.numClasses = numClasses;
    this.avgClassSize = this.numRecords / this.numClasses;
}
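An HPPC hash map keeps no entry order, so sorting a histogram means extracting its entries into an array first, which is exactly what the method above does through the internal buffers. The same extract-then-sort step, condensed and written with cursor iteration instead:

import java.util.Arrays;
import java.util.Comparator;

import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;

public class SortedHistogram {
    public static void main(String[] args) {
        // key = class size, value = number of classes of that size
        IntIntOpenHashMap histogram = new IntIntOpenHashMap();
        histogram.put(5, 2);
        histogram.put(1, 10);
        histogram.put(3, 4);

        int[][] entries = new int[histogram.size()][];
        int idx = 0;
        for (IntIntCursor c : histogram) {
            entries[idx++] = new int[] { c.key, c.value };
        }
        Arrays.sort(entries, new Comparator<int[]>() {
            public int compare(int[] a, int[] b) {
                return Integer.compare(a[0], b[0]); // ascending by class size
            }
        });
        for (int[] e : entries) {
            System.out.println(e[1] + " classes of size " + e[0]);
        }
    }
}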
Example #13
Source File: ImportWizardPageCSV.java From arx with Apache License 2.0 | 4 votes |
/**
 * Tries to detect the separator used within this file
 *
 * This goes through up to {@link ImportWizardModel#PREVIEW_MAX_LINES} lines
 * and tries to detect the used separator by counting how often each of
 * the available {@link #delimiters} is used.
 *
 * @throws IOException In case the file couldn't be accessed successfully
 */
private void detectDelimiter() throws IOException {

    Charset charset = getCharset();
    final BufferedReader r = new BufferedReader(
            new InputStreamReader(new FileInputStream(comboLocation.getText()), charset));
    final IntIntOpenHashMap map = new IntIntOpenHashMap();
    final CharIntOpenHashMap delimitors = new CharIntOpenHashMap();
    for (int i = 0; i < this.delimiters.length; i++) {
        delimitors.put(this.delimiters[i], i);
    }
    int countLines = 0;
    int countChars = 0;

    /* Iterate over data */
    String line = r.readLine();
    outer: while ((countLines < ImportWizardModel.PREVIEW_MAX_LINES) && (line != null)) {

        /* Iterate over line character by character */
        final char[] a = line.toCharArray();
        for (final char c : a) {
            if (delimitors.containsKey(c)) {
                // putOrAdd(key, 0, 1): the first occurrence stores 0 and every
                // further occurrence adds 1. All counts are uniformly off by one,
                // which does not change which delimiter wins the max scan below.
                map.putOrAdd(delimitors.get(c), 0, 1);
            }
            countChars++;
            if (countChars > ImportWizardModel.DETECT_MAX_CHARS) {
                break outer;
            }
        }
        line = r.readLine();
        countLines++;
    }
    r.close();

    if (map.isEmpty()) {
        selectedDelimiter = 0;
        return;
    }

    /* Check which separator was used the most */
    int max = Integer.MIN_VALUE;
    final int[] keys = map.keys;
    final int[] values = map.values;
    final boolean[] allocated = map.allocated;
    for (int i = 0; i < allocated.length; i++) {
        if (allocated[i] && values[i] > max) {
            max = values[i];
            selectedDelimiter = keys[i];
        }
    }
}
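The final buffer scan is simply an argmax over the map's values. The same scan written with cursor iteration, as a reusable sketch (assuming HPPC 0.5/0.6; the helper name and data are illustrative only):

import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;

public class ArgMax {
    /** Returns the key with the largest value, or defaultKey if the map is empty. */
    static int argMax(IntIntOpenHashMap map, int defaultKey) {
        int bestKey = defaultKey;
        int max = Integer.MIN_VALUE;
        for (IntIntCursor c : map) {
            if (c.value > max) {
                max = c.value;
                bestKey = c.key;
            }
        }
        return bestKey;
    }

    public static void main(String[] args) {
        IntIntOpenHashMap counts = new IntIntOpenHashMap();
        counts.put(0, 17); // e.g. delimiter index 0 seen 17 times
        counts.put(1, 3);
        System.out.println(argMax(counts, 0)); // 0
    }
}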
Example #14
Source File: WindowSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 2 votes |
/**
 * Returns the positions of the given words inside the corpus.
 *
 * @param words
 *            the words for which the positions inside the documents should
 *            be determined
 * @param docLengths
 *            empty int-int map in which the document lengths and counts are
 *            inserted
 * @return the positions of the given words inside the corpus
 */
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String words[],
        IntIntOpenHashMap docLengths);
Example #15
Source File: RiskModelHistogram.java From arx with Apache License 2.0 | 2 votes |
/**
 * Creates a new instance from the given distribution.
 * IMPORTANT: Suppressed records should have been ignored before calling this.
 *
 * @param distribution
 */
public RiskModelHistogram(final IntIntOpenHashMap distribution) {
    this.convertAndAnalyze(distribution, new WrappedBoolean(), new WrappedInteger());
}