gnu.trove.list.TIntList Java Examples

The following examples show how to use gnu.trove.list.TIntList. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DateEncoderTest.java    From htm.java with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks bucket index support: the bucket indices computed for the fixture
 * date are converted back to bucket info, each decoded value is compared to
 * its expected value (tolerance 4.0), and the concatenated encodings are
 * compared to the expected encoding.
 */
@Test
public void testBucketIndexSupport() {
    setUp();
    initDE();

    int[] bucketIndices = de.getBucketIndices(dt);
    System.out.println(String.format("bucket indices: %s", Arrays.toString(bucketIndices)));
    List<Encoding> bucketInfo = de.getBucketInfo(bucketIndices);

    List<Double> expectedValues = Arrays.asList(320.25, 3.5, .167, 14.8);
    TIntList collectedEncodings = new TIntArrayList();

    int position = 0;
    for (Encoding info : bucketInfo) {
        double actualValue = (double)info.getValue();
        double expectedValue = expectedValues.get(position);
        assertEquals(expectedValue, actualValue, 4.0);

        collectedEncodings.addAll(info.getEncoding());
        position++;
    }

    // NOTE(review): `expected` is not declared in this method; presumably a
    // fixture field holding the expected encoding — confirm in the test class.
    assertArrayEquals(expected, collectedEncodings.toArray());
}
 
Example #2
Source File: PartEnumJoin.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the values shared by two lists using a two-pointer scan, giving up
 * as soon as the required overlap can no longer be reached.
 * NOTE(review): the scan only advances the side holding the smaller value,
 * so it presumably expects both lists sorted ascending — confirm at callers.
 *
 * @param a       first list
 * @param b       second list
 * @param overlap minimum number of shared values the caller needs
 * @return the number of shared values found, or -1 if {@code overlap}
 *         became unreachable during the scan
 */
private int check_overlap(TIntList a, TIntList b, int overlap) {
    final int sizeA = a.size();
    final int sizeB = b.size();
    int indexA = 0;
    int indexB = 0;
    int shared = 0;
    while (indexA < sizeA && indexB < sizeB) {
        // Even if every remaining element matched, would we reach the target?
        final int bestCaseRemaining = Math.min(sizeA - indexA, sizeB - indexB);
        if (shared + bestCaseRemaining < overlap) {
            return -1;
        }
        final int left = a.get(indexA);
        final int right = b.get(indexB);
        if (left == right) {
            shared++;
            indexA++;
            indexB++;
        } else if (left < right) {
            indexA++;
        } else {
            indexB++;
        }
    }
    return shared;
}
 
Example #3
Source File: FuzzySetSimJoin.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Finds the matches for a single query set by running four stages in order:
 * signature generation, check filter, nearest-neighbor filter, verification.
 *
 * @param querySet     the query set
 * @param collection   the collection searched for matches
 * @param simThreshold similarity threshold handed to every stage
 * @param idx          index handed to the signature and check-filter stages
 * @return the result of the verification stage
 */
private TIntFloatHashMap search(int[][] querySet, int[][][] collection, float simThreshold,
        TIntObjectMap<TIntList>[] idx) {

    // Stage 1: signature generation
    final TIntSet[] signature = computeUnflattenedSignature(querySet, simThreshold, idx);

    // Stage 2: candidate selection and check filter
    final TIntObjectMap<TIntFloatMap> afterCheckFilter =
            applyCheckFilter(querySet, collection, signature, idx, simThreshold);

    // Stage 3: nearest neighbor filter
    final TIntSet afterNnFilter = applyNNFilter(querySet, collection, afterCheckFilter, simThreshold);

    // Stage 4: verification
    return verifyCandidates(querySet, collection, afterNnFilter, simThreshold);
}
 
Example #4
Source File: FuzzySetSimJoin.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the join between two already transformed and indexed collections.
 * An index is built over {@code collection2}; every set of
 * {@code collection1} is then searched against it.
 *
 * @param collection1  the collection whose sets are used as queries
 * @param collection2  the collection that is indexed and searched
 * @param simThreshold similarity threshold passed to the search
 * @return a map keyed by {@code "<index in collection1>_<matched key>"}
 *         holding the value reported by the search for that pair
 */
HashMap<String, Float> join(int[][][] collection1, int[][][] collection2, float simThreshold) {
    final HashMap<String, Float> pairs = new HashMap<>();

    // Build the index once; it is reused by every search below.
    final TIntObjectMap<TIntList>[] invertedIndex = buildSetInvertedIndex(collection2, tokenDict.size());

    int queryId = 0;
    for (int[][] querySet : collection1) {
        final TIntFloatHashMap found = search(querySet, collection2, simThreshold, invertedIndex);
        for (int matchedId : found.keys()) {
            pairs.put(queryId + "_" + matchedId, found.get(matchedId));
        }
        queryId++;
    }

    return pairs;
}
 
Example #5
Source File: BlockFiltering.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Rebuilds each unilateral block keeping only the entities whose counter in
 * {@code counterD1} is still below their limit in {@code limitsD1}. Blocks
 * left with fewer than two entities are dropped; for every kept block the
 * counters of its retained entities are incremented.
 *
 * @param blocks the blocks to restructure; each is cast to {@code UnilateralBlock}
 * @return the new blocks, in the order of the input
 */
protected List<AbstractBlock> restructureUnilateraBlocks(List<AbstractBlock> blocks) {
    final List<AbstractBlock> restructured = new ArrayList<>();
    for (AbstractBlock block : blocks) {
        final UnilateralBlock current = (UnilateralBlock) block;

        final TIntList kept = new TIntArrayList();
        for (int entityId : current.getEntities()) {
            if (counterD1[entityId] < limitsD1[entityId]) {
                kept.add(entityId);
            }
        }

        // A block needs at least two entities to be worth keeping.
        if (kept.size() <= 1) {
            continue;
        }

        final int[] keptIds = kept.toArray();
        for (int entityId : keptIds) {
            counterD1[entityId]++;
        }
        restructured.add(new UnilateralBlock(current.getEntropy(), keptIds));
    }

    return restructured;
}
 
Example #6
Source File: AbstractAttributeClustering.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Resets {@code globalMaxSimilarities} and, for each attribute up to the
 * delimiter (or all attributes when no positive delimiter is set), gathers
 * the attribute ids sharing a signature with it through the inverted index
 * and runs the Clean-Clean or Dirty ER comparisons on them.
 */
protected void compareAttributes() {
    globalMaxSimilarities = new float[noOfAttributes];

    final int lastId;
    if (0 < attributesDelimiter) {
        lastId = attributesDelimiter;
    } else {
        lastId = noOfAttributes;
    }

    // Reused across iterations; cleared at the start of each one.
    final TIntSet coOccurring = new TIntHashSet();
    for (int attributeId = 0; attributeId < lastId; attributeId++) {
        coOccurring.clear();

        for (String signature : attributeModels[DATASET_1][attributeId].getSignatures()) {
            final TIntList sharingIds = invertedIndex.get(signature);
            if (sharingIds != null) {
                coOccurring.addAll(sharingIds);
            }
        }

        if (attributesDelimiter > 0) {
            // Clean-Clean ER
            executeCleanCleanErComparisons(attributeId, coOccurring);
        } else {
            // Dirty ER
            executeDirtyErComparisons(attributeId, coOccurring);
        }
    }
}
 
Example #7
Source File: DateEncoder.java    From htm.java with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Returns the concatenated bucket indices of every sub-encoder, where the
 * i-th sub-encoder is fed the i-th scalar derived from the input date.
 * To get the associated field names for each of the buckets, call
 * getScalarNames().
 *
 * @param input the date to compute bucket indices for
 * @return array of bucket indices
 * @throws IllegalStateException if this encoder has no sub-encoders
 */
public int[] getBucketIndices(DateTime input) {
    TDoubleList scalars = getScalars(input);

    TIntList indices = new TIntArrayList();
    List<EncoderTuple> encoders = getEncoders(this);
    if(encoders == null || encoders.isEmpty()) {
        throw new IllegalStateException("Should be implemented in base classes that are not " +
                "containers for other encoders");
    }

    int scalarPos = 0;
    for(EncoderTuple tuple : encoders) {
        indices.addAll(tuple.getEncoder().getBucketIndices(scalars.get(scalarPos)));
        scalarPos++;
    }
    return indices.toArray();
}
 
Example #8
Source File: GraphConfidenceEstimator.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Will return numElements integers from the input elements. If numElements
 * is larger than elements.size(), everything will be returned.
 *
 * @param elements    Elements to choose from.
 * @param numElements Number of elements to choose.
 * @return numElements random integers from elements.
 */
private TIntSet getRandomElements(TIntSet elements, int numElements) {
  TIntList source = new TIntArrayList(elements.toArray());
  TIntSet randomElements = new TIntHashSet();
  for (int i = 0; i < numElements && !source.isEmpty(); ++i) {
    int elementPosition = random_.nextInt(source.size());
    // Remove by position: the index is already known, so there is no need
    // for the by-value search that remove(int) performs.
    // TODO: removeAt still shifts the tail of the backing array ... make
    // this more efficient when necessary.
    int element = source.removeAt(elementPosition);
    randomElements.add(element);
  }
  return randomElements;
}
 
Example #9
Source File: SortedNeighborhoodBlocking.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one array of entity ids following the order of the given blocking
 * keys. For every key, the ids indexed in dataset 1 and the ids indexed in
 * dataset 2 (offset by the size of dataset 1) are pooled, shuffled with
 * {@code random}, and appended to the result.
 *
 * @param sortedTerms the blocking keys, in the order they should be visited
 * @return the entity ids of all keys, shuffled within each key
 */
protected int[] getMixedSortedEntities(String[] sortedTerms) {
    final int d2Offset = entityProfilesD1.size();
    final TIntList allIds = new TIntArrayList();

    for (String key : sortedTerms) {
        final TIntList idsForKey = new TIntArrayList();

        final TIntList fromD1 = invertedIndexD1.get(key);
        if (fromD1 != null) {
            idsForKey.addAll(fromD1);
        }

        final TIntList fromD2 = invertedIndexD2.get(key);
        if (fromD2 != null) {
            // Shift dataset-2 ids past the dataset-1 id range.
            for (int pos = 0; pos < fromD2.size(); pos++) {
                idsForKey.add(d2Offset + fromD2.get(pos));
            }
        }

        idsForKey.shuffle(random);
        allIds.addAll(idsForKey);
    }

    return allIds.toArray();
}
 
Example #10
Source File: ExtendedSortedNeighborhoodBlocking.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Slides a window of {@code windowSize} consecutive blocking keys over the
 * sorted union of the keys of both inverted indices. For each window the
 * entity ids of both datasets are gathered, and a bilateral block is added
 * when both sides are non-empty.
 */
@Override
protected void parseIndices() {
    final Set<String> allKeys = new HashSet<>(invertedIndexD1.keySet());
    allKeys.addAll(invertedIndexD2.keySet());
    final String[] sortedTerms = allKeys.toArray(new String[0]);
    Arrays.sort(sortedTerms);

    // Last window start for which the whole window still fits.
    final int lastStart = sortedTerms.length - windowSize;
    for (int start = 0; start <= lastStart; start++) {
        final TIntSet idsD1 = new TIntHashSet();
        final TIntSet idsD2 = new TIntHashSet();
        for (int offset = 0; offset < windowSize; offset++) {
            final String term = sortedTerms[start + offset];

            final TIntList termD1 = invertedIndexD1.get(term);
            if (termD1 != null) {
                idsD1.addAll(termD1);
            }

            final TIntList termD2 = invertedIndexD2.get(term);
            if (termD2 != null) {
                idsD2.addAll(termD2);
            }
        }

        if (idsD1.isEmpty() || idsD2.isEmpty()) {
            continue;
        }
        blocks.add(new BilateralBlock(idsD1.toArray(), idsD2.toArray()));
    }
}
 
Example #11
Source File: PatternMachine.java    From htm.java-examples with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Constructs a new {@code PatternMachine}, storing the given configuration,
 * seeding the random number generator, and then invoking {@code generate()}.
 *
 * @param n     Number of available bits in pattern
 * @param w     Number of on bits in pattern, each pattern
 *              will have a `w` randomly selected from the list.
 * @param num   Number of available patterns
 * @param seed  Random seed
 */
public PatternMachine(int n, TIntList w, int num, int seed) {
    this.n = n;
    this.wList = w;
    this.numPatterns = num;
    this.random = new MersenneTwister(new int[] { seed });
    this.patterns = new LinkedHashMap<>();

    // All fields must be set before this call.
    generate();
}
 
Example #12
Source File: PatternMachine.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Constructs a new {@code PatternMachine}, storing the given configuration,
 * seeding the random number generator, and then invoking {@code generate()}.
 *
 * @param n     Number of available bits in pattern
 * @param w     Number of on bits in pattern, each pattern
 *              will have a `w` randomly selected from the list.
 * @param num   Number of available patterns
 * @param seed  Random seed
 */
public PatternMachine(int n, TIntList w, int num, int seed) {
    this.n = n;
    this.wList = w;
    this.numPatterns = num;
    this.random = new MersenneTwister(new int[] { seed });
    this.patterns = new LinkedHashMap<>();

    // All fields must be set before this call.
    generate();
}
 
Example #13
Source File: SortedNeighborhoodBlocking.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one array of entity ids following the order of the given blocking
 * keys. The ids indexed under each key are shuffled with {@code random}
 * before being appended to the result.
 * <p>
 * The shuffle is applied to a copy, so the lists stored in
 * {@code invertedIndexD1} are left untouched, and keys that are missing
 * from the index are skipped.
 *
 * @param sortedTerms the blocking keys, in the order they should be visited
 * @return the entity ids of all keys, shuffled within each key
 */
protected int[] getSortedEntities(String[] sortedTerms) {
    final TIntList sortedEntityIds = new TIntArrayList();

    for (String blockingKey : sortedTerms) {
        final TIntList indexedIds = invertedIndexD1.get(blockingKey);
        if (indexedIds == null) {
            continue;
        }

        // Shuffle a private copy rather than the list owned by the index.
        final TIntList sortedIds = new TIntArrayList();
        sortedIds.addAll(indexedIds);
        sortedIds.shuffle(random);
        sortedEntityIds.addAll(sortedIds);
    }

    return sortedEntityIds.toArray();
}
 
Example #14
Source File: Encoder.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the concatenated bucket indices reported by every sub-encoder
 * for the given input. To get the associated field names for each of
 * the buckets, call getScalarNames().
 *
 * @param input the value handed to each sub-encoder
 * @return array of bucket indices
 * @throws IllegalStateException if this encoder has no sub-encoders
 */
public int[] getBucketIndices(String input) {
	Map<EncoderTuple, List<EncoderTuple>> encoders = getEncoders();
	if(encoders == null || encoders.size() <= 0) {
		throw new IllegalStateException("Should be implemented in base classes that are not " +
			"containers for other encoders");
	}

	TIntList indices = new TIntArrayList();
	for(EncoderTuple tuple : encoders.keySet()) {
		indices.addAll(tuple.getEncoder().getBucketIndices(input));
	}
	return indices.toArray();
}
 
Example #15
Source File: GroundTruthIndex.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the block indices shared by the two entities of a comparison.
 * NOTE(review): the inner scan stops at the first larger value, so the
 * per-entity block arrays are presumably sorted ascending — confirm.
 *
 * @param comparison the pair of entities; the second entity id is offset
 *                   by {@code datasetLimit} when looking up its blocks
 * @return the shared block indices; empty if either entity has no blocks
 */
public TIntList getTotalCommonIndices(Comparison comparison) {
    final TIntList common = new TIntArrayList();

    final int[] firstBlocks = entityBlocks[comparison.getEntityId1()];
    final int[] secondBlocks = entityBlocks[comparison.getEntityId2() + datasetLimit];
    if (firstBlocks != null && secondBlocks != null) {
        for (int current : firstBlocks) {
            for (int candidate : secondBlocks) {
                if (current < candidate) {
                    // Nothing further along can equal `current`.
                    break;
                }
                if (current == candidate) {
                    common.add(current);
                }
                // candidate < current: keep scanning.
            }
        }
    }

    return common;
}
 
Example #16
Source File: ByteBufUtils.java    From GregTech with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Reads a list of ints from the buffer: first a var-int element count,
 * then that many var-int values.
 *
 * @param buf the buffer to read from
 * @return the values read, in order; empty if the count is not positive
 */
public static TIntList readIntList(PacketBuffer buf) {
    final TIntList values = new TIntArrayList();
    for (int remaining = buf.readVarInt(); remaining > 0; remaining--) {
        values.add(buf.readVarInt());
    }
    return values;
}
 
Example #17
Source File: GroundTruthIndex.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the block indices shared by the two entities of a comparison,
 * but only if the first shared index found is {@code blockIndex}.
 * NOTE(review): the inner scan stops at the first larger value, so the
 * per-entity block arrays are presumably sorted ascending — confirm.
 *
 * @param blockIndex the block index expected to be the first shared one
 * @param comparison the pair of entities; the second entity id is offset
 *                   by {@code datasetLimit} when looking up its blocks
 * @return the shared block indices (empty if there are none, or if either
 *         entity has no blocks, matching {@code getTotalCommonIndices}),
 *         or {@code null} if the first shared index is not {@code blockIndex}
 */
public TIntList getCommonBlockIndices(int blockIndex, Comparison comparison) {
    final TIntList indices = new TIntArrayList();

    final int[] blocks1 = entityBlocks[comparison.getEntityId1()];
    final int[] blocks2 = entityBlocks[comparison.getEntityId2() + datasetLimit];
    if (blocks1 == null || blocks2 == null) {
        return indices;
    }

    boolean firstCommonIndex = false;
    for (int item : blocks1) {
        for (int value : blocks2) {
            if (value < item) {
                continue;
            }

            if (item < value) {
                break;
            }

            // Here item == value: a shared block index.
            if (!firstCommonIndex) {
                firstCommonIndex = true;
                if (item != blockIndex) {
                    return null;
                }
            }
            indices.add(item);
        }
    }

    return indices;
}
 
Example #18
Source File: Encoder.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the concatenated bucket indices reported by every sub-encoder
 * for the given input. To get the associated field names for each of
 * the buckets, call getScalarNames().
 *
 * @param input the value handed to each sub-encoder
 * @return array of bucket indices
 * @throws IllegalStateException if this encoder has no sub-encoders
 */
public int[] getBucketIndices(double input) {
	Map<EncoderTuple, List<EncoderTuple>> encoders = getEncoders();
	if(encoders == null || encoders.size() <= 0) {
		throw new IllegalStateException("Should be implemented in base classes that are not " +
			"containers for other encoders");
	}

	TIntList indices = new TIntArrayList();
	for(EncoderTuple tuple : encoders.keySet()) {
		indices.addAll(tuple.getEncoder().getBucketIndices(input));
	}
	return indices.toArray();
}
 
Example #19
Source File: Topology.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Get the points in the neighborhood of a point.
 *
 * For each dimension a generator is created from
 * {@code max(0, center - radius)} to
 * {@code min(dimension - 1, center + radius) + 1}, so the neighborhood is
 * truncated when it is near an edge. The result is the cartesian product of
 * the generated values over all dimensions, converted back to flat indices.
 * Stores the decoded center and the generators in {@code centerPosition}
 * and {@code igs}.
 *
 * @param centerIndex       The flat index of the center point, decoded with
 *                          {@code coordinatesFromIndex}.
 * @param radius            The radius of this neighborhood about the centerIndex.
 * @return  The flat indices of the points in the neighborhood.
 */
public int[] neighborhood(int centerIndex, int radius) {
    centerPosition = coordinatesFromIndex(centerIndex);

    igs = IntStream.range(0, dimensions.length)
        .mapToObj(dim -> {
            int low = Math.max(0, centerPosition[dim] - radius);
            int high = Math.min(dimensions[dim] - 1, centerPosition[dim] + radius) + 1;
            return IntGenerator.of(low, high);
        })
        .toArray(IntGenerator[]::new);

    // Start from one empty coordinate prefix and extend it one dimension at a time.
    List<TIntList> prefixes = new ArrayList<>();
    prefixes.add(new TIntArrayList());
    for(IntGenerator pool : igs) {
        List<TIntList> extended = new ArrayList<>();
        for(TIntList prefix : prefixes) {
            pool.reset();
            for(int k = 0;k < pool.size();k++) {
                int coordinate = pool.next();
                TIntArrayList point = new TIntArrayList();
                point.addAll(prefix);
                point.add(coordinate);
                extended.add(point);
            }
        }
        prefixes = extended;
    }

    int[] indices = new int[prefixes.size()];
    for(int k = 0;k < indices.length;k++) {
        indices[k] = indexFromCoordinates(prefixes.get(k).toArray());
    }
    return indices;
}
 
Example #20
Source File: Topology.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Like {@link #neighborhood(int, int)}, except that the neighborhood isn't truncated when it's
 * near an edge. Generated coordinates are reduced modulo the size of their
 * dimension, so the neighborhood wraps around to the other side.
 *
 * @param centerIndex       The flat index of the center point, decoded with
 *                          {@code coordinatesFromIndex}.
 * @param radius            The radius of this neighborhood about the centerIndex.
 * @return  The flat indices of the points in the neighborhood.
 */
public int[] wrappingNeighborhood(int centerIndex, int radius) {
    int[] cp = coordinatesFromIndex(centerIndex);

    IntGenerator[] igs = IntStream.range(0, dimensions.length)
        .mapToObj(dim -> {
            int low = cp[dim] - radius;
            // Cap the span so one dimension is never covered more than once.
            int high = Math.min((cp[dim] - radius) + dimensions[dim] - 1, cp[dim] + radius) + 1;
            return new IntGenerator(low, high);
        })
        .toArray(IntGenerator[]::new);

    // Start from one empty coordinate prefix and extend it one dimension at a time.
    List<TIntList> prefixes = new ArrayList<>();
    prefixes.add(new TIntArrayList());
    for(int dim = 0;dim < igs.length;dim++) {
        IntGenerator pool = igs[dim];
        List<TIntList> extended = new ArrayList<>();
        for(TIntList prefix : prefixes) {
            pool.reset();
            for(int k = 0;k < pool.size();k++) {
                int coordinate = ArrayUtils.modulo(pool.next(), dimensions[dim]);
                TIntArrayList point = new TIntArrayList();
                point.addAll(prefix);
                point.add(coordinate);
                extended.add(point);
            }
        }
        prefixes = extended;
    }

    int[] indices = new int[prefixes.size()];
    for(int k = 0;k < indices.length;k++) {
        indices[k] = indexFromCoordinates(prefixes.get(k).toArray());
    }
    return indices;
}
 
Example #21
Source File: AbstractSparseBinaryMatrix.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the occupied indexes in ascending order, i.e. every index from 0
 * to {@code getMaxIndex()} inclusive whose value is greater than zero.
 * @return  a sorted array of occupied indexes.
 */
@Override
public int[] getSparseIndices() {
    TIntList occupied = new TIntArrayList();
    int index = 0;
    while (index <= getMaxIndex()) {
        if (get(index) > 0) {
            occupied.add(index);
        }
        index++;
    }

    return occupied.toArray();
}
 
Example #22
Source File: AbstractSparseBinaryMatrix.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns true if at least one of the given on bit indexes is contained in
 * the sparse set of this matrix. It is allowed that this matrix have more
 * on bits than the specified list.
 *
 * @param onBits    the indexes to look for
 * @return  true if any index of {@code onBits} is in this matrix's sparse set,
 *          false otherwise (including when {@code onBits} is empty)
 */
public boolean any(TIntList onBits) {
    TIntSet occupied = getSparseSet();

    TIntIterator bits = onBits.iterator();
    while(bits.hasNext()) {
        if(occupied.contains(bits.next())) {
            return true;
        }
    }
    return false;
}
 
Example #23
Source File: AbstractSparseMatrix.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Recursively loops through the matrix dimensions to fill the results
 * list with flattened computed array indexes.
 *
 * @param bounds            upper bound (exclusive) of the loop for each dimension
 * @param currentDimension  the dimension iterated by this call
 * @param p                 scratch coordinate array, overwritten in place
 * @param results           receives one computed index per visited coordinate
 */
private void visit(int[] bounds, int currentDimension, int[] p, TIntList results) {
    final boolean innermost = currentDimension == p.length - 1;
    final int limit = bounds[currentDimension];
    for (int coordinate = 0; coordinate < limit; coordinate++) {
        p[currentDimension] = coordinate;
        if (innermost) {
            // Every coordinate is set: record the flattened index.
            results.add(computeIndex(p));
        } else {
            visit(bounds, currentDimension + 1, p, results);
        }
    }
}
 
Example #24
Source File: SDRClassifier.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
    * Constructor for the SDRClassifier
    * 
    * @param steps Sequence of the different steps of multi-step predictions to learn.
    * @param alpha The alpha used to adapt the weight matrix during learning. A larger alpha
    * 		  results in faster adaptation to the data.
    * @param actValueAlpha Used to track the actual value withing each bucket. A lower 
    * 		  actValueAlpha results in longer term memory.
    * @param verbosity Verbosity level, can be 0, 1, or 2.
    */
public SDRClassifier(TIntList steps, double alpha, double actValueAlpha, int verbosity) {
       this.steps = steps;
       this.alpha = alpha;
       this.actValueAlpha = actValueAlpha;
       this.verbosity = verbosity;
       actualValues.add(null);
       patternNZHistory = new Deque<Tuple>(ArrayUtils.max(steps.toArray()) + 1);
	for(int step : steps.toArray())
		weightMatrix.put(step, new FlexCompRowMatrix(maxBucketIdx + 1, maxInputIdx + 1));
}
 
Example #25
Source File: SpatialPooler.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Performs inhibition. This method calculates the necessary values needed to
 * actually perform inhibition and then picks the active columns locally:
 * a column wins when fewer than {@code density * neighborhoodSize} (rounded)
 * of its neighbors have a strictly larger tie-broken overlap.
 *
 * @param c         the {@link Connections} matrix
 * @param overlaps  an array containing the overlap score for each  column.
 *                  The overlap score for a column is defined as the number
 *                  of synapses in a "connected state" (connected synapses)
 *                  that are connected to input bits which are turned on.
 * @param density   The fraction of columns to survive inhibition. This
 *                  value is only an intended target. Since the surviving
 *                  columns are picked in a local fashion, the exact fraction
 *                  of surviving columns is likely to vary.
 * @return  indices of the winning columns
 */
public int[] inhibitColumnsLocal(Connections c, double[] overlaps, double density) {
    // Small bonus given to winners so later ties resolve in their favor.
    double addToWinners = ArrayUtils.max(overlaps) / 1000.0d;
    if(addToWinners == 0) {
        addToWinners = 0.001;
    }
    double[] tieBrokenOverlaps = Arrays.copyOf(overlaps, overlaps.length);

    TIntList winners = new TIntArrayList();
    double stimulusThreshold = c.getStimulusThreshold();
    int inhibitionRadius = c.getInhibitionRadius();
    for(int column = 0;column < overlaps.length;column++) {
        if(overlaps[column] < stimulusThreshold) {
            continue;
        }

        int[] neighborhood = getColumnNeighborhood(c, column, inhibitionRadius);
        double[] neighborhoodOverlaps = ArrayUtils.sub(tieBrokenOverlaps, neighborhood);

        // Plain loop instead of a parallel stream: the neighborhood is small
        // and this runs once per column, so fork/join overhead dominates.
        int numBigger = 0;
        for(double neighborOverlap : neighborhoodOverlaps) {
            if(neighborOverlap > overlaps[column]) {
                numBigger++;
            }
        }

        int numActive = (int)(0.5 + density * neighborhood.length);
        if(numBigger < numActive) {
            winners.add(column);
            tieBrokenOverlaps[column] += addToWinners;
        }
    }

    return winners.toArray();
}
 
Example #26
Source File: FastSS.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the combined size of both lists minus the number of values matched
 * by a two-pointer scan over them.
 * NOTE(review): the scan only advances the side holding the smaller value,
 * so it presumably expects both lists sorted ascending — confirm at callers.
 *
 * @param p1 first list
 * @param p2 second list
 * @return {@code p1.size() + p2.size()} minus the number of matched values
 */
private int checkEditDistance(TIntList p1, TIntList p2) {
    final int size1 = p1.size();
    final int size2 = p2.size();
    int pos1 = 0;
    int pos2 = 0;
    int matched = 0;
    while (pos1 < size1 && pos2 < size2) {
        final int order = Integer.compare(p1.get(pos1), p2.get(pos2));
        if (order == 0) {
            matched++;
            pos1++;
            pos2++;
        } else if (order < 0) {
            pos1++;
        } else {
            pos2++;
        }
    }
    return size1 + size2 - matched;
}
 
Example #27
Source File: PartEnumJoin.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies a candidate pair: derives the overlap required by
 * {@code categoryTHRESHOLD}, counts the actual overlap, and returns
 * overlap / (|a| + |b| - overlap).
 *
 * @param a first list
 * @param b second list
 * @return the ratio above, or -1 if {@code check_overlap} reports that the
 *         required overlap cannot be reached
 */
private float verify(TIntList a, TIntList b) {
    final int combinedSize = a.size() + b.size();
    final float factor = categoryTHRESHOLD / (1 + categoryTHRESHOLD);
    // The small epsilon keeps ceil from rounding up on floating-point noise.
    final int requiredOverlap = (int) Math.ceil(factor * combinedSize - 1e-6);

    final int actualOverlap = check_overlap(a, b, requiredOverlap);
    if (actualOverlap != -1) {
        return (actualOverlap / (float) (combinedSize - actualOverlap));
    }
    return -1;
}
 
Example #28
Source File: FastSS.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Compares the current entity ({@code id}, with deletion positions
 * {@code delPos}) against every entry already indexed under the given
 * value, then adds the current entity to that index bucket.
 * Entries are skipped when they are the current entity, when Clean-Clean ER
 * is on and both ids lie on the same side of {@code datasetDelimiter}, or
 * when they are already in {@code checkedFlag}.
 *
 * @param attributeValue the key of the index bucket
 * @return the comparisons whose edit distance is within {@code threshold}
 */
private List<Comparison> insertIndex(String attributeValue) {
    final TIntList currentDeletions = new TIntArrayList(delPos);
    List<IntListPair> bucket = stringHashIndex.computeIfAbsent(attributeValue, k -> new ArrayList<>());

    final List<Comparison> comparisons = new ArrayList<>();
    for (IntListPair entry : bucket) {
        final int otherId = entry.getKey();
        if (id == otherId) {
            continue;
        }

        // Clean-Clean ER: skip pairs coming from the same dataset.
        if (isCleanCleanER && (id < datasetDelimiter) == (otherId < datasetDelimiter)) {
            continue;
        }

        if (checkedFlag.contains(otherId)) {
            continue;
        }

        final int distance = checkEditDistance(entry.getValue(), currentDeletions);
        if (threshold < distance) {
            continue;
        }

        checkedFlag.add(otherId);
        final Comparison comparison = getComparison(id, otherId);
        comparison.setUtilityMeasure(1 - (float) distance / threshold);
        comparisons.add(comparison);
    }

    // Register the current entity only after the scan, so it is never compared to itself.
    bucket.add(new IntListPair(id, currentDeletions));

    return comparisons;
}
 
Example #29
Source File: UnitBuilder.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the word for every token id and delegates to
 * {@code buildUnit(String[])}.
 *
 * @param unitTokens token ids of the unit, in order
 * @param id2word    mapping from token id to word
 * @return the result of {@code buildUnit} on the looked-up words
 */
public static String buildUnit(TIntList unitTokens, TIntObjectHashMap<String> id2word) {
  final int tokenCount = unitTokens.size();
  final String[] words = new String[tokenCount];
  int pos = 0;
  while (pos < tokenCount) {
    words[pos] = id2word.get(unitTokens.get(pos));
    pos++;
  }
  return buildUnit(words);
}
 
Example #30
Source File: XMLUtils.java    From ProjectAres with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds the list of child indexes leading from the root element down to
 * the given element, by recursing to the root first and appending each
 * element's index in its parent on the way back.
 *
 * @param child the element whose path is wanted
 * @param size  number of path entries accumulated so far by the recursion;
 *              passed to the {@code TIntArrayList} constructor at the root
 * @return the index path from the root to {@code child}
 * @throws IllegalStateException if an element reports a negative index in its parent
 */
private static TIntList indexPath(Element child, int size) {
    final Element parent = child.getParentElement();
    if(parent == null) {
        // Root reached: create the list that the unwinding calls will fill.
        return new TIntArrayList(size);
    }

    final TIntList path = indexPath(parent, size + 1);
    final int index = ((BoundedElement) child).indexInParent();
    if(index >= 0) {
        path.add(index);
        return path;
    }
    throw new IllegalStateException("Parent element " + parent + " does not contain its child element " + child);
}