Example 1
Source File:    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
public TIntDoubleHashMap getEntityPriors(String mention, boolean isNamedentity) throws EntityLinkingDataAccessException {
  if (mention.equals("PAGE")) {
    TIntDoubleHashMap pagePriors = new TIntDoubleHashMap();
    pagePriors.put(DataAccess.getInternalIdForKBEntity(getTestKBEntity("Jimmy_Page")), 0.3);
    pagePriors.put(DataAccess.getInternalIdForKBEntity(getTestKBEntity("Larry_Page")), 0.7);
    return pagePriors;
  } else if (mention.equals("KASHMIR")) {
    TIntDoubleHashMap kashmirPriors = new TIntDoubleHashMap();
    kashmirPriors.put(DataAccess.getInternalIdForKBEntity(getTestKBEntity("Kashmir")), 0.9);
    kashmirPriors.put(DataAccess.getInternalIdForKBEntity(getTestKBEntity("Kashmir_(song)")), 0.1);
    return kashmirPriors;
  } else if (mention.equals("KNEBWORTH")) {
    TIntDoubleHashMap knebworthPriors = new TIntDoubleHashMap();
    knebworthPriors.put(DataAccess.getInternalIdForKBEntity(getTestKBEntity("Knebworth_Festival")), 1.0);
    return knebworthPriors;
  } else if (mention.equals("LES PAUL")) {
    return new TIntDoubleHashMap();
  } else {
    return new TIntDoubleHashMap();
Example 2
Source File:    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
 * Normalizes values so that they sum up to 1.
 * @param values
 * @return Normalized values.
public static TIntDoubleHashMap normalizeValuesToSum(TIntDoubleHashMap values) {
  TIntDoubleHashMap normalizedScores = new TIntDoubleHashMap();
  double total = 0;
  for (TIntDoubleIterator itr = values.iterator(); itr.hasNext(); ) {
    total += itr.value();
  if (total == 0) {
    return values;
  for (TIntDoubleIterator itr = values.iterator(); itr.hasNext(); ) {
    Double normalizedScore = itr.value() / total;
    normalizedScores.put(itr.key(), normalizedScore);
  return normalizedScores;
Example 3
Source File:    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
@Test public void testConditionalElement() {
  TIntDoubleHashMap elements = new TIntDoubleHashMap();
  elements.put(0, 0.1);
  elements.put(1, 0.3);
  elements.put(2, 0.05);
  elements.put(3, 0.15);
  elements.put(4, 0.4);

  Random rand = new Random(1337);
  TIntDoubleHashMap counts = new TIntDoubleHashMap();
  for (int i = 0; i < 1000000; ++i) {
    int chosen = CollectionUtils.getConditionalElement(elements, rand);
    counts.adjustOrPutValue(chosen, 1.0, 1.0);
  TIntDoubleHashMap actualProbs = CollectionUtils.normalizeValuesToSum(counts);
  assertEquals(0.1, actualProbs.get(0), 0.001);
  assertEquals(0.3, actualProbs.get(1), 0.001);
  assertEquals(0.05, actualProbs.get(2), 0.001);
  assertEquals(0.15, actualProbs.get(3), 0.001);
  assertEquals(0.4, actualProbs.get(4), 0.001);
Example 4
Source File:    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
 * Computes all entity occurrence probabilities based on their incoming links.
 * @return Map of Entity->Probability.
 * @throws EntityLinkingDataAccessException
public static TIntDoubleHashMap getAllEntityProbabilities() throws EntityLinkingDataAccessException {
  TIntObjectHashMap<int[]> entityInlinks = getAllInlinks();

  TIntDoubleHashMap entityProbabilities = new TIntDoubleHashMap(entityInlinks.size(), 0.5f);

  // Get the total number of links.
  long totalLinkCount = 0;

  TIntObjectIterator<int[]> itr = entityInlinks.iterator();

  while (itr.hasNext()) {
    totalLinkCount += itr.value().length;

  // Derive probabilities from counts.
  itr = entityInlinks.iterator();

  while (itr.hasNext()) {
    double probability = (double) itr.value().length / (double) totalLinkCount;
    entityProbabilities.put(itr.key(), probability);

  return entityProbabilities;
Example 5
Source File:    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
@Override public TIntDoubleHashMap getEntityPriors(String mention, boolean isNamedentity) throws EntityLinkingDataAccessException {
  TIntDoubleHashMap entityPriors = new TIntDoubleHashMap();
  try {
    DatabaseKeyValueStore db = DatabaseKeyValueStore.DICTIONARY_MENTION;
    if(db.getPartitions() != 1) {
      throw new IllegalArgumentException("Multiple partitions not supported for this key-value store");
    Codec codec = DataAccessKeyValueStoreHandler.singleton().getCodec(db);
    mention = EntityLinkingManager.conflateToken(mention, isNamedentity);
    KeyValueStore<byte[],byte[]> keyValueStore = DataAccessKeyValueStoreHandler.singleton().getKeyValueStore(db);
    byte[] resultBytes = keyValueStore.get(codec.encodeKey(mention));
    if (resultBytes != null) {
      TIntDoubleHashMap tempResults = (TIntDoubleHashMap) codec.decodeValue(resultBytes);
      TIntObjectHashMap<EntityType> entityClasses = DataAccessCache.singleton().getEntityClasses(tempResults.keys());

      for(int entityId: entityClasses.keys()) {
        if (isNamedentity) {
          if (!(entityClasses.get(entityId) == EntityType.CONCEPT)) {
            entityPriors.put(entityId, tempResults.get(entityId));
        else {
          if (!(entityClasses.get(entityId) == EntityType.NAMED_ENTITY)) {
            entityPriors.put(entityId, tempResults.get(entityId));
  } catch (Exception e) {
    throw new EntityLinkingDataAccessException(e);
  return entityPriors;
Example 6
Source File:    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
public TIntDoubleHashMap getEntitiesImportances(int[] entitiesIds) throws EntityLinkingDataAccessException {
  if (entitiesIds == null || entitiesIds.length == 0) {
    return new TIntDoubleHashMap();

  TIntDoubleHashMap entitiesImportances = new TIntDoubleHashMap(getCapacity(entitiesIds.length), troveLoadFactor);
  try {
    DatabaseKeyValueStore db = DatabaseKeyValueStore.ENTITY_RANK_ENTITY;
    if(db.getPartitions() != 1) {
      throw new IllegalArgumentException("Multiple partitions not supported for this key-value store");
    Codec entityRankCodec = DataAccessKeyValueStoreHandler.singleton().getCodec(db);
    KeyValueStore<byte[], byte[]> keyValueStore = DataAccessKeyValueStoreHandler.singleton().getKeyValueStore(db);

    List<byte[]> encodedKeys = new ArrayList<>();
    for (int id : entitiesIds) {
    Map<byte[], byte[]> keyValueMap = keyValueStore.getAll(encodedKeys);

    for (Map.Entry<byte[], byte[]> entry : keyValueMap.entrySet()) {
      if (entry.getKey() == null || entry.getValue() == null) continue;
      //More than one value, not sure why?
      int entity = (int) entityRankCodec.decodeKey(entry.getKey());
      double[] rank = (double[]) entityRankCodec.decodeValue(entry.getValue());
      entitiesImportances.put(entity, rank[0]);
  } catch (Exception e) {
    throw new EntityLinkingDataAccessException(e);
  return entitiesImportances;
Example 7
Source File:    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
@Override protected void setupEntities(Entities e) throws IOException, EntityLinkingDataAccessException {
  inlinkImportance = new TIntDoubleHashMap();
  TIntObjectHashMap<int[]> neighbors = DataAccess.getInlinkNeighbors(e);
  double collectionSize = (double) DataAccess.getCollectionSize();
  for (int eId : e.getUniqueIds()) {
    double importance = (double) neighbors.get(eId).length / (double) collectionSize;
    inlinkImportance.put(eId, importance);
Example 8
Source File:    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
private TIntDoubleHashMap calcPriorDistribution(Mention mention, MaterializedPriorProbability pp) {
  TIntDoubleHashMap priors = new TIntDoubleHashMap();

  for (Entity entity : mention.getCandidateEntities()) {
    priors.put(entity.getId(), pp.getPriorProbability(mention, entity));

  return priors;
Example 9
Source File:    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
private TIntDoubleHashMap calcSimDistribution(Mention mention, EnsembleMentionEntitySimilarity combSimMeasure) throws Exception {
  TIntDoubleHashMap sims = new TIntDoubleHashMap();
  for (Entity e : mention.getCandidateEntities()) {
    sims.put(e.getId(), combSimMeasure.calcSimilarity(mention, context, e));
  return CollectionUtils.normalizeValuesToSum(sims);
Example 10
Source File:    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
// For each entity, computes a weight per keyphrase (NPMI) and per keyphrase token (MI) from
// entity, keyphrase/keyword and intersection counts, storing them in per-entity maps.
//
// NOTE(review): this listing appears truncated - closing braces and several statements are
// not visible. Specific gaps are flagged inline; confirm against the upstream source before
// relying on this code.
// NOTE(review): keyphraseSourceWeights, minKeyphraseWeight and maxEntityKeyphraseCount are
// not read anywhere in the visible body.
@Override public Keyphrases getEntityKeyphrases(Entities entities, Map<String, Double> keyphraseSourceWeights, double minKeyphraseWeight,
    int maxEntityKeyphraseCount) throws EntityLinkingDataAccessException {
  Keyphrases keyphrases = new Keyphrases();
  // Filled by getEntityKeyphraseTokens below: entity id -> keyphrase ids, keyphrase id -> token ids
  // (presumably; inferred from how the maps are iterated further down).
  TIntObjectHashMap<int[]> eKps = new TIntObjectHashMap<int[]>();
  TIntObjectHashMap<int[]> kpTokens = new TIntObjectHashMap<int[]>();
  getEntityKeyphraseTokens(entities, eKps, kpTokens);

  // Per-entity weight maps: keyword -> MI and keyphrase -> NPMI.
  TIntObjectHashMap<TIntDoubleHashMap> e2kw2mi = new TIntObjectHashMap<TIntDoubleHashMap>();
  TIntObjectHashMap<TIntDoubleHashMap> e2kp2mi = new TIntObjectHashMap<TIntDoubleHashMap>();

  for (Entity entity : entities) {
    int eId = entity.getId();
    // NOTE(review): singleEntity is created empty and nothing visible adds `entity` to it
    // before it is passed to the count lookups - confirm.
    Entities singleEntity = new Entities();
    int entityCount = getEntitySuperdocSize(singleEntity).get(eId);
    TIntDoubleHashMap kp2mi = new TIntDoubleHashMap();
    e2kp2mi.put(entity.getId(), kp2mi);
    TIntDoubleHashMap kw2mi = new TIntDoubleHashMap();
    e2kw2mi.put(entity.getId(), kw2mi);

    // NOTE(review): the body of this guard is not visible; as shown, the keyphrase loop would
    // run only for entities WITHOUT keyphrases - confirm.
    if (!eKps.containsKey(eId)) {
    for (int kp : eKps.get(eId)) {
      // NOTE(review): singleKp is never populated in the visible code - confirm.
      TIntHashSet singleKp = new TIntHashSet();
      int kpCount = getKeyphraseDocumentFrequencies(singleKp).get(kp);
      int eKpIcCount = getEntityKeyphraseIntersectionCount(singleEntity).get(eId).get(kp);
      kp2mi.put(kp, WeightComputation.computeNPMI(entityCount, kpCount, eKpIcCount, TOTAL_ENTITY_COUNT));

      for (int kw : kpTokens.get(kp)) {
        // NOTE(review): singleKw is never populated in the visible code - confirm.
        TIntHashSet singleKw = new TIntHashSet();
        int kwCount = getKeywordDocumentFrequencies(singleKw).get(kw);
        int eKwIcCount = getEntityKeywordIntersectionCount(singleEntity).get(eId).get(kw);
        kw2mi.put(kw, WeightComputation.computeMI(entityCount, kwCount, eKwIcCount, TOTAL_ENTITY_COUNT, false));
  // NOTE(review): none of eKps, kpTokens, e2kp2mi or e2kw2mi is attached to `keyphrases` in the
  // visible code, so the returned object is as freshly constructed - confirm.
  return keyphrases;
Example 11
Source File:    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
@Test public void test() throws IOException {
  EncoderDecoderKryo<Integer> encoderInteger = new EncoderDecoderKryo(Integer.class);
  int a = 845739038;
  byte[] bytes = encoderInteger.encode(a);
  int resultInt = encoderInteger.decode(bytes);
  assertEquals(a, resultInt);

  EncoderDecoderKryo<Double> encoderDouble = new EncoderDecoderKryo(Double.class);
  double b = 845739038;
  bytes = encoderDouble.encode(b);
  double resultDouble = encoderDouble.decode(bytes);
  assertEquals(b, resultDouble, 0.00001);

  EncoderDecoderKryo<String> encoderString = new EncoderDecoderKryo(String.class);
  String str = "hola";
  bytes = encoderString.encode(str);
  String resultString = encoderString.decode(bytes);
  assertEquals(str, resultString);

  EncoderDecoderKryo<String[]> encoderStringArray = new EncoderDecoderKryo(String[].class);
  String[] strArray = new String[] { "hola", "y", "chau" };
  bytes = encoderStringArray.encode(strArray);
  String[] resultStringArray = encoderStringArray.decode(bytes);
  for (int i = 0; i < strArray.length; i++) {
    assertEquals(strArray[i], resultStringArray[i]);

  EncoderDecoderKryo<KeyValueStoreRow[]> keyvalueStoreRowArray = new EncoderDecoderKryo(KeyValueStoreRow[].class);
  Object[] elements = new Object[] { a, b, str };
  KeyValueStoreRow kvs1 = new KeyValueStoreRow(elements);
  Object[] elements2 = new Object[] { str, b, a };
  KeyValueStoreRow kvs2 = new KeyValueStoreRow(elements2);
  KeyValueStoreRow[] kvsr = new KeyValueStoreRow[] { kvs1, kvs2 };
  bytes = keyvalueStoreRowArray.encode(kvsr);
  KeyValueStoreRow[] kvsrResultl = keyvalueStoreRowArray.decode(bytes);

  assertEquals(kvsr[0].getInt(0), kvsrResultl[0].getInt(0));
  assertEquals(kvsr[0].getDouble(1), kvsrResultl[0].getDouble(1), 0.00001);
  assertEquals(kvsr[0].getString(2), kvsr[0].getString(2));

  assertEquals(kvsr[1].getInt(2), kvsrResultl[1].getInt(2));
  assertEquals(kvsr[1].getDouble(1), kvsrResultl[1].getDouble(1), 0.00001);
  assertEquals(kvsr[1].getString(0), kvsr[1].getString(0));

  EncoderDecoderKryo<TIntDoubleHashMap> tintDoubleHashMap = new EncoderDecoderKryo(TIntDoubleHashMap.class);
  TIntDoubleHashMap object = new TIntDoubleHashMap(2, Constants.DEFAULT_LOAD_FACTOR);
  object.put(7, 8.19);
  object.put(15, 7.9);
  bytes = tintDoubleHashMap.encode(object);
  TIntDoubleHashMap result = tintDoubleHashMap.decode(bytes);
  TIntDoubleIterator it = result.iterator();
  int index = 0;
  while (it.hasNext()) {
    if (index == 0) {
      assertEquals(7, it.key());
      assertEquals(it.value(), 8.19, 0.00001);
    } else {
      assertEquals(15, it.key());
      assertEquals(it.value(), 7.9, 0.00001);
