Java Code Examples for gnu.trove.map.hash.TIntIntHashMap#adjustOrPutValue()

The following examples show how to use gnu.trove.map.hash.TIntIntHashMap#adjustOrPutValue(). You can go to the original project or source file by following the links above each example.
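As a quick, minimal sketch of the method's contract (independent of the projects below): adjustOrPutValue(key, adjust_amount, put_amount) stores put_amount when the key is absent, otherwise adds adjust_amount to the stored value, and returns the resulting value. The class name AdjustOrPutValueDemo is only an illustrative placeholder.

import gnu.trove.map.hash.TIntIntHashMap;

public class AdjustOrPutValueDemo {
  public static void main(String[] args) {
    TIntIntHashMap counts = new TIntIntHashMap();
    // Key 7 is absent, so the put value (1) is stored.
    counts.adjustOrPutValue(7, 1, 1);
    // Key 7 is now present, so the adjust amount (1) is added to the stored value.
    counts.adjustOrPutValue(7, 1, 1);
    System.out.println(counts.get(7)); // prints 2
  }
}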
Example 1
Source File: DataAccessForTesting.java    From ambiverse-nlu with Apache License 2.0
@Override public TIntObjectHashMap<TIntIntHashMap> getEntityKeywordIntersectionCount(Entities entities) throws EntityLinkingDataAccessException {
  TIntObjectHashMap<TIntIntHashMap> isec = new TIntObjectHashMap<TIntIntHashMap>();

  for (String[] eKps : allEntityKeyphrases) {
    int entity = DataAccess.getInternalIdForKBEntity(getTestKBEntity(eKps[0]));
    TIntIntHashMap counts = new TIntIntHashMap();
    isec.put(entity, counts);

    if (eKps.length > 1) {
      // Entries alternate: odd indexes hold a space-separated keyphrase, even indexes hold its count.
      String[] tokens = null;
      for (int i = 1; i < eKps.length; ++i) {
        if (i % 2 == 1) {
          tokens = eKps[i].split(" ");
        } else {
          int count = Integer.parseInt(eKps[i]);
          for (String token : tokens) {
            // Add count to the existing entry for this token, or create the entry with count.
            counts.adjustOrPutValue(DataAccess.getIdForWord(token), count, count);
          }
        }
      }
    }
  }
  return isec;
}
 
Example 2
Source File: UnitsStatCollector.java    From ambiverse-nlu with Apache License 2.0
public void startCollectingStatsInMemory(UnitType unitType) throws SQLException, EntityLinkingDataAccessException {
  TIntObjectHashMap<TIntHashSet> entityUnits = UnitUtil.loadEntityUnits(unitType.getUnitSize());

  // Count the occurrences of units with respect to entities
  TIntIntHashMap unitCounts = new TIntIntHashMap();
  for (int entity : entityUnits.keys()) {
    TIntHashSet units = entityUnits.get(entity);
    for (int unit : units.toArray()) {
      unitCounts.adjustOrPutValue(unit, 1, 1);
    }
  }

  logger.info("Storing data ...");
  storeUnitsIntoDB(unitCounts, unitType);
}
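This example shows the most common use of adjustOrPutValue: building a frequency map. adjustOrPutValue(unit, 1, 1) initializes an absent key to 1 and increments an existing one by 1, so the counts are accumulated in a single pass without a separate containsKey/get/put sequence.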
 
Example 3
Source File: DataAccessForTesting.java    From ambiverse-nlu with Apache License 2.0
private TIntObjectHashMap<TIntIntHashMap> getEntityUnitIntersectionCountInternal(Entities entities, UnitType unitType)
    throws EntityLinkingDataAccessException {
  TIntObjectHashMap<TIntIntHashMap> isec = new TIntObjectHashMap<>();

  for (String[] eKps : allEntityKeyphrases) {
    int entity = DataAccess.getInternalIdForKBEntity(getTestKBEntity(eKps[0]));
    TIntIntHashMap counts = new TIntIntHashMap();
    isec.put(entity, counts);

    if (eKps.length > 1) {
      // Entries alternate: odd indexes hold a keyphrase, even indexes hold its count.
      String[] keyphrase = null;

      for (int i = 1; i < eKps.length; ++i) {
        if (i % 2 == 1) {
          keyphrase = eKps[i].split(" ");
        } else {
          int count = Integer.parseInt(eKps[i]);

          Set<String[]> ngrams = StringUtils.getNgrams(keyphrase, unitType.getUnitSize());
          for (String[] ngram : ngrams) {
            String ngramString = String.join(" ", ngram);
            counts.adjustOrPutValue(DataAccess.getIdForWord(ngramString), count, count);
          }
        }
      }
    }
  }
  return isec;
}
 
Example 4
Source File: InputTextWrapper.java    From ambiverse-nlu with Apache License 2.0
public InputTextWrapper(Context context, UnitType unitType, boolean removeStopwords) throws EntityLinkingDataAccessException {
  logger.debug("Wrapping input text.");
  mentionToIgnore = null;
  this.unitType = unitType;
  int unitLength = unitType.getUnitSize();
  if (context.getTokenCount() < unitLength) return;
  List<String> unitStrings = new ArrayList<>(context.getTokenCount());
  Queue<String> curTokens = new ArrayDeque<>(unitLength);
  String[] curTokensArray = new String[unitLength];
  // Slide a window of unitLength tokens over the text and emit one unit per full window.
  for (String token : context.getTokens()) {
    curTokens.add(token);
    if (curTokens.size() == unitLength || (!curTokens.isEmpty() && curTokens.size() - 1 == unitLength)) {
      unitStrings.add(UnitBuilder.buildUnit(curTokens.toArray(curTokensArray)));
      curTokens.remove();
    }
  }

  logger.debug("Get ids for words.");
  TObjectIntHashMap<String> wordIds = DataAccess.getIdsForWords(unitStrings);
  units = new int[unitStrings.size()];
  unitCounts = new TIntIntHashMap((int) (wordIds.size() / Constants.DEFAULT_LOAD_FACTOR), Constants.DEFAULT_LOAD_FACTOR);
  numOfUnits = 0;
  for (int i = 0; i < unitStrings.size(); i++) {
    int unitId = wordIds.get(unitStrings.get(i));
    if (unitId == 0) continue;

    logger.debug("Get contract term for unit id {}.", unitId);
    int contractedUnitId = DataAccess.contractTerm(unitId);
    if (contractedUnitId != 0) unitId = contractedUnitId;
    if (removeStopwords && StopWord.isStopwordOrSymbol(unitId, Language.getLanguageForString("en")))  continue;
    units[i] = unitId;
    unitCounts.adjustOrPutValue(unitId, 1, 1);
    numOfUnits++;
  }
}
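A side note on the construction of unitCounts above: passing wordIds.size() / Constants.DEFAULT_LOAD_FACTOR as the initial capacity sizes the map so that roughly one entry per distinct word id fits before a rehash is needed; adjustOrPutValue(unitId, 1, 1) then accumulates the per-unit frequencies in one pass.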
 
Example 5
Source File: HigherOrderViterbiPAUpdate.java    From fnlp with GNU Lesser General Public License v3.0
/**
 * data: each element is an index into the feature space. It has N rows (the sequence
 * length) and T columns (the number of templates); data[r][t] is the start position of
 * the feature obtained by applying template t at position r.
 *
 * target[r] and predict[r] are label indexes.
 *
 * @param c cap on the update step size (aggressiveness parameter)
 * @param weights the feature weight vector that is updated in place
 */
public float update(Instance inst, float[] weights, int k,
		float[] extraweight, Object predictLabel,
		Object goldenLabel, float c) {
	int[][] data = (int[][]) inst.getData();
	int[] target;
	if (goldenLabel == null)
		target = (int[]) inst.getTarget();
	else
		target = (int[]) goldenLabel;
	int[] predict = (int[]) predictLabel;
	// number of labels that differ within the current clique
	int ne = 0;
	/**
	 * Running offsets into the label-combination (y) space.
	 */
	int tS = 0, pS = 0;

	float diffW = 0;

	int loss = 0;

	int L = data.length;
	// sparse representation of (f(x,y) - f(x,\bar{y}))
	TIntIntHashMap diffF = new TIntIntHashMap(); // at most 2*L*numTemplets distinct entries

	for (int o = -templets.maxOrder - 1, l = 0; l < L; o++, l++) {
		tS = tS * numLabels % templets.numStates + target[l]; // gold: offset of the current state combination in y-space
		pS = pS * numLabels % templets.numStates + predict[l]; // predicted: offset of the current state combination in y-space
		if (predict[l] != target[l])
			ne++;
		if (o >= 0 && (predict[o] != target[o]))
			ne--; // subtract the label difference of the node that just left the clique

		if (ne > 0) { // the current clique contains differing labels
			loss++; // L(y,ybar)
			for (int t = 0; t < numTemplets; t++) {
				if (data[l][t] == -1)
					continue;
				int tI = data[l][t] + templets.offset[t][tS]; // feature index: dimension in weights for the gold labels
				int pI = data[l][t] + templets.offset[t][pS]; // feature index: dimension in weights for the predicted labels
				if (tI != pI) {
					diffF.adjustOrPutValue(tI, 1, 1);
					diffF.adjustOrPutValue(pI, -1, -1);
					diffW += weights[tI] - weights[pI]; // w^T(f(x,y)-f(x,ybar))
				}
			}
		}
	}

	float diff = 0;
	TIntIntIterator it = diffF.iterator();
	for (int i = diffF.size(); i-- > 0;) {
		it.advance();
		diff += it.value() * it.value();
	}
	it = null;
	float alpha;
	float delta;
	if (useLoss) {
		delta = loss;
	} else
		delta = 1;
	if (diffW < delta) {

		tS = 0;
		pS = 0;
		ne = 0;
		alpha = (delta - diffW) / diff;
		// System.out.println(alpha);
		alpha = Math.min(c, alpha);
		it = diffF.iterator();
		for (int i = diffF.size(); i-- > 0;) {
			it.advance();
			weights[it.key()] += it.value() * alpha;
		}

		return loss;
	} else {
		return 0;
	}
}
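For orientation, the tail of this method is a passive-aggressive style update: diffF holds the sparse feature difference Δf = f(x,y) − f(x,ȳ), diffW is wᵀΔf, diff is ‖Δf‖², and when the margin falls short of δ the weights are moved along Δf by α = min(c, (δ − wᵀΔf)/‖Δf‖²).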