Java Code Examples for gnu.trove.TIntArrayList#sort()
The following examples show how to use
gnu.trove.TIntArrayList#sort().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BestAutomaticNegativesChooser.java From jatecs with GNU General Public License v3.0 | 6 votes |
/**
 * Returns the document IDs stored as negatives for the given category.
 *
 * <p>The category name is resolved to its ID, the {@code DocumentSimilar} entries held in
 * {@code _best} for that ID are read, and their {@code docID} values are returned in sorted
 * order.
 *
 * @param category the name of the category to look up
 * @return an iterator over the sorted document IDs
 */
public TIntArrayListIterator selectNegatives(String category) {
    short categoryId = _index.getCategoryDB().getCategory(category);
    TreeSet<DocumentSimilar> candidates = _best.get(categoryId);

    TIntArrayList negatives = new TIntArrayList();
    for (DocumentSimilar candidate : candidates) {
        negatives.add(candidate.docID);
    }

    // The iterator is built over the list only after it has been sorted.
    negatives.sort();
    return new TIntArrayListIterator(negatives);
}
Example 2
Source File: BestKNNNegativesChooser.java From jatecs with GNU General Public License v3.0 | 6 votes |
/**
 * Returns the document IDs stored as negatives for the given category.
 *
 * <p>The category name is resolved to its ID, the {@code Item} entries held in {@code _best}
 * for that ID are read, and their {@code docID} values are returned in sorted order. With
 * assertions enabled, a missing entry set or a repeated document ID fails fast.
 *
 * @param category the name of the category to look up
 * @return an iterator over the sorted document IDs
 */
public TIntArrayListIterator selectNegatives(String category) {
    short categoryId = _index.getCategoryDB().getCategory(category);
    TreeSet<Item> candidates = _best.get(categoryId);
    assert (candidates != null);

    TIntArrayList negatives = new TIntArrayList();
    for (Item candidate : candidates) {
        // Each document is expected to appear at most once per category.
        assert (!negatives.contains(candidate.docID));
        negatives.add(candidate.docID);
    }

    negatives.sort();
    return new TIntArrayListIterator(negatives);
}
Example 3
Source File: TreeBoostCMCLearner.java From jatecs with GNU General Public License v3.0 | 5 votes |
protected IIndex selectPositives(short catID, IIndex training) { // First create a new index. IIndex idx = training.cloneIndex(); IShortIterator childCats = getAllChildsCategoriesFor(idx, catID); short nextCatID = Short.MIN_VALUE; if (childCats.hasNext()) nextCatID = childCats.next(); // Remove unwanted categories. TShortArrayList toRemove = new TShortArrayList(); for (short i = 0; i < training.getCategoryDB().getCategoriesCount(); i++) { if (i == nextCatID) { if (childCats.hasNext()) nextCatID = childCats.next(); continue; } toRemove.add(i); } toRemove.sort(); idx.removeCategories(new TShortArrayListIterator(toRemove)); // Remove unwanted documents. TIntArrayList docsToRemove = new TIntArrayList(); IIntIterator docs = idx.getDocumentDB().getDocuments(); while (docs.hasNext()) { int docID = docs.next(); IShortIterator curCats = idx.getClassificationDB() .getDocumentCategories(docID); if (!curCats.hasNext()) docsToRemove.add(docID); } docsToRemove.sort(); idx.removeDocuments(new TIntArrayListIterator(docsToRemove), false); return idx; }
Example 4
Source File: TrigramBuilderTest.java From consulo with Apache License 2.0 | 5 votes |
/**
 * Feeds the literal {@code "String$CharData"} to {@code TrigramBuilder.processTrigrams} and
 * checks both the reported trigram count and the sorted list of emitted trigram values.
 */
public void testBuilder() {
    final Ref<Integer> reportedCount = new Ref<Integer>();
    final TIntArrayList trigrams = new TIntArrayList();

    TrigramBuilder.TrigramProcessor collector = new TrigramBuilder.TrigramProcessor() {
        @Override
        public boolean execute(int value) {
            trigrams.add(value);
            return true;
        }

        @Override
        public boolean consumeTrigramsCount(int count) {
            reportedCount.set(count);
            return true;
        }
    };
    TrigramBuilder.processTrigrams("String$CharData", collector);

    // Expected values are listed in ascending order, so sort before comparing.
    trigrams.sort();

    final int expectedTrigramCount = 13;
    Integer trigramCount = reportedCount.get();
    assertNotNull(trigramCount);
    assertEquals(expectedTrigramCount, (int) trigramCount);
    assertEquals(expectedTrigramCount, trigrams.size());

    int[] expected = {
        buildTrigram("$Ch"),
        buildTrigram("arD"),
        buildTrigram("ata"),
        6514785,
        6578548,
        6759523,
        6840690,
        6909543,
        7235364,
        7496801,
        7498094,
        7566450,
        7631465,
    };

    for (int idx = 0; idx < expectedTrigramCount; ++idx) {
        assertEquals(expected[idx], trigrams.getQuick(idx));
    }
}
Example 5
Source File: TreeRecommenderLearner.java From jatecs with GNU General Public License v3.0 | 5 votes |
protected IIndex selectPositives(short catID, IIndex training) { // First create a new index. IIndex idx = training.cloneIndex(); IShortIterator childCats = getAllChildsCategoriesFor(idx, catID); short nextCatID = Short.MIN_VALUE; if (childCats.hasNext()) nextCatID = childCats.next(); // Remove unwanted categories. TShortArrayList toRemove = new TShortArrayList(); for (short i = 0; i < training.getCategoryDB().getCategoriesCount(); i++) { if (i == nextCatID) { if (childCats.hasNext()) nextCatID = childCats.next(); continue; } toRemove.add(i); } toRemove.sort(); idx.removeCategories(new TShortArrayListIterator(toRemove)); // Remove unwanted documents. TIntArrayList docsToRemove = new TIntArrayList(); IIntIterator docs = idx.getDocumentDB().getDocuments(); while (docs.hasNext()) { int docID = docs.next(); IShortIterator curCats = idx.getClassificationDB() .getDocumentCategories(docID); if (!curCats.hasNext()) docsToRemove.add(docID); } docsToRemove.sort(); idx.removeDocuments(new TIntArrayListIterator(docsToRemove), false); return idx; }
Example 6
Source File: TroveDomainDB.java From jatecs with GNU General Public License v3.0 | 5 votes |
/**
 * Adds the given features to the feature list kept for a category, skipping features that
 * are already present.
 *
 * <p>The list is looked up with {@code binarySearch}, so it must be sorted on entry. Each
 * missing feature is inserted at its sorted position, which keeps the list sorted throughout
 * the loop. Appending at the end instead would leave the list unsorted for the following
 * searches, making them unreliable and allowing duplicate entries. Because order is
 * maintained on every insertion, no final sort is required.
 *
 * <p>NOTE(review): presumably the list holds the features excluded from the category -
 * confirm against the callers of {@code _categoriesFeatures}.
 *
 * @param category        the category whose feature list is updated
 * @param removedFeatures the features to record; each is added at most once
 */
public void removeCategoryFeatures(short category, IIntIterator removedFeatures) {
    TIntArrayList feats = _categoriesFeatures.get(category);
    while (removedFeatures.hasNext()) {
        int feature = removedFeatures.next();
        int pos = feats.binarySearch(feature);
        if (pos < 0) {
            // A negative result encodes the insertion point as -(insertionPoint + 1).
            feats.insert(-(pos + 1), feature);
        }
    }
    _hasLocalRepresentation = _hasLocalRepresentation || feats.size() > 0;
}
Example 7
Source File: SingleLabelTreeBoostLearner.java From jatecs with GNU General Public License v3.0 | 5 votes |
protected IIndex selectPositives(short catID, IIndex training) { // First create a new index. IIndex idx = training.cloneIndex(); IShortIterator childCats = getAllChildsCategoriesFor(idx, catID); short nextCatID = Short.MIN_VALUE; if (childCats.hasNext()) nextCatID = childCats.next(); // Remove unwanted categories. TShortArrayList toRemove = new TShortArrayList(); for (short i = 0; i < training.getCategoryDB().getCategoriesCount(); i++) { if (i == nextCatID) { if (childCats.hasNext()) nextCatID = childCats.next(); continue; } toRemove.add(i); } toRemove.sort(); idx.removeCategories(new TShortArrayListIterator(toRemove)); // Remove unwanted documents. TIntArrayList docsToRemove = new TIntArrayList(); IIntIterator docs = idx.getDocumentDB().getDocuments(); while (docs.hasNext()) { int docID = docs.next(); IShortIterator curCats = idx.getClassificationDB() .getDocumentCategories(docID); if (!curCats.hasNext()) docsToRemove.add(docID); } docsToRemove.sort(); idx.removeDocuments(new TIntArrayListIterator(docsToRemove), false); return idx; }
Example 8
Source File: TreeBoostLearner.java From jatecs with GNU General Public License v3.0 | 5 votes |
protected IIndex selectPositives(short catID, IIndex training) { // First create a new index. IIndex idx = training.cloneIndex(); IShortIterator childCats = getAllChildsCategoriesFor(idx, catID); short nextCatID = Short.MIN_VALUE; if (childCats.hasNext()) nextCatID = childCats.next(); // Remove unwanted categories. TShortArrayList toRemove = new TShortArrayList(); for (short i = 0; i < training.getCategoryDB().getCategoriesCount(); i++) { if (i == nextCatID) { if (childCats.hasNext()) nextCatID = childCats.next(); continue; } toRemove.add(i); } toRemove.sort(); idx.removeCategories(new TShortArrayListIterator(toRemove)); // Remove unwanted documents. TIntArrayList docsToRemove = new TIntArrayList(); IIntIterator docs = idx.getDocumentDB().getDocuments(); while (docs.hasNext()) { int docID = docs.next(); IShortIterator curCats = idx.getClassificationDB() .getDocumentCategories(docID); if (!curCats.hasNext()) docsToRemove.add(docID); } docsToRemove.sort(); idx.removeDocuments(new TIntArrayListIterator(docsToRemove), false); return idx; }
Example 9
Source File: RegexTSR.java From jatecs with GNU General Public License v3.0 | 5 votes |
@Override public void computeTSR(IIndex index) { TextualProgressBar bar = new TextualProgressBar( "Compute TSR with by using regex matcher"); int total = index.getFeatureDB().getFeaturesCount(); int step = 0; TIntArrayList toRemove = new TIntArrayList(); IIntIterator it = index.getFeatureDB().getFeatures(); while (it.hasNext()) { int featID = it.next(); String featName = index.getFeatureDB().getFeatureName(featID); if (!featName.matches(regexPatternMatching)) { toRemove.add(featID); } step++; bar.signal((step * 100) / total); } bar.signal(100); toRemove.sort(); // Remove the worst features. JatecsLogger.status().print("Removing worst features..."); index.removeFeatures(new TIntArrayListIterator(toRemove)); JatecsLogger.status().println( "done. Now the DB contains " + index.getFeatureDB().getFeaturesCount() + " feature(s)."); }
Example 10
Source File: GlobalThresholdTSR.java From jatecs with GNU General Public License v3.0 | 5 votes |
public void computeTSR(IIndex index) { TextualProgressBar bar = new TextualProgressBar( "Compute global threshold (" + Os.generateDoubleString(_threshold, 3) + ") TSR with " + _function.getClass().getName()); int total = index.getFeatureDB().getFeaturesCount(); int step = 0; TIntArrayList toRemove = new TIntArrayList(); IIntIterator it = index.getFeatureDB().getFeatures(); while (it.hasNext()) { int featID = it.next(); double score = _function.compute((short) 0, featID, index); if (score < _threshold) toRemove.add(featID); step++; bar.signal((step * 100) / total); } bar.signal(100); toRemove.sort(); // Remove the worst features. JatecsLogger.status().print( "Removing " + toRemove.size() + " features..."); index.removeFeatures(new TIntArrayListIterator(toRemove)); JatecsLogger.status().println( "done. Now the DB contains " + index.getFeatureDB().getFeaturesCount() + " feature(s)."); }
Example 11
Source File: StringPrefixTSR.java From jatecs with GNU General Public License v3.0 | 5 votes |
@Override public void computeTSR(IIndex index) { TextualProgressBar bar = new TextualProgressBar( "Compute TSR with by using regex matcher"); int total = index.getFeatureDB().getFeaturesCount(); int step = 0; TIntArrayList toRemove = new TIntArrayList(); IIntIterator it = index.getFeatureDB().getFeatures(); while (it.hasNext()) { int featID = it.next(); String featName = index.getFeatureDB().getFeatureName(featID); if (!featName.matches(regexPatternMatching)) { toRemove.add(featID); } step++; bar.signal((step * 100) / total); } bar.signal(100); toRemove.sort(); // Remove the worst features. JatecsLogger.status().print("Removing worst features..."); TIntArrayListIterator toRemoveIT = new TIntArrayListIterator(toRemove); index.removeFeatures(toRemoveIT); JatecsLogger.status().println( "done. Now the DB contains " + index.getFeatureDB().getFeaturesCount() + " feature(s)."); }
Example 12
Source File: SvmDDagSingleLabelLearner.java From jatecs with GNU General Public License v3.0 | 4 votes |
private IIndex buildBinaryLocalIndex(IIndex trainingIndex, ArrayList<Short> catsGood) { if (!(catsGood.size() == 2)) throw new RuntimeException("The set of valid categories must be 2"); // First create a new index. IIndex idx = trainingIndex.cloneIndex(); // Remove unwanted categories. TShortArrayList toRemove = new TShortArrayList(); IShortIterator allCats = idx.getCategoryDB().getCategories(); while (allCats.hasNext()) { short catID = allCats.next(); if (catsGood.contains(catID)) continue; toRemove.add(catID); } toRemove.sort(); idx.removeCategories(new TShortArrayListIterator(toRemove)); // Remove unwanted documents. TIntArrayList docsToRemove = new TIntArrayList(); IIntIterator docs = idx.getDocumentDB().getDocuments(); while (docs.hasNext()) { int docID = docs.next(); IShortIterator curCats = idx.getClassificationDB() .getDocumentCategories(docID); if (!curCats.hasNext()) docsToRemove.add(docID); } docsToRemove.sort(); idx.removeDocuments(new TIntArrayListIterator(docsToRemove), true); // If the case, apply TSR. if (tsrType != null) { tsrType.computeTSR(idx); // Apply weighting. IWeighting weighting = null; if (weightingType == WeightingType.TF_IDF) { weighting = new TfNormalizedIdf(idx); } else if (weightingType == WeightingType.BM25) { weighting = new BM25(idx); } idx = weighting.computeWeights(idx); } // Remove 2nd category to make an index for a binary classifier. toRemove.clear(); String catNameToRemove = trainingIndex.getCategoryDB() .getCategoryName(catsGood.get(1)); toRemove.add(idx.getCategoryDB().getCategory(catNameToRemove)); idx.removeCategories(new TShortArrayListIterator(toRemove)); return idx; }
Example 13
Source File: DCSSingleLabelKnnFoldValidator.java From jatecs with GNU General Public License v3.0 | 4 votes |
/**
 * Splits an index into a training part and a validation part for one fold.
 *
 * <p>The number of folds is the smaller of the document count and
 * {@code numValidationSteps}. Documents are numbered by their position in the document
 * iterator; the training part is a contiguous window of positions that starts at
 * {@code step * foldSize} and wraps around the end, and every other position goes to the
 * validation part.
 *
 * @param step               zero-based fold number
 * @param index              the index to split; it is cloned, not modified
 * @param numValidationSteps the requested number of folds
 * @return a pair (training index, validation index), or {@code null} when {@code step} is
 *         not below the effective number of folds
 */
public static Pair<IIndex, IIndex> splitIndex(int step, IIndex index, int numValidationSteps) {
    final int docCount = index.getDocumentDB().getDocumentsCount();
    final int foldCount = Math.min(docCount, numValidationSteps);
    if (step >= foldCount) {
        return null;
    }

    TIntArrayList trainingDocs = new TIntArrayList();
    TIntArrayList validationDocs = new TIntArrayList();

    final int foldSize = docCount / foldCount;
    final int trainingSize = docCount - foldSize;
    final int firstTraining = foldSize * step;
    final int lastTraining = firstTraining + trainingSize - 1;

    // Positions (not document IDs) that belong to training; the window wraps modulo docCount.
    TIntIntHashMap trainingPositions = new TIntIntHashMap();
    for (int pos = firstTraining; pos <= lastTraining; pos++) {
        int wrapped = pos % docCount;
        trainingPositions.put(wrapped, wrapped);
    }

    int position = 0;
    for (IIntIterator docs = index.getDocumentDB().getDocuments(); docs.hasNext(); position++) {
        int docID = docs.next();
        if (trainingPositions.containsKey(position)) {
            trainingDocs.add(docID);
        } else {
            validationDocs.add(docID);
        }
    }

    trainingDocs.sort();
    validationDocs.sort();

    // Each part is obtained by removing the other part's documents from a fresh clone.
    IIndex trainingIndex = index.cloneIndex();
    trainingIndex.removeDocuments(new TIntArrayListIterator(validationDocs), false);

    IIndex validationIndex = index.cloneIndex();
    validationIndex.removeDocuments(new TIntArrayListIterator(trainingDocs), false);

    JatecsLogger.status().println(
            "done. The training contains " + trainingDocs.size()
                    + " document(s) and the validation contains "
                    + validationDocs.size() + " document(s).");

    return new Pair<IIndex, IIndex>(trainingIndex, validationIndex);
}
Example 14
Source File: CRWMVSingleLabelKnnFoldValidator.java From jatecs with GNU General Public License v3.0 | 4 votes |
/**
 * Splits an index into a training part and a validation part for one fold.
 *
 * <p>The number of folds is the smaller of the document count and
 * {@code numValidationSteps}. Documents are numbered by their position in the document
 * iterator; the training part is a contiguous window of positions that starts at
 * {@code step * foldSize} and wraps around the end, and every other position goes to the
 * validation part.
 *
 * @param step               zero-based fold number
 * @param index              the index to split; it is cloned, not modified
 * @param numValidationSteps the requested number of folds
 * @return a pair (training index, validation index), or {@code null} when {@code step} is
 *         not below the effective number of folds
 */
public static Pair<IIndex, IIndex> splitIndex(int step, IIndex index, int numValidationSteps) {
    final int docCount = index.getDocumentDB().getDocumentsCount();
    final int foldCount = Math.min(docCount, numValidationSteps);
    if (step >= foldCount) {
        return null;
    }

    TIntArrayList trainingDocs = new TIntArrayList();
    TIntArrayList validationDocs = new TIntArrayList();

    final int foldSize = docCount / foldCount;
    final int trainingSize = docCount - foldSize;
    final int firstTraining = foldSize * step;
    final int lastTraining = firstTraining + trainingSize - 1;

    // Positions (not document IDs) that belong to training; the window wraps modulo docCount.
    TIntIntHashMap trainingPositions = new TIntIntHashMap();
    for (int pos = firstTraining; pos <= lastTraining; pos++) {
        int wrapped = pos % docCount;
        trainingPositions.put(wrapped, wrapped);
    }

    int position = 0;
    for (IIntIterator docs = index.getDocumentDB().getDocuments(); docs.hasNext(); position++) {
        int docID = docs.next();
        if (trainingPositions.containsKey(position)) {
            trainingDocs.add(docID);
        } else {
            validationDocs.add(docID);
        }
    }

    trainingDocs.sort();
    validationDocs.sort();

    // Each part is obtained by removing the other part's documents from a fresh clone.
    IIndex trainingIndex = index.cloneIndex();
    trainingIndex.removeDocuments(new TIntArrayListIterator(validationDocs), false);

    IIndex validationIndex = index.cloneIndex();
    validationIndex.removeDocuments(new TIntArrayListIterator(trainingDocs), false);

    JatecsLogger.status().println(
            "done. The training contains " + trainingDocs.size()
                    + " document(s) and the validation contains "
                    + validationDocs.size() + " document(s).");

    return new Pair<IIndex, IIndex>(trainingIndex, validationIndex);
}
Example 15
Source File: SingleLabelKnnFoldValidator.java From jatecs with GNU General Public License v3.0 | 4 votes |
/**
 * Splits an index into a training part and a validation part for one fold.
 *
 * <p>The number of folds is the smaller of the document count and
 * {@code numValidationSteps}. Documents are numbered by their position in the document
 * iterator; the training part is a contiguous window of positions that starts at
 * {@code step * foldSize} and wraps around the end, and every other position goes to the
 * validation part.
 *
 * @param step               zero-based fold number
 * @param index              the index to split; it is cloned, not modified
 * @param numValidationSteps the requested number of folds
 * @return a pair (training index, validation index), or {@code null} when {@code step} is
 *         not below the effective number of folds
 */
public static Pair<IIndex, IIndex> splitIndex(int step, IIndex index, int numValidationSteps) {
    final int docCount = index.getDocumentDB().getDocumentsCount();
    final int foldCount = Math.min(docCount, numValidationSteps);
    if (step >= foldCount) {
        return null;
    }

    TIntArrayList trainingDocs = new TIntArrayList();
    TIntArrayList validationDocs = new TIntArrayList();

    final int foldSize = docCount / foldCount;
    final int trainingSize = docCount - foldSize;
    final int firstTraining = foldSize * step;
    final int lastTraining = firstTraining + trainingSize - 1;

    // Positions (not document IDs) that belong to training; the window wraps modulo docCount.
    TIntIntHashMap trainingPositions = new TIntIntHashMap();
    for (int pos = firstTraining; pos <= lastTraining; pos++) {
        int wrapped = pos % docCount;
        trainingPositions.put(wrapped, wrapped);
    }

    int position = 0;
    for (IIntIterator docs = index.getDocumentDB().getDocuments(); docs.hasNext(); position++) {
        int docID = docs.next();
        if (trainingPositions.containsKey(position)) {
            trainingDocs.add(docID);
        } else {
            validationDocs.add(docID);
        }
    }

    trainingDocs.sort();
    validationDocs.sort();

    // Each part is obtained by removing the other part's documents from a fresh clone.
    IIndex trainingIndex = index.cloneIndex();
    trainingIndex.removeDocuments(new TIntArrayListIterator(validationDocs), false);

    IIndex validationIndex = index.cloneIndex();
    validationIndex.removeDocuments(new TIntArrayListIterator(trainingDocs), false);

    JatecsLogger.status().println(
            "done. The training contains " + trainingDocs.size()
                    + " document(s) and the validation contains "
                    + validationDocs.size() + " document(s).");

    return new Pair<IIndex, IIndex>(trainingIndex, validationIndex);
}
Example 16
Source File: WMVSingleLabelKnnFoldValidator.java From jatecs with GNU General Public License v3.0 | 4 votes |
/**
 * Splits an index into a training part and a validation part for one fold.
 *
 * <p>The number of folds is the smaller of the document count and
 * {@code numValidationSteps}. Documents are numbered by their position in the document
 * iterator; the training part is a contiguous window of positions that starts at
 * {@code step * foldSize} and wraps around the end, and every other position goes to the
 * validation part.
 *
 * @param step               zero-based fold number
 * @param index              the index to split; it is cloned, not modified
 * @param numValidationSteps the requested number of folds
 * @return a pair (training index, validation index), or {@code null} when {@code step} is
 *         not below the effective number of folds
 */
public static Pair<IIndex, IIndex> splitIndex(int step, IIndex index, int numValidationSteps) {
    final int docCount = index.getDocumentDB().getDocumentsCount();
    final int foldCount = Math.min(docCount, numValidationSteps);
    if (step >= foldCount) {
        return null;
    }

    TIntArrayList trainingDocs = new TIntArrayList();
    TIntArrayList validationDocs = new TIntArrayList();

    final int foldSize = docCount / foldCount;
    final int trainingSize = docCount - foldSize;
    final int firstTraining = foldSize * step;
    final int lastTraining = firstTraining + trainingSize - 1;

    // Positions (not document IDs) that belong to training; the window wraps modulo docCount.
    TIntIntHashMap trainingPositions = new TIntIntHashMap();
    for (int pos = firstTraining; pos <= lastTraining; pos++) {
        int wrapped = pos % docCount;
        trainingPositions.put(wrapped, wrapped);
    }

    int position = 0;
    for (IIntIterator docs = index.getDocumentDB().getDocuments(); docs.hasNext(); position++) {
        int docID = docs.next();
        if (trainingPositions.containsKey(position)) {
            trainingDocs.add(docID);
        } else {
            validationDocs.add(docID);
        }
    }

    trainingDocs.sort();
    validationDocs.sort();

    // Each part is obtained by removing the other part's documents from a fresh clone.
    IIndex trainingIndex = index.cloneIndex();
    trainingIndex.removeDocuments(new TIntArrayListIterator(validationDocs), false);

    IIndex validationIndex = index.cloneIndex();
    validationIndex.removeDocuments(new TIntArrayListIterator(trainingDocs), false);

    JatecsLogger.status().println(
            "done. The training contains " + trainingDocs.size()
                    + " document(s) and the validation contains "
                    + validationDocs.size() + " document(s).");

    return new Pair<IIndex, IIndex>(trainingIndex, validationIndex);
}
Example 17
Source File: CRDCSSingleLabelKnnFoldValidator.java From jatecs with GNU General Public License v3.0 | 4 votes |
/**
 * Splits an index into a training part and a validation part for one fold.
 *
 * <p>The number of folds is the smaller of the document count and
 * {@code numValidationSteps}. Documents are numbered by their position in the document
 * iterator; the training part is a contiguous window of positions that starts at
 * {@code step * foldSize} and wraps around the end, and every other position goes to the
 * validation part.
 *
 * @param step               zero-based fold number
 * @param index              the index to split; it is cloned, not modified
 * @param numValidationSteps the requested number of folds
 * @return a pair (training index, validation index), or {@code null} when {@code step} is
 *         not below the effective number of folds
 */
public static Pair<IIndex, IIndex> splitIndex(int step, IIndex index, int numValidationSteps) {
    final int docCount = index.getDocumentDB().getDocumentsCount();
    final int foldCount = Math.min(docCount, numValidationSteps);
    if (step >= foldCount) {
        return null;
    }

    TIntArrayList trainingDocs = new TIntArrayList();
    TIntArrayList validationDocs = new TIntArrayList();

    final int foldSize = docCount / foldCount;
    final int trainingSize = docCount - foldSize;
    final int firstTraining = foldSize * step;
    final int lastTraining = firstTraining + trainingSize - 1;

    // Positions (not document IDs) that belong to training; the window wraps modulo docCount.
    TIntIntHashMap trainingPositions = new TIntIntHashMap();
    for (int pos = firstTraining; pos <= lastTraining; pos++) {
        int wrapped = pos % docCount;
        trainingPositions.put(wrapped, wrapped);
    }

    int position = 0;
    for (IIntIterator docs = index.getDocumentDB().getDocuments(); docs.hasNext(); position++) {
        int docID = docs.next();
        if (trainingPositions.containsKey(position)) {
            trainingDocs.add(docID);
        } else {
            validationDocs.add(docID);
        }
    }

    trainingDocs.sort();
    validationDocs.sort();

    // Each part is obtained by removing the other part's documents from a fresh clone.
    IIndex trainingIndex = index.cloneIndex();
    trainingIndex.removeDocuments(new TIntArrayListIterator(validationDocs), false);

    IIndex validationIndex = index.cloneIndex();
    validationIndex.removeDocuments(new TIntArrayListIterator(trainingDocs), false);

    JatecsLogger.status().println(
            "done. The training contains " + trainingDocs.size()
                    + " document(s) and the validation contains "
                    + validationDocs.size() + " document(s).");

    return new Pair<IIndex, IIndex>(trainingIndex, validationIndex);
}
Example 18
Source File: GlobalTSR.java From jatecs with GNU General Public License v3.0 | 4 votes |
public void computeTSR(IIndex index) { TextualProgressBar bar = new TextualProgressBar( "Compute global TSR with " + _func.getClass().getName()); int total = index.getFeatureDB().getFeaturesCount(); int step = 0; TreeSet<FeatureEntry> best = new TreeSet<FeatureEntry>(); TIntArrayList toRemove = new TIntArrayList(); IIntIterator it = index.getFeatureDB().getFeatures(); while (it.hasNext()) { int featID = it.next(); double[] scores = new double[index.getCategoryDB() .getCategoriesCount()]; for (short catID = 0; catID < scores.length; catID++) { if (!index.getDomainDB().hasCategoryFeature(catID, featID)) scores[catID] = 0; else scores[catID] = _func.compute(catID, featID, index); } // Compute feature global value. double score = _global.compute(scores, index); FeatureEntry fe = new FeatureEntry(); fe.featureID = featID; fe.score = score; best.add(fe); if (best.size() > _numBestFeature) { toRemove.add(best.first().featureID); best.remove(best.first()); } step++; bar.signal((step * 100) / total); } bar.signal(100); toRemove.sort(); // Remove the worst features. JatecsLogger.status().print("Removing worst features..."); index.removeFeatures(new TIntArrayListIterator(toRemove)); JatecsLogger.status().println( "done. Now the DB contains " + index.getFeatureDB().getFeaturesCount() + " feature(s)."); }