Java Code Examples for gnu.trove.set.hash.THashSet#add()
The following examples show how to use
gnu.trove.set.hash.THashSet#add().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PartitionEquivalences.java From metanome-algorithms with Apache License 2.0 | 6 votes |
public void addPartition(EquivalenceManagedPartition partition) { if (!this.observedPartitions.contains(partition.getIndices()) && !this.containsSimilarPartition(partition)) { this.observedPartitions.add(partition.getIndices()); long hashNumber = partition.getHashNumber(); System.out.println(String.format("Partition[%s]\t%d\tSize: %d", partition.getIndices(), Long.valueOf(hashNumber), Integer.valueOf(partition.size()))); partitionHashes.putIfAbsent(hashNumber, new TIntObjectHashMap<THashSet<EquivalenceManagedPartition>>()); partitionHashes.get(hashNumber).putIfAbsent(partition.size(), new THashSet<EquivalenceManagedPartition>()); THashSet<EquivalenceManagedPartition> partitionGroup = partitionHashes.get(hashNumber).get(partition.size()); if (partitionGroup.isEmpty()) { partitionGroup.add(partition); } else { // then there is at least one element in the partitionGroup checkPossibleEquivalences(partitionGroup, partition); } } }
Example 2
Source File: Observations.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getUncheckedMaximalSubsets(ColumnCollection lhs, ColumnOrder order) { THashSet<ColumnCollection> uncheckedMaximalSubsets = new THashSet<>(); // if (lhs.cardinality() > 2) { for (int columnIndex : order.getOrderHighDistinctCount(lhs)) { ColumnCollection subsetIndices = lhs.removeColumnCopy(columnIndex); if (!this.containsKey(subsetIndices)) { uncheckedMaximalSubsets.add(subsetIndices); } } // } return uncheckedMaximalSubsets; }
Example 3
Source File: Observations.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getUncheckedOrCandidateMaximalSubsets(ColumnCollection lhs, ColumnOrder order) { THashSet<ColumnCollection> uncheckedMaximalSubsets = new THashSet<>(); // we only want to check subsets with at least 2 columns if (lhs.cardinality() > 2) { for (int columnIndex : order.getOrderHighDistinctCount(lhs)) { ColumnCollection subsetIndices = lhs.removeColumnCopy(columnIndex); if (!this.containsKey(subsetIndices) || this.get(subsetIndices) == Observation.CANDIDATE_MINIMAL_DEPENDENCY) { uncheckedMaximalSubsets.add(subsetIndices); } } } return uncheckedMaximalSubsets; }
Example 4
Source File: Observations.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getMaximalSubsets(ColumnCollection lhs, ColumnOrder order) { THashSet<ColumnCollection> uncheckedMaximalSubsets = new THashSet<>(); // we only want to check subsets with at least 2 columns if (lhs.cardinality() > 2) { for (int columnIndex : order.getOrderHighDistinctCount(lhs)) { ColumnCollection subsetIndices = lhs.removeColumnCopy(columnIndex); uncheckedMaximalSubsets.add(subsetIndices); } } return uncheckedMaximalSubsets; }
Example 5
Source File: Observations.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getUncheckedOrCandidateMinimalSupersets(ColumnCollection lhs, int rhsIndex, ColumnOrder order) { THashSet<ColumnCollection> uncheckedMinimalSupersets = new THashSet<>(); for (int columnIndex : order.getOrderLowDistinctCount(lhs.setCopy(rhsIndex).complement())) { ColumnCollection supersetIndices = lhs.setCopy(columnIndex); if (!this.containsKey(supersetIndices) || this.get(supersetIndices) == Observation.CANDIDATE_MAXIMAL_NON_DEPENDENCY) { uncheckedMinimalSupersets.add(supersetIndices); } } return uncheckedMinimalSupersets; }
Example 6
Source File: Observations.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getUncheckedMinimalSupersets(ColumnCollection lhs, int rhsIndex, ColumnOrder order) { THashSet<ColumnCollection> uncheckedMinimalSupersets = new THashSet<>(); for (int columnIndex : order.getOrderLowDistinctCount(lhs.setCopy(rhsIndex).complement())) { ColumnCollection supersetIndices = lhs.setCopy(columnIndex); if (!this.containsKey(supersetIndices)) { uncheckedMinimalSupersets.add(supersetIndices); } } return uncheckedMinimalSupersets; }
Example 7
Source File: Observations.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getMinimalSupersets(ColumnCollection lhs, int rhsIndex, ColumnOrder order) { THashSet<ColumnCollection> uncheckedMinimalSupersets = new THashSet<>(); for (int columnIndex : order.getOrderLowDistinctCount(lhs.setCopy(rhsIndex).complement())) { ColumnCollection supersetIndices = lhs.setCopy(columnIndex); uncheckedMinimalSupersets.add(supersetIndices); } return uncheckedMinimalSupersets; }
Example 8
Source File: Dependencies.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getPrunedSubsets(THashSet<ColumnCollection> subsets) { THashSet<ColumnCollection> prunedSubsets = new THashSet<>(); for (ColumnCollection subset : subsets) { if (this.isRepresented(subset)) { prunedSubsets.add(subset); } } return prunedSubsets; }
Example 9
Source File: NonDependencies.java From metanome-algorithms with Apache License 2.0 | 5 votes |
public THashSet<ColumnCollection> getPrunedSupersets(THashSet<ColumnCollection> supersets) { THashSet<ColumnCollection> prunedSupersets = new THashSet<>(); for (ColumnCollection superset : supersets) { if (this.isRepresented(superset)) { prunedSupersets.add(superset); } } return prunedSupersets; }
Example 10
Source File: DICT.java From fnlp with GNU Lesser General Public License v3.0 | 5 votes |
public static void BMES2DICT(String file, String dicfile) throws UnsupportedEncodingException, FileNotFoundException, IOException { BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf8")); String line = null; int count=0; THashSet<String> dict = new THashSet<String>(); StringBuilder sb = new StringBuilder(); while ((line = bfr.readLine()) != null) { if(line.length()==0) continue; String[] toks = line.split("\\s+"); String label = toks[1]; String w = toks[0]; if(w.equals(" ")){//空格特殊处理 if(sb.length()>0){ dict.add(sb.toString()); sb = new StringBuilder(); } continue; } sb.append(w); if (label.equals("E") || label.equals("S")) { dict.add(sb.toString()); sb = new StringBuilder(); } } MyCollection.write(dict,dicfile); }
Example 11
Source File: IterativeConditionalAnalysis.java From systemsgenetics with GNU General Public License v3.0 | 5 votes |
private THashSet<String> collectEQTLProbes(String origOutputDir, int currentIteration, double fdr) throws IOException { THashSet<String> output = new THashSet<String>(); String iterationFile = origOutputDir + "/Iteration" + (currentIteration - 1) + "/eQTLProbesFDR" + fdr + "-ProbeLevel.txt.gz"; if (m_settings.fdrType.equals(FDR.FDRMethod.FULL)) { iterationFile = origOutputDir + "/Iteration" + (currentIteration - 1) + "/eQTLProbesFDR" + fdr + ".txt.gz"; } else if (m_settings.fdrType.equals(FDR.FDRMethod.SNPLEVEL)) { iterationFile = origOutputDir + "/Iteration" + (currentIteration - 1) + "/eQTLProbesFDR" + fdr + "-SNPLevel.txt.gz"; } else if (m_settings.fdrType.equals(FDR.FDRMethod.GENELEVEL)) { iterationFile = origOutputDir + "/Iteration" + (currentIteration - 1) + "/eQTLProbesFDR" + fdr + "-GeneLevel.txt.gz"; } System.out.println("Trying to collect genes/probes from: " + iterationFile); if (Gpio.exists(iterationFile)) { TextFile tf = new TextFile(iterationFile, TextFile.R); tf.readLineElems(TextFile.tab); String[] elems = tf.readLineElems(TextFile.tab); while (elems != null) { output.add(elems[4]); elems = tf.readLineElems(TextFile.tab); } System.out.println("Iteration " + (currentIteration - 1) + " has " + output.size() + " significant probes."); } return output; }
Example 12
Source File: DFDMiner.java From metanome-algorithms with Apache License 2.0 | 4 votes |
private Stack<Seed> nextSeeds(int currentRHSIndex) { // System.out.println("Find holes"); THashSet<ColumnCollection> deps = new THashSet<>(); ArrayList<ColumnCollection> currentMaximalNonDependencies = maximalNonDependencies.getLHSForRHS(currentRHSIndex); HashSet<ColumnCollection> currentMinimalDependencies = new HashSet<>(minimalDependencies.getLHSForRHS(currentRHSIndex)); ArrayList<ColumnCollection> newDeps = new ArrayList<>(numberOfColumns * deps.size()); // Holes holes = new Holes(); // int i = 0; // for (ColumnCollection maximalNonDependency : currentMaximalNonDependencies) { // ColumnCollection complement = maximalNonDependency.setCopy(currentRHSIndex).complement(); // if (deps.isEmpty()) { // ColumnCollection emptyColumnIndices = new ColumnCollection(numberOfColumns); // for (Integer complementColumnIndex : complement.getSetBits()) { // deps.add(emptyColumnIndices.setCopy(complementColumnIndex)); // } // } else { // for (ColumnCollection dep : deps) { // int[] setBits = complement.getSetBits(); // for (int setBit = 0; setBit < setBits.length; setBit++) { // holes.add(dep.setCopy(setBits[setBit])); //// System.out.println("Dep:\t" + dep.setCopy(setBits[setBit])); // } // } // // minimize newDeps // System.out.println(i++ + "\t" + currentMaximalNonDependencies.size()); // System.out.println("total deps:\t" + deps.size()); // System.out.println("before minimizing:\t" + holes.size()); //// ArrayList<ColumnCollection> minimizedNewDeps = minimizeSeeds(newDeps); // holes.minimize(); // System.out.println("after minimizing:\t" + holes.size()); // deps.clear(); // deps.addAll(holes); // holes.clear(); // } // } for (ColumnCollection maximalNonDependency : currentMaximalNonDependencies) { ColumnCollection complement = maximalNonDependency.setCopy(currentRHSIndex).complement(); if (deps.isEmpty()) { ColumnCollection emptyColumnIndices = new ColumnCollection(numberOfColumns); for (int complementColumnIndex : complement.getSetBits()) { 
deps.add(emptyColumnIndices.setCopy(complementColumnIndex)); } } else { for (ColumnCollection dep : deps) { int[] setBits = complement.getSetBits(); for (int setBit = 0; setBit < setBits.length; setBit++) { newDeps.add(dep.setCopy(setBits[setBit])); } } // minimize newDeps ArrayList<ColumnCollection> minimizedNewDeps = minimizeSeeds(newDeps); deps.clear(); deps.addAll(minimizedNewDeps); newDeps.clear(); } } // return only elements that aren't already covered by the minimal // dependencies Stack<Seed> remainingSeeds = new Stack<>(); deps.removeAll(currentMinimalDependencies); for (ColumnCollection remainingSeed : deps) { remainingSeeds.push(new Seed(remainingSeed)); } return remainingSeeds; }
Example 13
Source File: RLSeg.java From fnlp with GNU Lesser General Public License v3.0 | 4 votes |
int update(String[] toks) throws IOException { if(toks==null) return 0; THashSet<String> newdict = new THashSet<String>(); String nowords = ""; int count = 0; for(int i=0;i<toks.length;i++){//取得包含新词的最长子串 if(Chars.isLetterOrDigitOrPunc(toks[i])) continue; if(!dict.contains(toks[i])&&!tempdict.contains(toks[i])){ nowords += "" + toks[i]; count++; }else{ if(nowords.length()>0){ System.out.println(nowords); newdict.add(nowords.trim()); nowords = ""; } } } TObjectHashIterator<String> it = newdict.iterator(); while(it.hasNext()){ String s = it.next(); if(nodict.contains(s)) continue; System.out.println("搜索: "+s); THashSet<String> sset = getNewWords(s); if(sset==null||sset.size()==0) continue; System.out.println(sset); tempdict.addAll(sset); if(!sset.contains(s)&&!nodict.contains(s)){ nodict.add(s); bwNo.write(s); bwNo.write("\n"); } } bwNew.flush(); bwNo.flush(); return count; }
Example 14
Source File: RLSeg.java From fnlp with GNU Lesser General Public License v3.0 | 4 votes |
public THashSet<String> getNewWords(String s) throws IOException { if(s.length()==0) return null; THashSet<String> newset = new THashSet<String>(); HashMap<String,Float> map = new HashMap<String, Float>(); String q = genQuery(s); String res = SearchByBaidu.search(q); if(res.length()==0) return null; String[] words = tag.tag2Array(res); for(int i=0;i<words.length;i++){ String w = words[i]; if(w.length()<2||dict.contains(w)||tempdict.contains(w)) continue; // if(dict.contains(words[i])) // continue; if(map.containsKey(w)) map.put(w, map.get(w)+1); else map.put(w, 1f); } // Set<Entry<String, Float>> set = map.entrySet(); // for(Entry e:set){ // e.setValue((Float) e.getValue()/words.length); // } List<Entry> list = MyCollection.sort(map); int num = getOccur(res, s); float thres = num*prop; thres = thres<50?50:thres; for(Entry e:list){ String ss = (String) e.getKey(); if((Float) e.getValue()>thres&&ss.length()>1&&!dict.contains(ss)&&!tempdict.contains(ss)){ newset.add(ss); bwNew.write(ss); bwNew.write("\n"); } } newset.remove("快照"); return newset; }
Example 15
Source File: TriTyperGeneticalGenomicsDataset.java From systemsgenetics with GNU General Public License v3.0 | 4 votes |
public TriTyperGeneticalGenomicsDataset(TriTyperGeneticalGenomicsDatasetSettings settings, Pair<List<String>, List<List<String>>> pathwayDefinitions, boolean displayWarnings) throws IOException, Exception { this.settings = settings; settings.genotypeLocation = Gpio.formatAsDirectory(settings.genotypeLocation); if (settings.expressionLocation == null) { settings.expressionLocation = settings.genotypeLocation + "ExpressionData.txt"; } // load the genotype metadata genotypeData = new TriTyperGenotypeData(); genotypeData.displayWarnings = displayWarnings; genotypeData.load(settings.genotypeLocation, settings.snpmapFileLocation, settings.snpFileLocation); THashSet<String> includedExpressionIndividuals = new THashSet<String>(); Boolean[] isIncluded = genotypeData.getIsIncluded(); // preload the sample coupling file loadCouplings(); // determine which expression samples to include Set<Entry<String, String>> entries = genotypeToExpressionCouplings.entrySet(); for (Entry<String, String> entry : entries) { String genotypeIndividual = entry.getKey(); Integer genotypeIndividualId = genotypeData.getIndividualId(genotypeIndividual); if (genotypeIndividualId != -9 && isIncluded[genotypeIndividualId] != null && isIncluded[genotypeIndividualId]) { includedExpressionIndividuals.add(entry.getValue()); } } if (includedExpressionIndividuals.isEmpty()) { System.err.println("ERROR: none of the expression samples will be included with your current settings.\n" + "zPlease check the links between genotype and gene expression samples and/or your PhenotypeInformation.txt"); System.exit(-1); } // load the expression data expressionData = new TriTyperExpressionData(); expressionData.displayWarnings = displayWarnings; expressionData.confineToProbes(settings.tsProbesConfine); expressionData.setConfineToProbesThatMapToAnyChromosome(settings.confineProbesToProbesMappingToAnyChromosome); expressionData.setConfineToProbesThatMapToChromosome(settings.confineProbesToProbesThatMapToChromosome); 
expressionData.setIncludeIndividuals(includedExpressionIndividuals); expressionData.setPathwayDefinitions(pathwayDefinitions); expressionDataLoadedCorrectly = expressionData.load(settings.expressionLocation, settings.probeannotation, settings.expressionplatform, (settings.cisAnalysis && settings.transAnalysis)); pruneGenotypeToExpressionCouplings(); if (settings.covariateFile != null && Gpio.exists(settings.covariateFile)) { // load covariates.. System.out.println("Loading covariates: " + settings.covariateFile); HashSet<String> individualSet = new HashSet<String>(); individualSet.addAll(Arrays.asList(expressionData.getIndividuals())); covariates = new DoubleMatrixDataset<String, String>(settings.covariateFile, null, individualSet); if (covariates.colObjects.isEmpty()) { // try the transpose System.out.println("Could not find matching sample identifiers between covariate file and expression file.\nTransposing your covariate file."); covariates = new DoubleMatrixDataset<String, String>(settings.covariateFile, individualSet); if (covariates.rowObjects.isEmpty()) { System.err.println("Could not find matching samples between expression data and covariate data."); System.exit(-1); } else { covariates.transposeDataset(); // put the covariates on the rows, samples on the columns covariates.recalculateHashMaps(); } } covariates.removeColumnsWithNaNs(); covariates.recalculateHashMaps(); if (covariates.colObjects.isEmpty()) { System.err.println("ERROR: after removing samples with NaN values, no covariates remain"); System.exit(-1); } System.out.println(covariates.rowObjects.size() + " covariates loaded for " + covariates.colObjects.size() + " samples"); // remove expression samples without covariates, and reorder expression data expressionData.pruneAndReorderSamples(covariates.colObjects); // prune expression dataset to samples having covariates loadCouplings(); pruneGenotypeToExpressionCouplings(); } }