Python scipy.argsort() Examples
The following are 5 code examples of scipy.argsort(), taken from open-source projects; the original project and source file are noted above each example.
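All of these examples rely on the same behaviour: argsort returns the indices that would sort an array, not the sorted values. Note that scipy.argsort was historically just a re-export of numpy.argsort from the top-level scipy namespace; recent SciPy releases deprecate and remove these aliases, so numpy.argsort is the portable spelling. A minimal sketch of the behaviour the examples below depend on:

import numpy

values = numpy.array([0.3, 0.1, 0.4, 0.2])
order = numpy.argsort(values)   # indices that would sort `values`
print(order)                    # [1 3 0 2]
print(values[order])            # [0.1 0.2 0.3 0.4]

# argsort of the argsort gives the inverse permutation, i.e. each
# element's rank; Example #2 below uses this trick to undo its sort.
ranks = order.argsort()
print(ranks)                    # [2 0 3 1]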
Example #1
Source File: plinkfiles.py From ldpred with MIT License
def parse_plink_snps(genotype_file, snp_indices):
    plinkf = plinkfile.PlinkFile(genotype_file)
    samples = plinkf.get_samples()
    num_individs = len(samples)
    num_snps = len(snp_indices)
    raw_snps = sp.empty((num_snps, num_individs), dtype='int8')
    # If these indices are not in order then we place them in the right place while parsing SNPs.
    snp_order = sp.argsort(snp_indices)
    ordered_snp_indices = list(snp_indices[snp_order])
    ordered_snp_indices.reverse()
    # Iterating over file to load SNPs
    snp_i = 0
    next_i = ordered_snp_indices.pop()
    line_i = 0
    max_i = ordered_snp_indices[0]
    while line_i <= max_i:
        if line_i < next_i:
            next(plinkf)
        elif line_i == next_i:
            line = next(plinkf)
            snp = sp.array(line, dtype='int8')
            bin_counts = line.allele_counts()
            if bin_counts[-1] > 0:
                mode_v = sp.argmax(bin_counts[:2])
                snp[snp == 3] = mode_v
            s_i = snp_order[snp_i]
            raw_snps[s_i] = snp
            if line_i < max_i:
                next_i = ordered_snp_indices.pop()
            snp_i += 1
        line_i += 1
    plinkf.close()
    assert snp_i == len(raw_snps), 'Parsing SNPs from plink file failed.'
    num_indivs = len(raw_snps[0])
    freqs = sp.sum(raw_snps, 1, dtype='float32') / (2 * float(num_indivs))
    return raw_snps, freqs
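The argsort here is doing bookkeeping: the plink file must be read sequentially, so the requested snp_indices are visited in ascending order, and snp_order records where each parsed row belongs in the caller's original ordering. A standalone sketch of that reordering pattern with plain numpy arrays (hypothetical data, not ldpred code):

import numpy as np

file_rows = np.arange(30).reshape(10, 3)   # stand-in for rows read sequentially from a file
requested = np.array([7, 2, 5])            # caller's (unsorted) row indices
order = np.argsort(requested)              # visit order within `requested`: positions 1, 2, 0

out = np.empty((len(requested), 3), dtype=file_rows.dtype)
for read_pos, file_idx in enumerate(sorted(requested)):
    # The read_pos-th row read from the "file" goes back to the slot the caller asked for.
    out[order[read_pos]] = file_rows[file_idx]

assert (out == file_rows[requested]).all()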
Example #2
Source File: MultipleComparisonCorrection.py From ClearMap with GNU General Public License v3.0
def correctPValues(pvalues, method = 'BH'):
    """Corrects p-values for multiple testing using various methods

    Arguments:
        pvalues (array): list of p values to be corrected
        method (Optional[str]): method to use: BH = FDR = Benjamini-Hochberg, B = FWER = Bonferoni

    References:
        - `Benjamini Hochberg, 1995 <http://www.jstor.org/stable/2346101?seq=1#page_scan_tab_contents>`_
        - `Bonferoni correction <http://www.tandfonline.com/doi/abs/10.1080/01621459.1961.10482090#.VmHWUHbH6KE>`_
        - `R statistics package <https://www.r-project.org/>`_

    Notes:
        - modified from http://statsmodels.sourceforge.net/ipdirective/generated/scikits.statsmodels.sandbox.stats.multicomp.multipletests.html
    """
    pvals = numpy.asarray(pvalues);

    if method.lower() in ['bh', 'fdr']:
        pvals_sorted_ids = numpy.argsort(pvals);
        pvals_sorted = pvals[pvals_sorted_ids]
        sorted_ids_inv = pvals_sorted_ids.argsort()

        n = len(pvals);
        bhfactor = numpy.arange(1, n + 1) / float(n);

        pvals_corrected_raw = pvals_sorted / bhfactor;
        pvals_corrected = numpy.minimum.accumulate(pvals_corrected_raw[::-1])[::-1]
        pvals_corrected[pvals_corrected > 1] = 1;

        return pvals_corrected[sorted_ids_inv];

    elif method.lower() in ['b', 'fwer']:
        n = len(pvals);
        pvals_corrected = n * pvals;
        pvals_corrected[pvals_corrected > 1] = 1;

        return pvals_corrected;

    #return reject[pvals_sortind.argsort()]
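A usage sketch for the function above (assuming numpy is imported as in the original module): the BH-corrected values come back in the same order as the input thanks to the sorted_ids_inv inverse permutation.

import numpy

pvalues = numpy.array([0.001, 0.008, 0.039, 0.041, 0.042, 0.060, 0.074, 0.205])

bh = correctPValues(pvalues, method='BH')   # Benjamini-Hochberg / FDR
bf = correctPValues(pvalues, method='B')    # Bonferroni / FWER

print(bh)   # first entry is 0.001 * 8 / 1 = 0.008 for this input
print(bf)   # simply min(8 * p, 1) for each entry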
Example #3
Source File: pcabinaryprojections.py From aurum-datadiscovery with MIT License
def __init__(self, hash_name, projection_count, training_set):
    """
    Computes principal components for training vector set.
    Uses first projection_count principal components for projections.

    Training set must be either a numpy matrix or a list of numpy vectors.
    """
    super(PCABinaryProjections, self).__init__(hash_name)
    self.projection_count = projection_count

    # Only do training if training set was specified
    if not training_set is None:
        # Get numpy array representation of input
        training_set = numpy_array_from_list_or_numpy_array(training_set)

        # Get subspace size from training matrix
        self.dim = training_set.shape[0]

        # Get transposed training set matrix for PCA
        training_set_t = numpy.transpose(training_set)

        # Compute principal components
        (eigenvalues, eigenvectors) = perform_pca(training_set_t)

        # Get largest N eigenvalue/eigenvector indices
        largest_eigenvalue_indices = numpy.flipud(
            scipy.argsort(eigenvalues))[:projection_count]

        # Create matrix for first N principal components
        self.components = numpy.zeros((self.dim, len(largest_eigenvalue_indices)))

        # Put first N principal components into matrix
        for index in range(len(largest_eigenvalue_indices)):
            self.components[:, index] = \
                eigenvectors[:, largest_eigenvalue_indices[index]]

        # We need the component vectors to be in the rows
        self.components = numpy.transpose(self.components)
    else:
        self.dim = None
        self.components = None

    # This is only used in case we need to process sparse vectors
    self.components_csr = None
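The scipy.argsort call above selects the largest eigenvalues: argsort orders them ascending, flipud reverses that order, and the slice keeps the first projection_count indices. A self-contained sketch of the same selection using numpy.linalg.eigh in place of the project's perform_pca helper (hypothetical data):

import numpy

rng = numpy.random.default_rng(0)
data = rng.normal(size=(100, 5))        # 100 samples in 5 dimensions
cov = numpy.cov(data, rowvar=False)     # 5 x 5 covariance matrix

eigenvalues, eigenvectors = numpy.linalg.eigh(cov)

projection_count = 2
largest = numpy.flipud(numpy.argsort(eigenvalues))[:projection_count]
components = eigenvectors[:, largest].T  # component vectors as rows, as in the class above

print(largest, components.shape)         # [4 3] (2, 5) -- eigh returns ascending eigenvalues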
Example #4
Source File: pcadiscretizedprojections.py From aurum-datadiscovery with MIT License
def __init__(self, hash_name, projection_count, training_set, bin_width):
    """
    Computes principal components for training vector set.
    Uses first projection_count principal components for projections.

    Training set must be either a numpy matrix or a list of numpy vectors.
    """
    super(PCADiscretizedProjections, self).__init__(hash_name)
    self.projection_count = projection_count
    self.bin_width = bin_width

    # Only do training if training set was specified
    if not training_set is None:
        # Get numpy array representation of input
        training_set = numpy_array_from_list_or_numpy_array(training_set)

        # Get subspace size from training matrix
        self.dim = training_set.shape[0]

        # Get transposed training set matrix for PCA
        training_set_t = numpy.transpose(training_set)

        # Compute principal components
        (eigenvalues, eigenvectors) = perform_pca(training_set_t)

        # Get largest N eigenvalue/eigenvector indices
        largest_eigenvalue_indices = numpy.flipud(
            scipy.argsort(eigenvalues))[:projection_count]

        # Create matrix for first N principal components
        self.components = numpy.zeros((self.dim, len(largest_eigenvalue_indices)))

        # Put first N principal components into matrix
        for index in range(len(largest_eigenvalue_indices)):
            self.components[:, index] = \
                eigenvectors[:, largest_eigenvalue_indices[index]]

        # We need the component vectors to be in the rows
        self.components = numpy.transpose(self.components)

    # This is only used in case we need to process sparse vectors
    self.components_csr = None
Example #5
Source File: recallprecisionexperiment.py From aurum-datadiscovery with MIT License
def __init__(self, N, vectors, coverage_ratio=0.2):
    """
    Performs exact nearest neighbour search on the data set.

    vectors can either be a numpy matrix with all the vectors as columns
    OR a python array containing the individual numpy vectors.
    """
    # We need a dict from vector string representation to index
    self.vector_dict = {}
    self.N = N
    self.coverage_ratio = coverage_ratio
    numpy_vectors = numpy_array_from_list_or_numpy_array(vectors)

    # Get numpy array representation of input
    self.vectors = numpy.vstack([unitvec(v) for v in numpy_vectors.T])

    # Build map from vector string representation to vector
    for index, v in enumerate(self.vectors):
        self.vector_dict[self.__vector_to_string(v)] = index

    # Determine the indices of query vectors used for comparance
    # with approximated search.
    query_count = numpy.floor(self.coverage_ratio * len(self.vectors))
    self.query_indices = []
    for k in range(int(query_count)):
        index = numpy.floor(k * (float(len(self.vectors)) / query_count))
        index = min(index, len(self.vectors) - 1)
        self.query_indices.append(int(index))

    print('\nStarting exact search (query set size=%d)...\n' % query_count)

    # For each query vector get the closest N neighbours
    self.closest = {}
    self.exact_search_time_per_vector = 0.0

    for index in self.query_indices:
        v = self.vectors[index, numpy.newaxis]
        exact_search_start_time = time.time()
        D = cdist(v, self.vectors, 'euclidean')
        self.closest[index] = scipy.argsort(D)[0, 1:N + 1]

        # Save time needed for exact search
        exact_search_time = time.time() - exact_search_start_time
        self.exact_search_time_per_vector += exact_search_time

    print('Done with exact search...\n')

    # Normalize search time
    self.exact_search_time_per_vector /= float(len(self.query_indices))
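The core of the exact search is one line: cdist produces a single row of distances from the query to every vector, and argsort turns that row into a ranking; slicing from 1 skips the query itself (distance 0). A standalone sketch with numpy.argsort in place of the scipy alias (hypothetical data):

import numpy
from scipy.spatial.distance import cdist

rng = numpy.random.default_rng(0)
vectors = rng.normal(size=(50, 8))           # 50 vectors of dimension 8

N = 5
query_index = 0
query = vectors[query_index, numpy.newaxis]  # keep the query 2-D for cdist

D = cdist(query, vectors, 'euclidean')       # shape (1, 50)
nearest = numpy.argsort(D)[0, 1:N + 1]       # indices of the N nearest neighbours

print(nearest)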