Python sklearn.feature_selection.mutual_info_classif() Examples
The following are 5 code examples of sklearn.feature_selection.mutual_info_classif(), drawn from open source projects; the original project and source file are noted above each example. You may also want to check out the other available functions and classes of the module sklearn.feature_selection.
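For orientation, mutual_info_classif estimates the mutual information between each feature and a discrete target, returning one non-negative score per feature. A minimal, self-contained sketch (synthetic data and parameter values chosen purely for illustration):

import numpy as np
from sklearn.feature_selection import mutual_info_classif

rng = np.random.RandomState(0)
y = rng.randint(0, 2, size=200)            # binary target
X = np.column_stack([
    y + rng.normal(scale=0.5, size=200),   # informative feature
    rng.normal(size=200),                  # pure noise feature
])

# One non-negative score per feature; higher means stronger dependence on y
scores = mutual_info_classif(X, y, random_state=0)
print(scores)  # the first score should be clearly larger than the second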
Example #1
Source File: preprocessing.py From Emotion-Recognition-from-Speech with Apache License 2.0 | 7 votes |
def mutual_info_select(self, F, y, threshold):
    # Score each feature by its mutual information with the labels
    mi = list(enumerate(mutual_info_classif(F, y)))
    # Keep the indices of features scoring above the threshold
    f_best = []
    for (ind, rank) in mi:
        if rank > threshold:
            f_best.append(ind)
    return f_best
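A standalone sketch of the same threshold-based selection, assuming synthetic data and an illustrative threshold of 0.05:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.feature_selection import mutual_info_classif

X, y = make_classification(n_samples=100, n_features=8,
                           n_informative=3, random_state=0)

# Keep the indices of features whose estimated MI exceeds the threshold
mi = mutual_info_classif(X, y, random_state=0)
f_best = [ind for ind, rank in enumerate(mi) if rank > 0.05]
print(f_best)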
Example #2
Source File: scores.py From SecuML with GNU General Public License v2.0 | 5 votes |
def _set_scoring_func(self):
    # Variance needs no labels; the other criteria require annotations
    self.scoring_func = [('variance', False)]
    if self.annotated_instances.num_instances() > 0:
        self.scoring_func.append(('f_classif', True))
        self.scoring_func.append(('mutual_info_classif', False))
        # chi2 is only valid for non-negative features
        if self.instances.features.all_positives():
            self.scoring_func.append(('chi2', True))
Example #3
Source File: scores.py From SecuML with GNU General Public License v2.0 | 5 votes |
def compute_scoring_func(self, func):
    if func == 'variance':
        features = self.instances.features.get_values()
        annotations = self.instances.annotations.get_labels()
        if isinstance(features, spmatrix):
            variance = mean_variance_axis(features, axis=0)[1]
        else:
            variance = features.var(axis=0)
        return variance, None

    features = self.annotated_instances.features.get_values()
    annotations = self.annotated_instances.annotations.get_supervision(
        self.multiclass)
    if func == 'f_classif':
        return f_classif(features, annotations)
    elif func == 'mutual_info_classif':
        if isinstance(features, spmatrix):
            discrete_indexes = True
        else:
            # Treat binary features as discrete, the rest as continuous
            features_types = self.instances.features.info.types
            discrete_indexes = [i for i, t in enumerate(features_types)
                                if t == FeatureType.binary]
            if not discrete_indexes:
                discrete_indexes = False
        return (mutual_info_classif(features, annotations,
                                    discrete_features=discrete_indexes),
                None)
    elif func == 'chi2':
        return chi2(features, annotations)
    else:
        assert(False)
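The discrete_indexes handling above maps onto the discrete_features parameter of mutual_info_classif, which accepts a boolean or an array of column indices. A small sketch of the indices form (synthetic data, illustrative column layout):

import numpy as np
from sklearn.feature_selection import mutual_info_classif

rng = np.random.RandomState(0)
y = rng.randint(0, 2, size=200)
X = np.column_stack([
    y ^ rng.randint(0, 2, size=200),   # binary feature, treated as discrete
    rng.normal(size=200),              # continuous feature
])

# Column 0 is declared discrete; column 1 is handled as continuous
scores = mutual_info_classif(X, y, discrete_features=[0], random_state=0)
print(scores)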
Example #4
Source File: eda.py From xam with MIT License | 4 votes |
def feature_importance_classification(features, target, n_neighbors=3,
                                      random_state=None):

    cont = features.select_dtypes(include=[np.floating])
    disc = features.select_dtypes(include=[np.integer, np.bool])

    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:

        # F-test
        f_test = feature_selection.f_classif(cont, target)
        cont_imp['f_statistic'] = f_test[0]
        cont_imp['f_p_value'] = f_test[1]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(
            cont, target, discrete_features=False,
            n_neighbors=n_neighbors, random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:

        # Chi²-test
        chi2_tests = defaultdict(dict)
        for feature in disc.columns:
            cont = pd.crosstab(disc[feature], target)
            statistic, p_value, _, _ = stats.chi2_contingency(cont)
            chi2_tests[feature]['chi2_statistic'] = statistic
            chi2_tests[feature]['chi2_p_value'] = p_value

        chi2_tests_df = pd.DataFrame.from_dict(chi2_tests, orient='index')
        disc_imp['chi2_statistic'] = chi2_tests_df['chi2_statistic']
        disc_imp['chi2_p_value'] = chi2_tests_df['chi2_p_value']

        # Cramér's V (corrected)
        disc_imp['cramers_v'] = [
            cramers_v_corrected_stat(pd.crosstab(feature, target).values)
            for _, feature in disc.iteritems()
        ]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(
            disc, target, discrete_features=True,
            n_neighbors=n_neighbors, random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp
Example #5
Source File: shapelet_transform.py From pyts with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _fit_one_time_series(self, x, X, y, n_timestamps, n_shapelets,
                         window_sizes, window_steps, remove_similar, i, rng):
    """Fit one time series."""
    # Extract all shapelets
    shapelets, lengths, start_idx, end_idx = _extract_all_shapelets(
        x, window_sizes, window_steps, n_timestamps)

    # Derive distances between shapelets and time series
    X_dist = _derive_all_distances(
        X, window_sizes, shapelets, lengths, fit=True)

    if self.criterion == 'mutual_info':
        scores = mutual_info_classif(X_dist, y, discrete_features=False,
                                     random_state=rng)
    else:
        scores, _ = f_classif(X_dist, y)

    # Flatten the list of 2D arrays into an array of 1D arrays
    shapelets = [list(shapelet) for shapelet in shapelets]
    shapelets = np.asarray(list(chain.from_iterable(shapelets)))

    # Concatenate the list/tuple of 1D arrays into one 1D array
    start_idx = np.concatenate(start_idx)
    end_idx = np.concatenate(end_idx)

    # Remove similar shapelets
    if remove_similar:
        idx = _remove_similar_shapelets(scores.copy(), start_idx, end_idx)
        scores = scores[idx]
        shapelets = shapelets[idx]
        start_idx = start_idx[idx]
        end_idx = end_idx[idx]
        X_dist = X_dist[:, idx]

    # Keep at most 'n_shapelets'
    if scores.size > n_shapelets - 1:
        idx = np.argpartition(
            scores, scores.size - n_shapelets)[-n_shapelets:]
        scores = scores[idx]
        shapelets = shapelets[idx]
        start_idx = start_idx[idx]
        end_idx = end_idx[idx]
        X_dist = X_dist[:, idx]

    time_series_idx = np.full(scores.size, i)

    return X_dist, scores, shapelets, start_idx, end_idx, time_series_idx