Python sklearn.datasets.load_breast_cancer() Examples
The following are 30 code examples of sklearn.datasets.load_breast_cancer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: test_classifier_comb.py From combo with BSD 2-Clause "Simplified" License | 7 votes |
def setUp(self):
    """Split the breast cancer data and build an averaging aggregator.

    The aggregator is only constructed here; it is not fitted.
    """
    seed = 42

    # Score thresholds stored for later use (presumably by the test
    # assertions -- confirm against the test methods).
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.4,
                             random_state=seed)
    self.X_train, self.X_test, self.y_train, self.y_test = split

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]
    self.clf = SimpleClassifierAggregator(base_estimators, method='average')
Example #2
Source File: test_Feature_Binarizer_From_Trees.py From AIX360 with Apache License 2.0 | 7 votes |
def setUp(self) -> None:
    """Build a breast cancer DataFrame with synthetic columns and split it.

    Four random columns are appended to the original features: two with
    three distinct values ('cat alpha', 'cat num') and two with two
    distinct values ('bin alpha', 'bin num').
    """
    self.random_state = 0
    bunch: dict = load_breast_cancer()
    frame: DataFrame = DataFrame(bunch['data'], columns=bunch['feature_names'])
    # Column names of the original features, captured before any
    # synthetic column is appended.
    self.col_ordinal = frame.columns.to_list()

    # Seed the global NumPy RNG; the four draws below consume it in a
    # fixed order, so their sequence must not be changed.
    np.random.seed(self.random_state)
    n_rows = len(frame)

    letters = np.array(['a', 'b', 'c'])
    frame['cat alpha'] = letters[np.random.randint(0, 3, n_rows)]
    frame['cat num'] = np.random.randint(0, 3, n_rows)
    self.col_categorical = ['cat alpha', 'cat num']

    letters = np.array(['a', 'b'])
    frame['bin alpha'] = letters[np.random.randint(0, 2, n_rows)]
    frame['bin num'] = np.random.randint(0, 2, n_rows)
    self.col_binary = ['bin alpha', 'bin num']

    self.X = frame
    self.y: ndarray = bunch['target']
    split = train_test_split(self.X, self.y, test_size=0.4,
                             random_state=self.random_state)
    self.X_train, self.X_test, self.y_train, self.y_test = split
Example #3
Source File: test_classifier_stacking.py From combo with BSD 2-Clause "Simplified" License | 7 votes |
def setUp(self):
    """Split the breast cancer data and fit a 4-fold Stacking ensemble."""
    seed = 42

    # Score thresholds stored for later use (presumably by the test
    # assertions -- confirm against the test methods).
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.4,
                             random_state=seed)
    self.X_train, self.X_test, self.y_train, self.y_test = split

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]
    self.clf = Stacking(base_estimators, n_folds=4)
    self.clf.fit(self.X_train, self.y_train)
Example #4
Source File: test_sklearn_feature_selection_converters.py From sklearn-onnx with MIT License | 6 votes |
def test_select_fwe_int(self):
    """Convert a fitted SelectFwe to ONNX with an int64 input and dump it."""
    features, target = load_breast_cancer(return_X_y=True)
    selector = SelectFwe()
    selector.fit(features, target)

    n_columns = features.shape[1]
    input_spec = [("input", Int64TensorType([None, n_columns]))]
    model_onnx = convert_sklearn(selector, "select fwe", input_spec)
    self.assertTrue(model_onnx is not None)

    # Condition string handed to dump_data_and_model as ``allow_failure``.
    failure_condition = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        features.astype(np.int64),
        selector,
        model_onnx,
        basename="SklearnSelectFwe",
        allow_failure=failure_condition,
    )
Example #5
Source File: main_nearest_neighbor.py From wisconsin-breast-cancer with Apache License 2.0 | 6 votes |
def main():
    """Run the NearestNeighbor model on the standardized breast cancer data."""
    bunch = datasets.load_breast_cancer()
    labels = bunch.target
    n_features = bunch.data.shape[1]

    # Standardization is fitted on the full dataset, before the split.
    scaled = StandardScaler().fit_transform(bunch.data)

    split = train_test_split(scaled, labels, test_size=0.3, stratify=labels)
    train_features, test_features, train_labels, test_labels = split

    model = NearestNeighbor(train_features, train_labels, n_features)
    model.predict(
        test_features,
        test_labels,
        result_path="./results/nearest_neighbor/",
    )
Example #6
Source File: test_classifier_des.py From combo with BSD 2-Clause "Simplified" License | 6 votes |
def setUp(self):
    """Split the breast cancer data and fit a DES_LA ensemble."""
    seed = 42

    # Score thresholds stored for later use (presumably by the test
    # assertions -- confirm against the test methods).
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.4,
                             random_state=seed)
    self.X_train, self.X_test, self.y_train, self.y_test = split

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]
    self.clf = DES_LA(base_estimators, local_region_size=30)
    self.clf.fit(self.X_train, self.y_train)
Example #7
Source File: test_gridsearch.py From dislib with Apache License 2.0 | 6 votes |
def test_fit_2(self):
    """Tests GridSearchCV fit() with different data."""
    x_np, y_np = datasets.load_breast_cancer(return_X_y=True)
    x = StandardScaler().fit_transform(ds.array(x_np, block_size=(100, 10)))
    # Labels are reshaped into a single column before being wrapped.
    y = ds.array(y_np.reshape(-1, 1), block_size=(100, 1))

    param_grid = {'c': [0.1], 'gamma': [0.1]}
    searcher = GridSearchCV(CascadeSVM(), param_grid, cv=5)
    searcher.fit(x, y)

    # Attributes that must exist on the searcher after fitting.
    expected_attrs = (
        'best_estimator_',
        'best_score_',
        'best_params_',
        'best_index_',
        'scorer_',
    )
    for attr_name in expected_attrs:
        self.assertTrue(hasattr(searcher, attr_name))
    self.assertEqual(searcher.n_splits_, 5)
Example #8
Source File: test_classifier_comb.py From combo with BSD 2-Clause "Simplified" License | 6 votes |
def setUp(self):
    """Split the breast cancer data and fit an averaging aggregator."""
    seed = 42

    # Score thresholds stored for later use (presumably by the test
    # assertions -- confirm against the test methods).
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.4,
                             random_state=seed)
    self.X_train, self.X_test, self.y_train, self.y_test = split

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]
    self.clf = SimpleClassifierAggregator(base_estimators, method='average')
    self.clf.fit(self.X_train, self.y_train)
Example #9
Source File: test_classifier_comb.py From combo with BSD 2-Clause "Simplified" License | 6 votes |
def setUp(self):
    """Split the breast cancer data and fit a weighted averaging aggregator."""
    seed = 42

    # Score thresholds stored for later use (presumably by the test
    # assertions -- confirm against the test methods).
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.4,
                             random_state=seed)
    self.X_train, self.X_test, self.y_train, self.y_test = split

    # One weight per base estimator, in the same order as the list below.
    clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])
    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]
    self.clf = SimpleClassifierAggregator(
        base_estimators,
        method='average',
        weights=clf_weights,
    )
    self.clf.fit(self.X_train, self.y_train)
Example #10
Source File: test_classifier_comb.py From combo with BSD 2-Clause "Simplified" License | 6 votes |
def setUp(self):
    """Split the breast cancer data and fit a 'maximization' aggregator."""
    seed = 42

    # Score thresholds stored for later use (presumably by the test
    # assertions -- confirm against the test methods).
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.4,
                             random_state=seed)
    self.X_train, self.X_test, self.y_train, self.y_test = split

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]
    self.clf = SimpleClassifierAggregator(base_estimators,
                                          method='maximization')
    self.clf.fit(self.X_train, self.y_train)
Example #11
Source File: test_pyfms.py From pyfms with MIT License | 6 votes |
def test_save_load_classifier(self):
    """Check that saved and reloaded weights give equal weights and accuracy."""
    features, labels = datasets.load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2)
    n_features = features.shape[1]
    k = 4

    # Train a classifier and record its state before saving.
    trained = pyfms.Classifier(n_features, k=k)
    trained.fit(X_train, y_train, nb_epoch=1000)
    weights_before = trained.get_weights()
    accuracy_before = accuracy_score(y_test, trained.predict(X_test))

    weights_path = os.path.join(self.workspace, 'classifier.fm')
    trained.save_weights(weights_path)

    # A fresh classifier (built without ``k``) loads the saved weights.
    restored = pyfms.Classifier(n_features)
    restored.load_weights(weights_path)
    weights_after = restored.get_weights()
    accuracy_after = accuracy_score(y_test, restored.predict(X_test))

    for saved, loaded in zip(weights_before, weights_after):
        np.testing.assert_array_equal(saved, loaded)
    self.assertEqual(accuracy_before, accuracy_after)
Example #12
Source File: test_classifier_comb.py From combo with BSD 2-Clause "Simplified" License | 6 votes |
def setUp(self):
    """Split the breast cancer data and fit a 'median' aggregator."""
    seed = 42

    # Score thresholds stored for later use (presumably by the test
    # assertions -- confirm against the test methods).
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    features, labels = load_breast_cancer(return_X_y=True)
    split = train_test_split(features, labels, test_size=0.4,
                             random_state=seed)
    self.X_train, self.X_test, self.y_train, self.y_test = split

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]
    self.clf = SimpleClassifierAggregator(base_estimators, method='median')
    self.clf.fit(self.X_train, self.y_train)
Example #13
Source File: test_cluster_comb.py From combo with BSD 2-Clause "Simplified" License | 6 votes |
def setUp(self):
    """Fit three clusterers on the data and store their labels by column."""
    self.X, self.y = load_breast_cancer(return_X_y=True)
    self.n_clusters = 5
    self.n_estimators = 3

    # Initialize a set of estimators, all with the same cluster count.
    clusterer_types = (KMeans, MiniBatchKMeans, AgglomerativeClustering)
    estimators = [
        clusterer_type(n_clusters=self.n_clusters)
        for clusterer_type in clusterer_types
    ]

    # Label matrix: one row per sample, one column per estimator.
    n_samples = self.X.shape[0]
    self.original_labels = np.zeros([n_samples, self.n_estimators])
    for column, clusterer in enumerate(estimators):
        clusterer.fit(self.X)
        self.original_labels[:, column] = clusterer.labels_
Example #14
Source File: test_sklearn_feature_selection_converters.py From sklearn-onnx with MIT License | 6 votes |
def test_select_fdr_int(self):
    """Convert a fitted SelectFdr to ONNX with an int64 input and dump it."""
    features, target = load_breast_cancer(return_X_y=True)
    selector = SelectFdr()
    selector.fit(features, target)

    n_columns = features.shape[1]
    input_spec = [("input", Int64TensorType([None, n_columns]))]
    model_onnx = convert_sklearn(selector, "select fdr", input_spec)
    self.assertTrue(model_onnx is not None)

    # Condition string handed to dump_data_and_model as ``allow_failure``.
    failure_condition = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        features.astype(np.int64),
        selector,
        model_onnx,
        basename="SklearnSelectFdr",
        allow_failure=failure_condition,
    )
Example #15
Source File: test_sklearn_feature_selection_converters.py From sklearn-onnx with MIT License | 6 votes |
def test_select_fdr_float(self):
    """Convert a fitted SelectFdr to ONNX with a float input and dump it."""
    features, target = load_breast_cancer(return_X_y=True)
    selector = SelectFdr()
    selector.fit(features, target)

    n_columns = features.shape[1]
    input_spec = [("input", FloatTensorType([None, n_columns]))]
    model_onnx = convert_sklearn(selector, "select fdr", input_spec)
    self.assertTrue(model_onnx is not None)

    # Condition string handed to dump_data_and_model as ``allow_failure``.
    failure_condition = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        features.astype(np.float32),
        selector,
        model_onnx,
        basename="SklearnSelectFdr",
        allow_failure=failure_condition,
    )
Example #16
Source File: test_sklearn_feature_selection_converters.py From sklearn-onnx with MIT License | 6 votes |
def test_select_fwe_float(self):
    """Convert a fitted SelectFwe to ONNX with a float input and dump it."""
    features, target = load_breast_cancer(return_X_y=True)
    selector = SelectFwe()
    selector.fit(features, target)

    n_columns = features.shape[1]
    input_spec = [("input", FloatTensorType([None, n_columns]))]
    model_onnx = convert_sklearn(selector, "select fwe", input_spec)
    self.assertTrue(model_onnx is not None)

    # Condition string handed to dump_data_and_model as ``allow_failure``.
    failure_condition = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        features.astype(np.float32),
        selector,
        model_onnx,
        basename="SklearnSelectFwe",
        allow_failure=failure_condition,
    )
Example #17
Source File: test_logistic.py From h2o4gpu with Apache License 2.0 | 6 votes |
def test_not_labels():
    """Compare sklearn and h2o4gpu LogisticRegression accuracy on one split."""
    bunch = load_breast_cancer()

    # Splitting data into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        bunch.data, bunch.target, test_size=0.25, random_state=42)

    # sklearn
    reference_model = linear_model.LogisticRegression()
    reference_model.fit(X_train, y_train)
    reference_pred = reference_model.predict(X_test)

    # h2o
    h2o_model = h2o4gpu.LogisticRegression()
    h2o_model.fit(X_train, y_train)
    h2o_pred = h2o_model.predict(X_test)

    reference_accuracy = accuracy_score(y_test, reference_pred)
    # The h2o predictions are squeezed before scoring.
    h2o_accuracy = accuracy_score(y_test, h2o_pred.squeeze())
    assert np.allclose(reference_accuracy, h2o_accuracy)
Example #18
Source File: test_des_integration.py From DESlib with BSD 3-Clause "New" or "Revised" License | 6 votes |
def load_dataset(encode_labels, rng):
    """Return scaled train/DSEL/test splits of the breast cancer dataset.

    Parameters
    ----------
    encode_labels : array-like or None
        When not None, the targets are replaced by
        ``np.take(encode_labels, y)``.
    rng : object
        Passed as ``random_state`` to both ``train_test_split`` calls.

    Returns
    -------
    tuple
        ``(X_dsel, X_test, X_train, y_dsel, y_test, y_train)``.
    """
    bunch = load_breast_cancer()
    features, labels = bunch.data, bunch.target
    if encode_labels is not None:
        labels = np.take(encode_labels, labels)

    # First split: hold out 33% of the samples as the test set.
    X_fit, X_test, y_fit, y_test = train_test_split(
        features, labels, test_size=0.33, random_state=rng)

    # Scale using statistics fitted on the non-test portion only.
    scaler = StandardScaler()
    X_fit = scaler.fit_transform(X_fit)
    X_test = scaler.transform(X_test)

    # Second split: halve the remainder into training and DSEL sets.
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_fit, y_fit, test_size=0.5, random_state=rng)

    return X_dsel, X_test, X_train, y_dsel, y_test, y_train
Example #19
Source File: test_des_integration.py From DESlib with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_meta_no_pool_of_classifiers(knn_methods):
    """Fit METADES without an explicit classifier pool and pin its score."""
    rng = np.random.RandomState(123456)
    bunch = load_breast_cancer()

    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        bunch.data, bunch.target, test_size=0.33, random_state=rng)

    # Scale the variables to have 0 mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    meta_des = METADES(knn_classifier=knn_methods, random_state=rng,
                       DSEL_perc=0.5)
    meta_des.fit(X_train, y_train)

    expected_score = 0.9095744680851063
    assert np.isclose(meta_des.score(X_test, y_test), expected_score)
Example #20
Source File: test_utils.py From pyDML with GNU General Public License v3.0 | 5 votes |
def breast_cancer():
    """Return ``Xy_dataset`` applied to the ``load_breast_cancer`` loader."""
    # The loader function itself is passed, not its result.
    dataset = Xy_dataset(load_breast_cancer)
    return dataset
Example #21
Source File: custom_objective.py From autogbt-alt with MIT License | 5 votes |
def main():
    """Fit AutoGBTClassifier with a custom objective and print AUC scores."""
    features, labels = load_breast_cancer(return_X_y=True)
    # 10% of the samples are held out for validation.
    train_X, valid_X, train_y, valid_y = train_test_split(
        features, labels, test_size=0.1)

    model = AutoGBTClassifier(n_trials=5, objective=CustomObjective())
    model.fit(train_X, train_y)

    valid_auc = roc_auc_score(valid_y, model.predict(valid_X))
    print('valid AUC: %.3f' % (valid_auc))
    print('CV AUC: %.3f' % (model.best_score))
Example #22
Source File: sofm_heatmap_visualization.py From neupy with MIT License | 5 votes |
def load_data():
    """Return the min-max scaled breast cancer features and their targets."""
    features, target = datasets.load_breast_cancer(return_X_y=True)
    scaled = preprocessing.MinMaxScaler().fit_transform(features)
    return scaled, target
Example #23
Source File: common_utils.py From interpret-community with MIT License | 5 votes |
def create_scikit_cancer_data():
    """Load the breast cancer dataset and split it into train and test sets.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, feature_names, classes)`` where
        ``classes`` is the list form of the dataset's ``target_names``.
    """
    breast_cancer_data = load_breast_cancer()
    # Split data into train and test
    x_train, x_test, y_train, y_test = train_test_split(
        breast_cancer_data.data,
        breast_cancer_data.target,
        test_size=0.2,
        random_state=0,
    )
    feature_names = breast_cancer_data.feature_names
    # Computed once; the original assigned this same value twice.
    classes = breast_cancer_data.target_names.tolist()
    return x_train, x_test, y_train, y_test, feature_names, classes
Example #24
Source File: test_distns.py From ngboost with Apache License 2.0 | 5 votes |
def cls_data(self):
    """Return an 80/20 train/test split of the breast cancer dataset.

    Returns
    -------
    list
        The result of ``train_test_split(X, Y, test_size=0.2)``.
    """
    # ``return_X_y`` is keyword-only in current scikit-learn; passing
    # ``True`` positionally is rejected there.
    X, Y = load_breast_cancer(return_X_y=True)
    return train_test_split(X, Y, test_size=0.2)
Example #25
Source File: __init__.py From skoot with MIT License | 5 votes |
def load_breast_cancer_df(include_tgt=True, tgt_name="target", names=None):
    """Get the breast cancer dataset.

    Loads the breast cancer dataset into a dataframe with the target set
    as the "target" feature or whatever name is specified in ``tgt_name``.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="target")
        The name of the target feature

    names : iterable or None
        The column names for the dataframe. If not defined, will default to
        the ``feature_names`` attribute in the sklearn bunch instance.

    Returns
    -------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded breast cancer dataset
    """
    # Imported inside the function rather than at module level.
    from sklearn.datasets import load_breast_cancer

    bunch = load_breast_cancer()
    return _load_from_bunch(bunch, include_tgt, tgt_name, names)
Example #26
Source File: dominance.py From dominance-analysis with MIT License | 5 votes |
def get_breast_cancer(cls):
    """Return the breast cancer dataset as a DataFrame.

    Prints two informational messages, then builds a frame with one
    column per feature plus a numeric ``target`` column.

    Returns
    -------
    pd.DataFrame
        Feature columns named after ``feature_names``, followed by
        ``target``.
    """
    print("The copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is downloaded from: https://goo.gl/U2Uwz2")
    print("Internally using load_breast_cancer function from sklearn.datasets ")

    # Load once; the original called load_breast_cancer() five times.
    bunch = load_breast_cancer()
    breast_cancer_data = pd.DataFrame(data=bunch['data'],
                                      columns=bunch['feature_names'])
    breast_cancer_data['target'] = bunch['target']
    # The original also added a mapped ``target_names`` column and then
    # sliced it off again before returning; that dead work is dropped and
    # the returned columns are unchanged.
    return breast_cancer_data
Example #27
Source File: conftest.py From python-sasctl with Apache License 2.0 | 5 votes |
def cancer_dataset():
    """Binary classification dataset.

    Skips the calling test when scikit-learn or pandas is not installed.

    Returns
    -------
    pandas.DataFrame
        One column per feature plus a categorical ``Type`` column whose
        categories are the dataset's ``target_names``.
    """
    pytest.importorskip('sklearn')
    pd = pytest.importorskip('pandas')
    from sklearn import datasets

    raw = datasets.load_breast_cancer()
    df = pd.DataFrame(raw.data, columns=raw.feature_names)
    df['Type'] = raw.target
    # Assigning to ``.cat.categories`` was removed in pandas 2.0;
    # rename_categories relabels the 0/1 codes positionally and works on
    # both old and new pandas.
    df['Type'] = (
        df['Type']
        .astype('category')
        .cat.rename_categories(list(raw.target_names))
    )
    return df
Example #28
Source File: test_environment.py From hyperparameter_hunter with MIT License | 5 votes |
def get_breast_cancer_data():
    """Return the breast cancer features with a ``diagnosis`` target column."""
    bunch = load_breast_cancer()
    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    # The target is appended as the last column.
    frame["diagnosis"] = bunch.target
    return frame
Example #29
Source File: test_cc.py From pycobra with MIT License | 5 votes |
def setUp(self):
    """Fit a ClassifierCobra on all but the last 20 breast cancer samples.

    The final 20 feature rows are stored in ``self.test_data``.
    """
    # The original created an unused ``np.random.RandomState(42)`` here;
    # it was never read, so it is removed.
    bc = datasets.load_breast_cancer()
    holdout = 20
    self.X = bc.data[:-holdout]
    self.y = bc.target[:-holdout]
    self.test_data = bc.data[-holdout:]
    self.cc = ClassifierCobra(random_state=0).fit(self.X, self.y)
Example #30
Source File: test_plots.py From scikit-optimize with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_plots_work_without_cat():
    """Basic smoke tests to make sure plotting doesn't crash."""
    SPACE = [
        Integer(1, 20, name='max_depth'),
        Integer(2, 100, name='min_samples_split'),
        Integer(5, 30, name='min_samples_leaf'),
        Integer(1, 30, name='max_features'),
    ]

    # Load the data once instead of on every objective evaluation.
    # ``return_X_y`` is keyword-only in current scikit-learn, so it can no
    # longer be passed positionally.
    X, y = load_breast_cancer(return_X_y=True)

    def objective(params):
        """Return the negated mean cross-validation score for ``params``."""
        tree_kwargs = {
            dim.name: val
            for dim, val in zip(SPACE, params)
            if dim.name != 'dummy'
        }
        clf = DecisionTreeClassifier(random_state=3, **tree_kwargs)
        return -np.mean(cross_val_score(clf, X, y))

    res = gp_minimize(objective, SPACE, n_calls=10, random_state=3)

    plots.plot_convergence(res)
    plots.plot_evaluations(res)
    plots.plot_objective(res)
    plots.plot_objective(res, minimum='expected_minimum')
    plots.plot_objective(res, sample_source='expected_minimum',
                         n_minimum_search=10)
    plots.plot_objective(res, sample_source='result')
    plots.plot_regret(res)

    # TODO: Compare plots to known good results?
    # Look into how matplotlib does this.