Python sklearn.linear_model.LogisticRegression() Examples
The following are 30 code examples of sklearn.linear_model.LogisticRegression(), drawn from open-source projects. Each example lists the project and source file it was taken from, so you can follow it back to the original code. You may also want to check out all the other functions and classes available in the sklearn.linear_model module.
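For orientation before the project examples, here is a minimal, self-contained sketch of the basic estimator workflow; the dataset choice and parameter values are illustrative only and not taken from any example below:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Fit a plain L2-regularized logistic regression and inspect its predictions.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression(max_iter=1000)  # max_iter raised so the default lbfgs solver converges
clf.fit(X_train, y_train)
print(clf.predict(X_test[:5]))        # hard class labels
print(clf.predict_proba(X_test[:5]))  # per-class probabilities
print(clf.score(X_test, y_test))      # mean accuracy on held-out data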
Example #1
Source File: create_ngrams.py From rasa_lookup_demo with Apache License 2.0
def run_logreg(X_train, y_train, X_test, y_test, selection_threshold=0.2):
    # X_test and y_test are needed for the final score, so they are taken as parameters
    print("\nrunning logistic regression...")
    print("using a selection threshold of {}".format(selection_threshold))
    pipe = Pipeline(
        [
            (
                "feature_selection",
                RandomizedLogisticRegression(selection_threshold=selection_threshold),
            ),
            ("classification", LogisticRegression()),
        ]
    )
    pipe.fit(X_train, y_train)
    print("training accuracy : {}".format(pipe.score(X_train, y_train)))
    print("testing accuracy : {}".format(pipe.score(X_test, y_test)))
    return pipe
Example #2
Source File: classifier.py From Video-Highlight-Detection with MIT License
def _build_model(self, model_name, params=None):
    # chi2 kernel unless a linear/rbf kernel was configured; hoisted so both branches can use it
    kernel_function = chi2_kernel if self.model_kernel not in ('linear', 'rbf') else self.model_kernel
    if params is None:
        if model_name == 'xgb':
            self.model = XGBClassifier(n_estimators=100, learning_rate=0.02)
        elif model_name == 'svm':
            self.model = SVC(C=1, kernel=kernel_function, gamma=1, probability=True)
        elif model_name == 'lr':
            self.model = LR(C=1, penalty='l1', tol=1e-6)
    else:
        if model_name == 'xgb':
            self.model = XGBClassifier(n_estimators=1000, learning_rate=0.02, **params)
        elif model_name == 'svm':
            self.model = SVC(C=1, kernel=kernel_function, gamma=1, probability=True)
        elif model_name == 'lr':
            self.model = LR(C=1, penalty='l1', tol=1e-6)
    log.l.info('=======> built the model {} done'.format(model_name))
Example #3
Source File: test_run.py From nyaggle with MIT License
def test_experiment_sklearn_classifier(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'C': 0.1
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            eval_func=roc_auc_score,
                            algorithm_type=LogisticRegression,
                            with_auto_prep=False)

    assert len(np.unique(result.oof_prediction)) > 5  # making sure prediction is not binarized
    assert len(np.unique(result.test_prediction)) > 5
    assert roc_auc_score(y_train, result.oof_prediction) >= 0.8
    assert roc_auc_score(y_test, result.test_prediction) >= 0.8

    _check_file_exists(tmpdir_name)
Example #4
Source File: test_LogisticRegression.py From differential-privacy-library with MIT License
def test_bad_params(self):
    X = [[1]]
    y = [0]

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, C=-1).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, C=1.2).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, max_iter=-1).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, max_iter="100").fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, tol=-1).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, tol="1").fit(X, y)
Example #5
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License
def __init__(self, lambda_=1., fit_intercept=True, alpha=0.95, m=1.0, ts=False,
             ts_from_ci=True, sample_unique=False, random_state=1):
    self.conf_coef = alpha
    self.m = m
    self.fit_intercept = fit_intercept
    self.lambda_ = lambda_
    self.ts = ts
    self.ts_from_ci = ts_from_ci
    self.warm_start = True
    self.sample_unique = bool(sample_unique)
    self.random_state = _check_random_state(random_state)
    self.is_fitted = False
    self.model = LogisticRegression(C=1. / lambda_, penalty="l2", fit_intercept=fit_intercept,
                                    solver='lbfgs', max_iter=15000, warm_start=True)
    self.Sigma = np.empty((0, 0), dtype=np.float64)
Example #6
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License
def _check_autograd_supported(base_algorithm):
    supported = ['LogisticRegression', 'SGDClassifier', 'RidgeClassifier',
                 'StochasticLogisticRegression', 'LinearRegression']
    if base_algorithm.__class__.__name__ not in supported:
        raise ValueError("Automatic gradients only implemented for the following classes: " + ", ".join(supported))
    if base_algorithm.__class__.__name__ == 'LogisticRegression':
        if base_algorithm.penalty != 'l2':
            raise ValueError("Automatic gradients only defined for LogisticRegression with l2 regularization.")
        if base_algorithm.intercept_scaling != 1:
            raise ValueError("Automatic gradients for LogisticRegression not implemented with 'intercept_scaling'.")
    if base_algorithm.__class__.__name__ == 'RidgeClassifier':
        if base_algorithm.normalize:
            raise ValueError("Automatic gradients for LogisticRegression only implemented without 'normalize'.")
    if base_algorithm.__class__.__name__ == 'SGDClassifier':
        if base_algorithm.loss != 'log':
            raise ValueError("Automatic gradients for LogisticRegression only implemented with logistic loss.")
        if base_algorithm.penalty != 'l2':
            raise ValueError("Automatic gradients only defined for LogisticRegression with l2 regularization.")
    try:
        if base_algorithm.class_weight is not None:
            raise ValueError("Automatic gradients for LogisticRegression not supported with 'class_weight'.")
    except AttributeError:
        # catching only AttributeError; a bare except would also swallow the ValueError above
        pass
Example #7
Source File: test_LogisticRegression.py From differential-privacy-library with MIT License
def test_same_results(self):
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn import linear_model

    dataset = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

    clf = LogisticRegression(data_norm=12, epsilon=float("inf"))
    clf.fit(X_train, y_train)
    predict1 = clf.predict(X_test)

    clf = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
    clf.fit(X_train, y_train)
    predict2 = clf.predict(X_test)

    self.assertTrue(np.all(predict1 == predict2))
Example #8
Source File: test_LogisticRegression.py From differential-privacy-library with MIT License
def test_accountant(self):
    from diffprivlib.accountant import BudgetAccountant
    acc = BudgetAccountant()

    X = np.array([0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00,
                  3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50])
    y = np.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
    X = X[:, np.newaxis]
    X -= 3.0
    X /= 2.5

    clf = LogisticRegression(epsilon=2, data_norm=1.0, accountant=acc)
    clf.fit(X, y)
    self.assertEqual((2, 0), acc.total())

    with BudgetAccountant(3, 0) as acc2:
        clf = LogisticRegression(epsilon=2, data_norm=1.0)
        clf.fit(X, y)
        self.assertEqual((2, 0), acc2.total())

        with self.assertRaises(BudgetError):
            clf.fit(X, y)
Example #9
Source File: common_utils.py From interpret-text with MIT License
def create_logistic_vectorizer():
    vectorizer = CountVectorizer(lowercase=False, min_df=0.0, binary=True)
    lr = LogisticRegression(random_state=777)
    return Pipeline([("vectorizer", vectorizer), ("lr", lr)])
Example #10
Source File: stability_selection.py From stability-selection with BSD 3-Clause "New" or "Revised" License
def __init__(self, base_estimator=LogisticRegression(penalty='l1'), lambda_name='C',
             lambda_grid=np.logspace(-5, -2, 25), n_bootstrap_iterations=100,
             sample_fraction=0.5, threshold=0.6,
             bootstrap_func=bootstrap_without_replacement, bootstrap_threshold=None,
             verbose=0, n_jobs=1, pre_dispatch='2*n_jobs', random_state=None):
    self.base_estimator = base_estimator
    self.lambda_name = lambda_name
    self.lambda_grid = lambda_grid
    self.n_bootstrap_iterations = n_bootstrap_iterations
    self.sample_fraction = sample_fraction
    self.threshold = threshold
    self.bootstrap_func = bootstrap_func
    self.bootstrap_threshold = bootstrap_threshold
    self.verbose = verbose
    self.n_jobs = n_jobs
    self.pre_dispatch = pre_dispatch
    self.random_state = random_state
Example #11
Source File: train_sampling_unsupervised.py From dgl with Apache License 2.0
def compute_acc(emb, labels, train_nids, val_nids, test_nids):
    """
    Compute the accuracy of prediction given the labels.
    """
    emb = emb.cpu().numpy()
    train_nids = train_nids.cpu().numpy()
    train_labels = labels[train_nids].cpu().numpy()
    val_nids = val_nids.cpu().numpy()
    val_labels = labels[val_nids].cpu().numpy()
    test_nids = test_nids.cpu().numpy()
    test_labels = labels[test_nids].cpu().numpy()

    # standardize the embeddings before fitting the linear classifier
    emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)
    lr = lm.LogisticRegression(multi_class='multinomial', max_iter=10000)
    lr.fit(emb[train_nids], labels[train_nids])

    pred = lr.predict(emb)
    f1_micro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='micro')
    f1_micro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='micro')
    f1_macro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='macro')
    f1_macro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='macro')
    return f1_micro_eval, f1_micro_test
Example #12
Source File: top_factors.py From healthcareai-py with MIT License
def prepare_fit_model_for_factors(model_type, x_train, y_train):
    """
    Given a model type, train and test data

    Args:
        model_type (str): 'classification' or 'regression'
        x_train:
        y_train:

    Returns:
        (sklearn.base.BaseEstimator): A fit model.
    """
    if model_type == 'classification':
        algorithm = LogisticRegression()
    elif model_type == 'regression':
        algorithm = LinearRegression()
    else:
        algorithm = None

    if algorithm is not None:
        algorithm.fit(x_train, y_train)

    return algorithm
Example #13
Source File: maximum_margin_reduction.py From libact with BSD 2-Clause "Simplified" License
def __init__(self, *args, **kwargs):
    super(MaximumLossReductionMaximalConfidence, self).__init__(*args, **kwargs)

    # self.n_labels = len(self.dataset.get_labeled_entries()[0][1])
    self.n_labels = len(self.dataset.get_labeled_entries()[1][0])

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    self.logreg_param = kwargs.pop('logreg_param',
                                   {'multi_class': 'multinomial',
                                    'solver': 'newton-cg',
                                    'random_state': random_state})
    self.logistic_regression_ = LogisticRegression(**self.logreg_param)

    self.br_base = kwargs.pop('br_base',
                              SklearnProbaAdapter(SVC(kernel='linear',
                                                      probability=True,
                                                      gamma="auto",
                                                      random_state=random_state)))
Example #14
Source File: similarity_scores_time_benchmark.py From dirty_cat with BSD 3-Clause "New" or "Revised" License
def benchmark(strat='k-means', limit=50000, n_proto=100, hash_dim=None, ngram_range=(3, 3)):
    df = dfr[:limit].copy()
    df = df.dropna(axis=0)
    df = df.reset_index()
    y = df['Violation Type']

    if strat == 'k-means':
        sim_enc = SimilarityEncoder(similarity='ngram', ngram_range=ngram_range,
                                    categories='k-means', hashing_dim=hash_dim,
                                    n_prototypes=n_proto, random_state=3498)
    else:
        sim_enc = SimilarityEncoder(similarity='ngram', ngram_range=ngram_range,
                                    categories='most_frequent', hashing_dim=hash_dim,
                                    n_prototypes=n_proto, random_state=3498)

    column_trans = ColumnTransformer(
        transformers=transformers + [('sim_enc', sim_enc, ['Description'])],
        remainder='drop')

    t0 = time()
    X = column_trans.fit_transform(df)
    t1 = time()
    t_score_1 = t1 - t0

    model = pipeline.Pipeline([('logistic', linear_model.LogisticRegression())])
    t0 = time()
    m_score = model_selection.cross_val_score(model, X, y, cv=20)
    t1 = time()
    t_score_2 = t1 - t0

    return t_score_1, m_score, t_score_2
Example #15
Source File: mnist.py From mlens with MIT License
def build_ensemble(cls, **kwargs):
    """Build ML-Ensemble"""
    ens = cls(**kwargs)
    use = ["ExtraTrees", "RandomForest", "LogisticRegression-SAG", "MLP-adam"]

    meta = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=-1)

    base_learners = list()
    for est_name, est in ESTIMATORS.items():
        e = clone(est)
        if est_name not in use:
            continue
        elif est_name == "MLP-adam":
            e.verbose = False
        try:
            e.set_params(**{'n_jobs': 1})
        except ValueError:
            pass

        base_learners.append((est_name, e))

    ens.add(base_learners, proba=True, shuffle=True, random_state=1)
    ens.add_meta(meta, shuffle=True, random_state=2)
    return ens
Example #16
Source File: classifier_chains.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License
def __init__(self, base_estimator=LogisticRegression(), order=None, random_state=None):
    super().__init__()
    self.base_estimator = base_estimator
    self.order = order
    self.random_state = random_state
    self.chain = None
    self.ensemble = None
    self.L = None
    self._random_state = None  # This is the actual random_state object used internally
    self.__configure()
Example #17
Source File: train.py From face-recognition with BSD 3-Clause "New" or "Revised" License
def train(args, embeddings, labels):
    softmax = LogisticRegression(solver='lbfgs', multi_class='multinomial', C=10, max_iter=10000)
    if args.grid_search:
        clf = GridSearchCV(
            estimator=softmax,
            param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]},
            cv=3
        )
    else:
        clf = softmax
    clf.fit(embeddings, labels)
    return clf.best_estimator_ if args.grid_search else clf
Example #18
Source File: deepwalk_wiki.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
Example #19
Source File: sdne_wiki.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
Example #20
Source File: node2vec_wiki.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
Example #21
Source File: ensemble.py From gap with MIT License
def fit(self, X, X_val, X_tst, verbose, **params):
    self.X_val = X_val
    C = params['C']
    del params['C']
    res = self.base_model.train_evaluate_cv(X,
                                            X_val=None,
                                            X_tst=[X_val, X_tst],
                                            batch_size=32,
                                            verbose=verbose,
                                            seed=21,
                                            return_probs=True,
                                            **params)
    self.X_ens, self.X_tst = res.probs_raw
    X_ens = self.X_ens

    self.X_tst = np.transpose(self.X_tst, (1, 0, 2)).reshape(-1, 15)
    self.X_tst = np.hstack((self.X_tst, np.array(X_tst[2].values.tolist())))

    X_ens = np.transpose(X_ens, (1, 0, 2)).reshape(-1, 15)
    X_ens = np.hstack((X_ens, np.array(X_val[2].values.tolist())))
    y_ens = np.argmax(np.array(X_val[4].values.tolist()), axis=1)

    self.lr = LogisticRegression(random_state=0, C=C, solver='lbfgs', multi_class='multinomial')
    self.lr.fit(X_ens, y_ens)
    return self
Example #22
Source File: ensemble.py From gap with MIT License
def __init__(self, base_model):
    self.base_model = base_model
    self.lr = LogisticRegression(random_state=0, C=1.0, solver='lbfgs', multi_class='multinomial')
Example #23
Source File: logit.py From fairtest with Apache License 2.0
def train_and_test_model1(features_train, labels_train, features_test, labels_test,
                          features_test_original):
    model = LogisticRegression()
    model.fit(features_train, labels_train)
    print("train", log_loss(labels_train, model.predict_proba(features_train.as_matrix())))

    cat_indexes = labels_test.cat.codes
    predict_probas = model.predict_proba(features_test.as_matrix())

    sumloss = .0
    losses = []
    for i in range(predict_probas.shape[0]):
        # clip probabilities away from 0 and 1 before taking the log
        loss = (-1) * sp.log(max(min(predict_probas[i][cat_indexes[i]], 1 - 10**(-5)), 10**(-5)))
        sumloss += loss
        losses.append(loss)

    feature_list = features_test_original.columns.tolist()
    for feature in ["X", "Y", "ZipCode", "Address", "Resolution", "Description",
                    "Dates", "Time", "Category", "Descript"]:
        if feature in feature_list:
            feature_list.remove(feature)
    feature_list_original = ["X", "Y", "ZipCode", "Address", "Resolution", "Description",
                             "Dates", "Time", "Category", "Descript"]
    features_test_original = features_test_original[feature_list]

    print("Test Loss: %.5f" % (sumloss / predict_probas.shape[0]))
    print("test: %.5f" % log_loss(labels_test, model.predict_proba(features_test.as_matrix())))
Example #24
Source File: LR.py From MNIST-baselines with MIT License
def LR():
    loader = MnistLoader(flatten=True, data_path='../data', var_per=None)
    model = LogisticRegression(penalty='l2')
    model.fit(loader.data_train, loader.label_train)
    print('model trained')
    res = model.score(loader.data_test, loader.label_test)
    print(res)
    return res
Example #25
Source File: DiachronicTermMiner.py From scattertext with Apache License 2.0
def _regress_terms(self, X, cat, categories, category_idx_store, neg_mask, terms):
    pos_mask = categories.isin(category_idx_store.getidxstrictbatch([cat])).values
    catX = X[neg_mask | pos_mask, :]
    catY = np.zeros(catX.shape[0]).astype(bool)
    catY[pos_mask[neg_mask | pos_mask]] = True
    scores = (pd.Series(LogisticRegression(penalty='l2').fit(catX, catY).coef_[0], index=terms)
              .sort_values(ascending=False))
    return scores
Example #26
Source File: TermDocMatrix.py From scattertext with Apache License 2.0
def get_logreg_coefs(self, category, clf=LogisticRegression()):
    '''Computes regression score of tfidf transformed features

    Parameters
    ----------
    category : str
        category name to score
    clf : sklearn regressor

    Returns
    -------
    coefficient array
    '''
    self._fit_tfidf_model(category, clf)
    return clf.coef_[0]
Example #27
Source File: baselines.py From cactus-maml with MIT License
def embedding_logistic_regression(C=FLAGS.inverse_reg, penalty='l2', multi_class='multinomial',
                                  num_classes=FLAGS.way, num_shots=FLAGS.shot,
                                  num_tasks=FLAGS.num_tasks,
                                  num_encoding_dims=FLAGS.num_encoding_dims,
                                  test_set=FLAGS.test_set, dataset=FLAGS.dataset):
    print('{}-way {}-shot logistic regression'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies), np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        logistic_regression = LogisticRegression(n_jobs=-1, penalty=penalty, C=C,
                                                 multi_class=multi_class, solver='saga',
                                                 max_iter=1000)
        logistic_regression.fit(Z_train_few, Y_train_few)
        test_accuracies.append(logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(logistic_regression.score(Z_train_few, Y_train_few))

    print('penalty={}, C={}, multi_class={}'.format(penalty, C, multi_class))
    print('{}-way {}-shot logistic regression: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies),
        1.96 * np.std(test_accuracies) / np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #28
Source File: test_sklearn_adapter.py From libact with BSD 2-Clause "Simplified" License
def test_adapt_logistic_regression(self):
    adapter = SklearnProbaAdapter(
        LogisticRegression(solver='liblinear', multi_class="ovr", random_state=1126))
    clf = LogisticRegression(solver='liblinear', multi_class="ovr", random_state=1126)
    self.check_functions(adapter, clf)
Example #29
Source File: p115_l1_l2_regularization.py From PythonMachineLearningExamples with MIT License
def weight_graph(regularization='l1'):
    # X_train_std, y_train, colors, columnsXY, ax and ocr_utils come from the enclosing script
    weights, params = [], []
    for c in np.arange(0, 6):
        lr = LogisticRegression(penalty=regularization, C=10**c, random_state=0)
        lr.fit(X_train_std, y_train)
        weights.append(lr.coef_[1])
        params.append(10**c)

    weights = np.array(weights)

    for column, color in zip(range(weights.shape[1]), colors):
        plt.plot(params, weights[:, column], label=columnsXY[column + 1], color=color)

    plt.axhline(0, color='black', linestyle='--', linewidth=3)
    plt.xlim([10**(-5), 10**5])
    plt.ylabel('weight coefficient')
    plt.xlabel('C')
    plt.xscale('log')
    title = 'regularization {}'.format(regularization)
    plt.title(title)
    plt.legend(loc='upper left')
    ax.legend(loc='upper center', bbox_to_anchor=(1.38, 1.03), ncol=1, fancybox=True)
    ocr_utils.show_figures(plt, title + ' path')
Example #30
Source File: struc2vec_flight.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/flight/labels-brazil-airports.txt', skip_head=True)
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)