Python sklearn.linear_model.LogisticRegression() Examples
The following are 30 code examples of sklearn.linear_model.LogisticRegression(), drawn from open-source projects. Each example lists the project and source file it was taken from, so you can follow it back to the original code. You may also want to check out all the other functions and classes available in the sklearn.linear_model module.
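For orientation before the project examples, here is a minimal, self-contained sketch of the basic estimator workflow; the dataset choice and parameter values are illustrative only and not taken from any example below:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Fit a plain L2-regularized logistic regression and inspect its predictions.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression(max_iter=1000)  # max_iter raised so the default lbfgs solver converges
clf.fit(X_train, y_train)
print(clf.predict(X_test[:5]))        # hard class labels
print(clf.predict_proba(X_test[:5]))  # per-class probabilities
print(clf.score(X_test, y_test))      # mean accuracy on held-out data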
Example #1
Source File: create_ngrams.py From rasa_lookup_demo with Apache License 2.0
def run_logreg(X_train, y_train, X_test, y_test, selection_threshold=0.2):
    # X_test and y_test are needed for the final score, so they are taken as parameters
    print("\nrunning logistic regression...")
    print("using a selection threshold of {}".format(selection_threshold))
    pipe = Pipeline(
        [
            (
                "feature_selection",
                RandomizedLogisticRegression(selection_threshold=selection_threshold),
            ),
            ("classification", LogisticRegression()),
        ]
    )
    pipe.fit(X_train, y_train)
    print("training accuracy : {}".format(pipe.score(X_train, y_train)))
    print("testing accuracy : {}".format(pipe.score(X_test, y_test)))
    return pipe
Example #2
Source File: classifier.py From Video-Highlight-Detection with MIT License
def _build_model(self, model_name, params=None):
    # chi2 kernel unless a linear/rbf kernel was configured; hoisted so both branches can use it
    kernel_function = chi2_kernel if self.model_kernel not in ('linear', 'rbf') else self.model_kernel
    if params is None:
        if model_name == 'xgb':
            self.model = XGBClassifier(n_estimators=100, learning_rate=0.02)
        elif model_name == 'svm':
            self.model = SVC(C=1, kernel=kernel_function, gamma=1, probability=True)
        elif model_name == 'lr':
            self.model = LR(C=1, penalty='l1', tol=1e-6)
    else:
        if model_name == 'xgb':
            self.model = XGBClassifier(n_estimators=1000, learning_rate=0.02, **params)
        elif model_name == 'svm':
            self.model = SVC(C=1, kernel=kernel_function, gamma=1, probability=True)
        elif model_name == 'lr':
            self.model = LR(C=1, penalty='l1', tol=1e-6)
    log.l.info('=======> built the model {} done'.format(model_name))
Example #3
Source File: test_run.py From nyaggle with MIT License
def test_experiment_sklearn_classifier(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  class_sep=0.98, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'C': 0.1
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            eval_func=roc_auc_score,
                            algorithm_type=LogisticRegression,
                            with_auto_prep=False)

    assert len(np.unique(result.oof_prediction)) > 5  # making sure prediction is not binarized
    assert len(np.unique(result.test_prediction)) > 5
    assert roc_auc_score(y_train, result.oof_prediction) >= 0.8
    assert roc_auc_score(y_test, result.test_prediction) >= 0.8

    _check_file_exists(tmpdir_name)
Example #4
Source File: test_LogisticRegression.py From differential-privacy-library with MIT License
def test_bad_params(self):
    X = [[1]]
    y = [0]

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, C=-1).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, C=1.2).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, max_iter=-1).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, max_iter="100").fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, tol=-1).fit(X, y)

    with self.assertRaises(ValueError):
        LogisticRegression(data_norm=1, tol="1").fit(X, y)
Example #5
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License
def __init__(self, lambda_=1., fit_intercept=True, alpha=0.95, m=1.0, ts=False,
             ts_from_ci=True, sample_unique=False, random_state=1):
    self.conf_coef = alpha
    self.m = m
    self.fit_intercept = fit_intercept
    self.lambda_ = lambda_
    self.ts = ts
    self.ts_from_ci = ts_from_ci
    self.warm_start = True
    self.sample_unique = bool(sample_unique)
    self.random_state = _check_random_state(random_state)
    self.is_fitted = False
    self.model = LogisticRegression(C=1. / lambda_, penalty="l2", fit_intercept=fit_intercept,
                                    solver='lbfgs', max_iter=15000, warm_start=True)
    self.Sigma = np.empty((0, 0), dtype=np.float64)
Example #6
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License
def _check_autograd_supported(base_algorithm):
    supported = ['LogisticRegression', 'SGDClassifier', 'RidgeClassifier',
                 'StochasticLogisticRegression', 'LinearRegression']
    if base_algorithm.__class__.__name__ not in supported:
        raise ValueError("Automatic gradients only implemented for the following classes: " + ", ".join(supported))
    if base_algorithm.__class__.__name__ == 'LogisticRegression':
        if base_algorithm.penalty != 'l2':
            raise ValueError("Automatic gradients only defined for LogisticRegression with l2 regularization.")
        if base_algorithm.intercept_scaling != 1:
            raise ValueError("Automatic gradients for LogisticRegression not implemented with 'intercept_scaling'.")
    if base_algorithm.__class__.__name__ == 'RidgeClassifier':
        if base_algorithm.normalize:
            raise ValueError("Automatic gradients for LogisticRegression only implemented without 'normalize'.")
    if base_algorithm.__class__.__name__ == 'SGDClassifier':
        if base_algorithm.loss != 'log':
            raise ValueError("Automatic gradients for LogisticRegression only implemented with logistic loss.")
        if base_algorithm.penalty != 'l2':
            raise ValueError("Automatic gradients only defined for LogisticRegression with l2 regularization.")
    try:
        if base_algorithm.class_weight is not None:
            raise ValueError("Automatic gradients for LogisticRegression not supported with 'class_weight'.")
    except AttributeError:
        # catching only AttributeError; a bare except would also swallow the ValueError above
        pass
Example #7
Source File: test_LogisticRegression.py From differential-privacy-library with MIT License
def test_same_results(self):
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn import linear_model

    dataset = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

    clf = LogisticRegression(data_norm=12, epsilon=float("inf"))
    clf.fit(X_train, y_train)
    predict1 = clf.predict(X_test)

    clf = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
    clf.fit(X_train, y_train)
    predict2 = clf.predict(X_test)

    self.assertTrue(np.all(predict1 == predict2))
Example #8
Source File: test_LogisticRegression.py From differential-privacy-library with MIT License
def test_accountant(self):
    from diffprivlib.accountant import BudgetAccountant
    acc = BudgetAccountant()

    X = np.array([0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00,
                  3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50])
    y = np.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
    X = X[:, np.newaxis]
    X -= 3.0
    X /= 2.5

    clf = LogisticRegression(epsilon=2, data_norm=1.0, accountant=acc)
    clf.fit(X, y)
    self.assertEqual((2, 0), acc.total())

    with BudgetAccountant(3, 0) as acc2:
        clf = LogisticRegression(epsilon=2, data_norm=1.0)
        clf.fit(X, y)
        self.assertEqual((2, 0), acc2.total())

        with self.assertRaises(BudgetError):
            clf.fit(X, y)
Example #9
Source File: common_utils.py From interpret-text with MIT License
def create_logistic_vectorizer():
    vectorizer = CountVectorizer(lowercase=False, min_df=0.0, binary=True)
    lr = LogisticRegression(random_state=777)
    return Pipeline([("vectorizer", vectorizer), ("lr", lr)])
Example #10
Source File: stability_selection.py From stability-selection with BSD 3-Clause "New" or "Revised" License
def __init__(self, base_estimator=LogisticRegression(penalty='l1'), lambda_name='C',
             lambda_grid=np.logspace(-5, -2, 25), n_bootstrap_iterations=100,
             sample_fraction=0.5, threshold=0.6,
             bootstrap_func=bootstrap_without_replacement, bootstrap_threshold=None,
             verbose=0, n_jobs=1, pre_dispatch='2*n_jobs', random_state=None):
    self.base_estimator = base_estimator
    self.lambda_name = lambda_name
    self.lambda_grid = lambda_grid
    self.n_bootstrap_iterations = n_bootstrap_iterations
    self.sample_fraction = sample_fraction
    self.threshold = threshold
    self.bootstrap_func = bootstrap_func
    self.bootstrap_threshold = bootstrap_threshold
    self.verbose = verbose
    self.n_jobs = n_jobs
    self.pre_dispatch = pre_dispatch
    self.random_state = random_state
Example #11
Source File: train_sampling_unsupervised.py From dgl with Apache License 2.0
def compute_acc(emb, labels, train_nids, val_nids, test_nids):
    """
    Compute the accuracy of prediction given the labels.
    """
    emb = emb.cpu().numpy()
    train_nids = train_nids.cpu().numpy()
    train_labels = labels[train_nids].cpu().numpy()
    val_nids = val_nids.cpu().numpy()
    val_labels = labels[val_nids].cpu().numpy()
    test_nids = test_nids.cpu().numpy()
    test_labels = labels[test_nids].cpu().numpy()

    # standardize the embeddings before fitting the linear classifier
    emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)
    lr = lm.LogisticRegression(multi_class='multinomial', max_iter=10000)
    lr.fit(emb[train_nids], labels[train_nids])

    pred = lr.predict(emb)
    f1_micro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='micro')
    f1_micro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='micro')
    f1_macro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='macro')
    f1_macro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='macro')
    return f1_micro_eval, f1_micro_test
Example #12
Source File: top_factors.py From healthcareai-py with MIT License
def prepare_fit_model_for_factors(model_type, x_train, y_train):
    """
    Given a model type, train and test data

    Args:
        model_type (str): 'classification' or 'regression'
        x_train:
        y_train:

    Returns:
        (sklearn.base.BaseEstimator): A fit model.
    """
    if model_type == 'classification':
        algorithm = LogisticRegression()
    elif model_type == 'regression':
        algorithm = LinearRegression()
    else:
        algorithm = None

    if algorithm is not None:
        algorithm.fit(x_train, y_train)

    return algorithm
Example #13
Source File: maximum_margin_reduction.py From libact with BSD 2-Clause "Simplified" License
def __init__(self, *args, **kwargs):
    super(MaximumLossReductionMaximalConfidence, self).__init__(*args, **kwargs)

    # self.n_labels = len(self.dataset.get_labeled_entries()[0][1])
    self.n_labels = len(self.dataset.get_labeled_entries()[1][0])

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    self.logreg_param = kwargs.pop('logreg_param',
                                   {'multi_class': 'multinomial',
                                    'solver': 'newton-cg',
                                    'random_state': random_state})
    self.logistic_regression_ = LogisticRegression(**self.logreg_param)

    self.br_base = kwargs.pop('br_base',
                              SklearnProbaAdapter(SVC(kernel='linear',
                                                      probability=True,
                                                      gamma="auto",
                                                      random_state=random_state)))
Example #14
Source File: similarity_scores_time_benchmark.py From dirty_cat with BSD 3-Clause "New" or "Revised" License
def benchmark(strat='k-means', limit=50000, n_proto=100, hash_dim=None, ngram_range=(3, 3)):
    df = dfr[:limit].copy()
    df = df.dropna(axis=0)
    df = df.reset_index()
    y = df['Violation Type']

    if strat == 'k-means':
        sim_enc = SimilarityEncoder(similarity='ngram', ngram_range=ngram_range,
                                    categories='k-means', hashing_dim=hash_dim,
                                    n_prototypes=n_proto, random_state=3498)
    else:
        sim_enc = SimilarityEncoder(similarity='ngram', ngram_range=ngram_range,
                                    categories='most_frequent', hashing_dim=hash_dim,
                                    n_prototypes=n_proto, random_state=3498)

    column_trans = ColumnTransformer(
        transformers=transformers + [('sim_enc', sim_enc, ['Description'])],
        remainder='drop')

    t0 = time()
    X = column_trans.fit_transform(df)
    t1 = time()
    t_score_1 = t1 - t0

    model = pipeline.Pipeline([('logistic', linear_model.LogisticRegression())])
    t0 = time()
    m_score = model_selection.cross_val_score(model, X, y, cv=20)
    t1 = time()
    t_score_2 = t1 - t0

    return t_score_1, m_score, t_score_2
Example #15
Source File: mnist.py From mlens with MIT License
def build_ensemble(cls, **kwargs):
    """Build ML-Ensemble"""
    ens = cls(**kwargs)
    use = ["ExtraTrees", "RandomForest", "LogisticRegression-SAG", "MLP-adam"]

    meta = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=-1)

    base_learners = list()
    for est_name, est in ESTIMATORS.items():
        e = clone(est)
        if est_name not in use:
            continue
        elif est_name == "MLP-adam":
            e.verbose = False
        try:
            e.set_params(**{'n_jobs': 1})
        except ValueError:
            pass

        base_learners.append((est_name, e))

    ens.add(base_learners, proba=True, shuffle=True, random_state=1)
    ens.add_meta(meta, shuffle=True, random_state=2)
    return ens
Example #16
Source File: classifier_chains.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License
def __init__(self, base_estimator=LogisticRegression(), order=None, random_state=None):
    super().__init__()
    self.base_estimator = base_estimator
    self.order = order
    self.random_state = random_state
    self.chain = None
    self.ensemble = None
    self.L = None
    self._random_state = None  # This is the actual random_state object used internally
    self.__configure()
Example #17
Source File: train.py From face-recognition with BSD 3-Clause "New" or "Revised" License
def train(args, embeddings, labels):
    softmax = LogisticRegression(solver='lbfgs', multi_class='multinomial', C=10, max_iter=10000)
    if args.grid_search:
        clf = GridSearchCV(
            estimator=softmax,
            param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]},
            cv=3
        )
    else:
        clf = softmax
    clf.fit(embeddings, labels)
    return clf.best_estimator_ if args.grid_search else clf
Example #18
Source File: deepwalk_wiki.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
Example #19
Source File: sdne_wiki.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
Example #20
Source File: node2vec_wiki.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
Example #21
Source File: ensemble.py From gap with MIT License
def fit(self, X, X_val, X_tst, verbose, **params):
    self.X_val = X_val
    C = params['C']
    del params['C']
    res = self.base_model.train_evaluate_cv(X,
                                            X_val=None,
                                            X_tst=[X_val, X_tst],
                                            batch_size=32,
                                            verbose=verbose,
                                            seed=21,
                                            return_probs=True,
                                            **params)
    self.X_ens, self.X_tst = res.probs_raw
    X_ens = self.X_ens

    self.X_tst = np.transpose(self.X_tst, (1, 0, 2)).reshape(-1, 15)
    self.X_tst = np.hstack((self.X_tst, np.array(X_tst[2].values.tolist())))

    X_ens = np.transpose(X_ens, (1, 0, 2)).reshape(-1, 15)
    X_ens = np.hstack((X_ens, np.array(X_val[2].values.tolist())))
    y_ens = np.argmax(np.array(X_val[4].values.tolist()), axis=1)

    self.lr = LogisticRegression(random_state=0, C=C, solver='lbfgs', multi_class='multinomial')
    self.lr.fit(X_ens, y_ens)
    return self
Example #22
Source File: ensemble.py From gap with MIT License
def __init__(self, base_model):
    self.base_model = base_model
    self.lr = LogisticRegression(random_state=0, C=1.0, solver='lbfgs', multi_class='multinomial')
Example #23
Source File: logit.py From fairtest with Apache License 2.0
def train_and_test_model1(features_train, labels_train, features_test, labels_test,
                          features_test_original):
    model = LogisticRegression()
    model.fit(features_train, labels_train)
    print("train", log_loss(labels_train, model.predict_proba(features_train.as_matrix())))

    cat_indexes = labels_test.cat.codes
    predict_probas = model.predict_proba(features_test.as_matrix())

    sumloss = .0
    losses = []
    for i in range(predict_probas.shape[0]):
        # clip probabilities away from 0 and 1 before taking the log
        loss = (-1) * sp.log(max(min(predict_probas[i][cat_indexes[i]], 1 - 10**(-5)), 10**(-5)))
        sumloss += loss
        losses.append(loss)

    feature_list = features_test_original.columns.tolist()
    for feature in ["X", "Y", "ZipCode", "Address", "Resolution", "Description",
                    "Dates", "Time", "Category", "Descript"]:
        if feature in feature_list:
            feature_list.remove(feature)
    feature_list_original = ["X", "Y", "ZipCode", "Address", "Resolution", "Description",
                             "Dates", "Time", "Category", "Descript"]
    features_test_original = features_test_original[feature_list]

    print("Test Loss: %.5f" % (sumloss / predict_probas.shape[0]))
    print("test: %.5f" % log_loss(labels_test, model.predict_proba(features_test.as_matrix())))
Example #24
Source File: LR.py From MNIST-baselines with MIT License
def LR():
    loader = MnistLoader(flatten=True, data_path='../data', var_per=None)
    model = LogisticRegression(penalty='l2')
    model.fit(loader.data_train, loader.label_train)
    print('model trained')
    res = model.score(loader.data_test, loader.label_test)
    print(res)
    return res
Example #25
Source File: DiachronicTermMiner.py From scattertext with Apache License 2.0
def _regress_terms(self, X, cat, categories, category_idx_store, neg_mask, terms):
    pos_mask = categories.isin(category_idx_store.getidxstrictbatch([cat])).values
    catX = X[neg_mask | pos_mask, :]
    catY = np.zeros(catX.shape[0]).astype(bool)
    catY[pos_mask[neg_mask | pos_mask]] = True
    scores = (pd.Series(LogisticRegression(penalty='l2').fit(catX, catY).coef_[0], index=terms)
              .sort_values(ascending=False))
    return scores
Example #26
Source File: TermDocMatrix.py From scattertext with Apache License 2.0
def get_logreg_coefs(self, category, clf=LogisticRegression()):
    '''Computes regression score of tfidf transformed features

    Parameters
    ----------
    category : str
        category name to score
    clf : sklearn regressor

    Returns
    -------
    coefficient array
    '''
    self._fit_tfidf_model(category, clf)
    return clf.coef_[0]
Example #27
Source File: baselines.py From cactus-maml with MIT License
def embedding_logistic_regression(C=FLAGS.inverse_reg, penalty='l2', multi_class='multinomial',
                                  num_classes=FLAGS.way, num_shots=FLAGS.shot,
                                  num_tasks=FLAGS.num_tasks,
                                  num_encoding_dims=FLAGS.num_encoding_dims,
                                  test_set=FLAGS.test_set, dataset=FLAGS.dataset):
    print('{}-way {}-shot logistic regression'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies), np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        logistic_regression = LogisticRegression(n_jobs=-1, penalty=penalty, C=C,
                                                 multi_class=multi_class, solver='saga',
                                                 max_iter=1000)
        logistic_regression.fit(Z_train_few, Y_train_few)
        test_accuracies.append(logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(logistic_regression.score(Z_train_few, Y_train_few))

    print('penalty={}, C={}, multi_class={}'.format(penalty, C, multi_class))
    print('{}-way {}-shot logistic regression: {:.5} with 95% CI {:.5} over {} tests'.format(
        num_classes, num_shots, np.mean(test_accuracies),
        1.96 * np.std(test_accuracies) / np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(num_tasks, time.time() - start))
Example #28
Source File: test_sklearn_adapter.py From libact with BSD 2-Clause "Simplified" License
def test_adapt_logistic_regression(self):
    adapter = SklearnProbaAdapter(
        LogisticRegression(solver='liblinear', multi_class="ovr", random_state=1126))
    clf = LogisticRegression(solver='liblinear', multi_class="ovr", random_state=1126)
    self.check_functions(adapter, clf)
Example #29
Source File: p115_l1_l2_regularization.py From PythonMachineLearningExamples with MIT License
def weight_graph(regularization='l1'):
    # X_train_std, y_train, colors, columnsXY, ax and ocr_utils come from the enclosing script
    weights, params = [], []
    for c in np.arange(0, 6):
        lr = LogisticRegression(penalty=regularization, C=10**c, random_state=0)
        lr.fit(X_train_std, y_train)
        weights.append(lr.coef_[1])
        params.append(10**c)

    weights = np.array(weights)

    for column, color in zip(range(weights.shape[1]), colors):
        plt.plot(params, weights[:, column], label=columnsXY[column + 1], color=color)

    plt.axhline(0, color='black', linestyle='--', linewidth=3)
    plt.xlim([10**(-5), 10**5])
    plt.ylabel('weight coefficient')
    plt.xlabel('C')
    plt.xscale('log')
    title = 'regularization {}'.format(regularization)
    plt.title(title)
    plt.legend(loc='upper left')
    ax.legend(loc='upper center', bbox_to_anchor=(1.38, 1.03), ncol=1, fancybox=True)
    ocr_utils.show_figures(plt, title + ' path')
Example #30
Source File: struc2vec_flight.py From GraphEmbedding with MIT License
def evaluate_embeddings(embeddings):
    X, Y = read_node_label('../data/flight/labels-brazil-airports.txt', skip_head=True)
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)