Python sklearn.ensemble.RandomForestClassifier() Examples
The following are 30 code examples of sklearn.ensemble.RandomForestClassifier().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
Example #1
Source File: mmbot.py From MaliciousMacroBot with MIT License | 8 votes |
def mmb_evaluate_model(self):
    """
    Returns scores from cross validation evaluation on the malicious / benign classifier.

    Stores the predictive feature matrix and the label vector on
    ``self.clf_X`` / ``self.clf_y``, sets 20% of the rows aside
    (``random_state=0``) and runs 5-fold cross validation on the remaining 80%.

    :return: dict with the keys ``accuracy``, ``f1``, ``precision`` and
        ``recall``; each value is the result of one ``cross_val_score`` call.
    """
    predictive_features = self.features['predictive_features']
    self.clf_X = self.modeldata[predictive_features].values
    self.clf_y = np.array(self.modeldata['label'])

    # Only the training part of the split is scored; the held-out part is
    # intentionally discarded here.
    X_train, _, y_train, _ = train_test_split(
        self.clf_X, self.clf_y, test_size=0.2, random_state=0)

    # Flatten the binarized labels by keeping the first column of each row.
    lb = LabelBinarizer()
    y_train = np.array([number[0] for number in lb.fit_transform(y_train)])

    # No explicit fit() here: cross_val_score fits its own clone of the
    # estimator for every fold, so a prior fit would only cost time.
    eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)

    # (result key, sklearn scoring name), evaluated in the original order.
    metrics_to_run = (
        ('recall', 'recall'),
        ('precision', 'precision'),
        ('accuracy', 'accuracy'),
        ('f1', 'f1_macro'),
    )
    scores = {}
    for key, scoring in metrics_to_run:
        scores[key] = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring=scoring)

    return {'accuracy': scores['accuracy'],
            'f1': scores['f1'],
            'precision': scores['precision'],
            'recall': scores['recall']}
Example #2
Source File: forest.py From cgpm with Apache License 2.0 | 6 votes |
def __init__(self, outputs, inputs, k=None, hypers=None, params=None,
             distargs=None, rng=None):
    """Set up the random-forest model state.

    Args:
        outputs: list holding exactly one output identifier, which must not
            appear in ``inputs``.
        inputs: non-empty list of input identifiers.
        k: number of output categories; when None it is read from
            ``distargs['k']``.
        hypers: accepted but not used by this constructor.
        params: optional dict; ``'alpha'`` (default .1) and ``'forest'``
            (a ready-made classifier) are read from it.
        distargs: must provide ``distargs['inputs']['stattypes']`` with one
            entry per input.
        rng: random state; a fresh one is generated with ``gu.gen_rng()``
            when None.
    """
    self.outputs = outputs
    self.inputs = inputs
    # Initialised once (the original assigned this twice, generating and
    # discarding an extra rng when none was supplied).
    self.rng = gu.gen_rng() if rng is None else rng
    assert len(self.outputs) == 1
    assert len(self.inputs) >= 1
    assert self.outputs[0] not in self.inputs
    assert len(distargs['inputs']['stattypes']) == len(self.inputs)
    self.stattypes = distargs['inputs']['stattypes']
    # Number of output categories and input dimension.
    # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
    self.k = k if k is not None else int(distargs['k'])
    self.p = len(distargs['inputs']['stattypes'])
    # Sufficient statistics.
    self.N = 0
    self.data = Data(x=OrderedDict(), Y=OrderedDict())
    self.counts = [0] * self.k
    # Outlier and random forest parameters.
    if params is None:
        params = {}
    self.alpha = params.get('alpha', .1)
    self.regressor = params.get('forest', None)
    if self.regressor is None:
        self.regressor = RandomForestClassifier(random_state=self.rng)
Example #3
Source File: classifier.py From stock-price-prediction with MIT License | 6 votes |
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data

    Every column of ``dataset`` except the last is used as a feature and the
    ``'UpDown'`` column is the target.

    :param dataset: DataFrame whose feature columns come first.
    :param method: one of 'RNN', 'RF', 'KNN', 'SVM' or 'ADA'.
    :param parameters: for 'SVM', ``parameters[0]`` is C and
        ``parameters[1]`` is gamma; not read for the other methods.
    :return: the fitted model.
    :raises ValueError: if ``method`` is not one of the supported names.
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        # performRNNlass returns the finished model itself.
        return performRNNlass(dataset[features], dataset['UpDown'])
    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError('unknown method: %r' % (method,))

    return clf.fit(dataset[features], dataset['UpDown'])
Example #4
Source File: 03_fit_predict_plot_midwest_survey.py From dirty_cat with BSD 3-Clause "New" or "Revised" License | 6 votes |
def make_pipeline(encoding_method):
    """Assemble the column-transform / scale / classify pipeline.

    The clean columns are always one-hot encoded; the dirty column is encoded
    with the encoder registered under ``encoding_method`` in ``encoder_dict``.
    """
    # static transformer for the clean columns
    clean_step = ('one-hot-clean', encoder_dict['one-hot'], clean_columns)
    # transformer for the dirty column, chosen by the caller
    dirty_step = (encoding_method + '-dirty',
                  encoder_dict[encoding_method],
                  [dirty_column])

    # Use ColumnTransformer to combine the features
    combined = ColumnTransformer(transformers=[clean_step, dirty_step],
                                 remainder='drop')
    steps = [
        ('union', combined),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5)),
    ]
    return Pipeline(steps)


###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score:
Example #5
Source File: vanilla_model.py From OpenChem with MIT License | 6 votes |
def __init__(self, model_type='classifier', feature_type='fingerprints',
             n_estimators=100, n_ensemble=5):
    """Create an ensemble of ``n_ensemble`` random forests.

    ``model_type`` selects RFC ('classifier') or RFR ('regressor'); any other
    value raises ValueError. For ``feature_type == 'descriptors'`` a
    descriptor calculator and per-model mean placeholders are also prepared.
    """
    super(RandomForestQSAR, self).__init__()
    self.n_estimators = n_estimators
    self.n_ensemble = n_ensemble
    self.model = []
    self.model_type = model_type

    if model_type == 'classifier':
        forest_cls = RFC
    elif model_type == 'regressor':
        forest_cls = RFR
    else:
        raise ValueError('invalid value for argument')
    self.model.extend(forest_cls(n_estimators=n_estimators)
                      for _ in range(n_ensemble))

    self.feature_type = feature_type
    if feature_type == 'descriptors':
        self.calc = Calculator(descriptors, ignore_3D=True)
        # NOTE(review): assumed to belong to the 'descriptors' branch; the
        # flattened source does not show the nesting - confirm.
        self.desc_mean = [0] * self.n_ensemble
Example #6
Source File: adult_RF_Classify.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def Train(data, treecount, tezh, yanzhgdata):
    """Fit a class-balanced random forest and score it on two data sets.

    Both ``data`` (training) and ``yanzhgdata`` (validation) are 2-D arrays
    whose last column is the label and whose other columns are features.

    Returns a ``(train_score, validation_score)`` tuple, where each score is
    the first element of what ``fmse`` returns; the pair is also printed.
    """
    train_features = data[:, :-1]
    train_labels = data[:, -1]

    forest = RFC(n_estimators=treecount, max_features=tezh,
                 class_weight='balanced')
    forest.fit(train_features, train_labels)

    # Predictions and score on the training data
    train_predictions = forest.predict(train_features)
    train_mse = fmse(train_labels, train_predictions)[0]

    # Predictions and score on the validation data
    validation_predictions = forest.predict(yanzhgdata[:, :-1])
    add_mse = fmse(yanzhgdata[:, -1], validation_predictions)[0]

    print(train_mse, add_mse)
    return train_mse, add_mse


# Function that finally determines the combination
Example #7
Source File: test_train_pairwise_similarity_model.py From redshells with MIT License | 6 votes |
def test_run(self):
    """Running the task must dump a fitted RandomForestClassifier."""
    similarity_rows = dict(
        item1=['i0', 'i0', 'i1'],
        item2=['i0', 'i1', 'i1'],
        similarity=[1, 0, 1],
    )
    self.input_data['item2embedding'] = dict(i0=[1, 2], i1=[3, 4])
    self.input_data['similarity_data'] = pd.DataFrame(similarity_rows)

    task_kwargs = dict(
        item2embedding_task=_DummyTask(),
        similarity_data_task=_DummyTask(),
        model_name='RandomForestClassifier',
        item0_column_name='item1',
        item1_column_name='item2',
        similarity_column_name='similarity',
    )
    task = TrainPairwiseSimilarityModel(**task_kwargs)

    # Route the task's I/O through the test's own load/dump hooks.
    task.load = MagicMock(side_effect=self._load)
    task.dump = MagicMock(side_effect=self._dump)
    task.run()

    self.assertIsInstance(self.dump_data, RandomForestClassifier)
Example #8
Source File: function.py From Karta with MIT License | 6 votes |
def trainFunctionTypeClassifier(self, scs):
    """Train the type classifier, according to all known code segments.

    Args:
        scs (list): list of all known (sark) code segments

    Note:
        Training must happen *after* the calibration phase
    """
    # Collect every function that the fptr identifier does not report as pointed.
    functions = [
        func
        for sc in scs
        for func in sc.functions
        if not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea)
    ]
    clf = RandomForestClassifier(n_estimators=100)
    eas = [func.start_ea for func in functions]
    samples = [self.extractFunctionTypeSample(ea) for ea in eas]
    labels = [self._analyzer.codeType(ea) for ea in eas]
    # classify
    clf.fit(samples, labels)
    # store the results
    self._type_classifier = clf
Example #9
Source File: Stock_Prediction_Model_Random_Forrest.py From StockRecommendSystem with MIT License | 6 votes |
def build_model(self, X_train, y_train):
    """Return a previously saved model, or configure a new random forest.

    When ``self.paras.load`` equals True and ``load_training_model`` returns
    a model, that model is returned as is. Otherwise ``best_window`` is asked
    for the tree count and feature count, and an *unfitted*
    RandomForestClassifier configured with them is returned.
    """
    # Kept as an equality test so that only True/1 enable loading.
    if self.paras.load == True:  # noqa: E712
        model = self.load_training_model(self.paras.window_len)
        if model is not None:
            return model

    print('build Random Forrest model...')

    # NOTE(review): `index` is not defined inside this method; presumably it
    # is a module-level name - confirm, otherwise this raises NameError.
    # range of number of trees : 5*(1 -> 10) = 5,10,...,50 trees
    t_min = self.paras.tree_min[index]
    t_max = self.paras.tree_max[index]
    # range of max of features : 1 -> 10 features
    f_min = self.paras.feature_min[index]
    f_max = self.paras.feature_max[index]
    # range of window : 1 -> 70 days
    w_min = self.paras.window_min
    w_max = self.paras.window_max

    # The first returned value (w_opt in the original) is not needed here.
    _, n_opt, m_opt = self.best_window(
        X_train, y_train, w_min, w_max, t_min, t_max, f_min, f_max)
    return RandomForestClassifier(n_estimators=n_opt, max_features=m_opt,
                                  n_jobs=8, verbose=self.paras.verbose)
Example #10
Source File: test_random_forest_classifier.py From monasca-analytics with Apache License 2.0 | 5 votes |
def test_learn_structure(self):
    """learn_structure must hand back an sklearn RandomForestClassifier."""
    learned = self.rf_sml.learn_structure(self.get_testing_data())
    self.assertIsInstance(learned, ensemble.RandomForestClassifier)
Example #11
Source File: Blending_Classify_adult.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 5 votes |
def RF_First(self, data, n_estimators=800, max_features='sqrt'):
    """Train the first-layer random forest and record its predictions.

    ``data`` maps 'train', 'test' and 'predict' to 2-D arrays whose last
    column is the label. Validation and prediction outputs are appended to
    ``self.yanzhneg_pr`` / ``self.predi``, the matching true labels are
    stored, and the three F1 scores go into ``self.error_dict``.
    Prints a completion message and returns None.
    """
    forest = RF(n_estimators=n_estimators, max_features=max_features)
    forest.fit(data['train'][:, :-1], data['train'][:, -1])

    # Predictions for the training, validation and prediction data sets
    train_pred = forest.predict(data['train'][:, :-1])
    valid_pred = forest.predict(data['test'][:, :-1])
    final_pred = forest.predict(data['predict'][:, :-1])

    # Scores for the training, validation and prediction data (computed per fold)
    train_f1 = self.F1(train_pred, data['train'][:, -1])
    valid_f1 = self.F1(valid_pred, data['test'][:, -1])
    final_f1 = self.F1(final_pred, data['predict'][:, -1])

    # Collect the outputs for the later combination step
    self.yanzhneg_pr.append(valid_pred)
    self.yanzhneg_real = data['test'][:, -1]
    self.predi.append(final_pred)
    self.preal = data['predict'][:, -1]

    # Store the scores
    self.error_dict['随机森林'] = [train_f1, valid_f1, final_f1]

    print('1层中的随机森林运行完毕')
    return None


# AdaBoost
Example #12
Source File: classification_randomForest.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 5 votes |
def fitRandomForest(data):
    '''
        Build a random forest classifier

        data[0] holds the features and data[1] the labels; the fitted
        classifier is returned.
    '''
    # create the classifier object
    # class_weight="auto" was removed from scikit-learn; "balanced" is its
    # replacement and is accepted by current releases.
    forest = en.RandomForestClassifier(n_jobs=-1,
        min_samples_split=100, n_estimators=10,
        class_weight="balanced")

    # fit the data
    return forest.fit(data[0], data[1])

# the file name of the dataset
Example #13
Source File: mlmodel.py From speech-emotion-recognition with MIT License | 5 votes |
def __init__(self, **params):
    """Initialise the base model under the name 'Random Forest' with a 30-tree forest."""
    params.update(name='Random Forest')
    super(RF, self).__init__(**params)
    self.model = RandomForestClassifier(n_estimators=30)
Example #14
Source File: test_pipe.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_pipeline_complex():
    """A pipeline chaining all selective transformers must fit without error."""
    retained = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)']
    steps = [
        ('selector', FeatureRetainer(cols=retained)),
        ('scaler', SelectiveScaler()),
        ('boxcox', BoxCoxTransformer()),
        ('pca', SelectivePCA()),
        ('svd', SelectiveTruncatedSVD()),
        ('model', RandomForestClassifier()),
    ]
    Pipeline(steps).fit(X, iris.target)
Example #15
Source File: test_random_forest_classifier.py From monasca-analytics with Apache License 2.0 | 5 votes |
def setUp(self):
    """Create the classifier under test with a fake id and a minimal config."""
    super(TestRandomForestClassifier, self).setUp()
    config = {"module": "fake", "nb_samples": 1000}
    self.rf_sml = random_forest_classifier.RandomForestClassifier("fakeid", config)
Example #16
Source File: random_forest_classifier.py From monasca-analytics with Apache License 2.0 | 5 votes |
def _get_best_detector(self, train, label):
    """Fit a default-configured random forest on (train, label) and return it."""
    forest = ensemble.RandomForestClassifier()
    forest.fit(train, label)
    return forest
Example #17
Source File: random_forest_classifier.py From monasca-analytics with Apache License 2.0 | 5 votes |
def __init__(self, _id, _config):
    """Initialise the base class and keep ``_config['nb_samples']`` as an int."""
    super(RandomForestClassifier, self).__init__(_id, _config)
    nb_samples = _config['nb_samples']
    self._nb_samples = int(nb_samples)
Example #18
Source File: models.py From aletheia with MIT License | 5 votes |
def fit(self, X, y):
    """Select features, then train one classifier per random sample subset.

    The ``self.max_features`` best features are chosen with
    ``SelectKBest(f_classif)``. For each of the ``self.n_estimators``
    classifiers the row indices are reshuffled and the first
    ``self.max_samples`` rows are handed to ``self._prepare_classifier``;
    the calls run on a thread pool sized by ``cpu_count()`` and the results
    are stored in ``self.clf_list``.
    """
    self.selector = SelectKBest(f_classif, k=self.max_features)
    self.selector.fit(X, y)

    X_train = self.selector.transform(X)
    y_train = y

    # A real list is required: on Python 3 range() is immutable and
    # random.shuffle() would raise TypeError.
    idx = list(range(len(y_train)))
    param_list = []
    for _ in range(self.n_estimators):
        random.shuffle(idx)
        subset = idx[:self.max_samples]
        param_list.append((X_train[subset], y_train[subset]))

    pool = ThreadPool(cpu_count())
    try:
        self.clf_list = pool.map(self._prepare_classifier, param_list)
    finally:
        # Release the worker threads even if a classifier fails to train.
        pool.close()
        pool.join()

    # Disabled second-stage model, kept from the original for reference:
    #
    # X2=[]
    # for clf in self.clf_list:
    #     P=clf.predict_proba(X_train)
    #     if len(X2)==0:
    #         X2=P[:, 0]
    #     else:
    #         X2=numpy.vstack((X2, P[:, 0]))
    # X2=numpy.swapaxes(X2, 0, 1)
    # print "X2:", X2.shape
    #
    # from sklearn.ensemble import RandomForestClassifier
    # self.clf2=RandomForestClassifier(n_estimators=100)
    # self.clf2.fit(X2, y_train)
Example #19
Source File: pipline.py From MachineLearning with Apache License 2.0 | 5 votes |
def get_rfc():
    """Return an unfitted RandomForestClassifier with the project's fixed settings."""
    settings = dict(
        n_estimators=100,
        max_features=0.5,
        max_depth=None,
        max_leaf_nodes=270,
        min_impurity_decrease=0.0001,
        random_state=123,
        n_jobs=-1,
    )
    return RandomForestClassifier(**settings)
Example #20
Source File: classifier.py From stock-price-prediction with MIT License | 5 votes |
def performRFClass(X_train, y_train, X_test, y_test, parameters, savemodel):
    """
    Random Forest Binary Classification

    Fits a 1000-tree forest on the training data and returns its ``score``
    on the test data. ``parameters`` and ``savemodel`` are accepted but not
    used by this function.
    """
    forest = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    forest.fit(X_train, y_train)
    return forest.score(X_test, y_test)
Example #21
Source File: pu_learning.py From LogClass with MIT License | 5 votes |
def instatiate_pu_adapter(params, **kwargs):
    """
    Returns a RF adapted to do PU Learning wrapped by the PUAdapterWrapper.

    Keyword arguments override the default forest hyper-parameters.
    """
    defaults = {
        'n_estimators': 10,
        'criterion': "entropy",
        'bootstrap': True,
        'n_jobs': -1,
    }
    # Caller-supplied keyword arguments win over the defaults.
    hparms = dict(defaults, **kwargs)
    pu_estimator = PUAdapter(RandomForestClassifier(**hparms))
    return PUAdapterWrapper(pu_estimator, params)
Example #22
Source File: test_pipe.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_pipeline_basic():
    """A selector / scaler / forest pipeline must fit without error."""
    retained = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)']
    steps = [
        ('selector', FeatureRetainer(cols=retained)),
        ('scaler', SelectiveScaler()),
        ('model', RandomForestClassifier()),
    ]
    Pipeline(steps).fit(X, iris.target)
Example #23
Source File: function.py From Karta with MIT License | 5 votes |
def calibrateFunctionTypeClassifier(self, scs):
    """Calibrate the type classifier, according to all known code segments.

    Args:
        scs (list): list of all known (sark) code segments

    Return Value:
        True iff the calibration was successfully and is more accurate than the assigned lower bound
    """
    functions = [
        func
        for sc in scs
        for func in sc.functions
        if not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea)
    ]
    # Round 0 calibrates (selects the strongest features), round 1 tests the
    # accuracy against the lower bound.
    for training_round, round_name in enumerate(("Calibration", "Testing")):
        clf = RandomForestClassifier(n_estimators=100)
        eas = [func.start_ea for func in functions]
        # Samples are re-extracted each round; presumably extraction depends
        # on the offsets updated in round 0 - confirm.
        data_set = [self.extractFunctionTypeSample(ea) for ea in eas]
        data_results = [self._analyzer.codeType(ea) for ea in eas]
        # NOTE(review): test_size=0.7 trains on 30% and tests on 70%; the
        # original comment read "split to train and test (70%, 30%)" -
        # confirm which split is intended.
        X_train, X_test, Y_train, Y_test = train_test_split(
            data_set, data_results, test_size=0.7, random_state=5)
        # classify
        clf.fit(X_train, Y_train)
        # test
        accuracy = metrics.accuracy_score(Y_test, clf.predict(X_test))
        self._analyzer.logger.info("%s: Function accuracy Type Accuracy: %.2f%%", round_name, accuracy * 100)
        if training_round == 0:
            # Keep only the offsets with the highest feature importance.
            ranked = sorted(zip(self._classifier_type_offsets, clf.feature_importances_),
                            key=lambda pair: pair[1], reverse=True)
            self._classifier_type_offsets = [offset for offset, _ in ranked[:self._feature_size]]
            continue
        if accuracy < CALIBRATION_LOWER_BOUND:
            self._analyzer.logger.error("Function Prologue Type Accuracy is too low, can't continue: %.2f%% < %.2f%%", accuracy * 100, CALIBRATION_LOWER_BOUND * 100)
            return False
    # If reached this point it means that all was OK
    return True
Example #24
Source File: annotation.py From scVI with MIT License | 5 votes |
def compute_accuracy_rf(
    data_train, labels_train, data_test, labels_test, param_grid=None, verbose=0
):
    """Grid-search a random forest (3-fold CV) and score it with compute_accuracy_classifier.

    When ``param_grid`` is None, depths 3..9 and 10/50/100/200 trees are searched.
    """
    grid = param_grid
    if grid is None:
        grid = {"max_depth": np.arange(3, 10), "n_estimators": [10, 50, 100, 200]}
    base_forest = RandomForestClassifier(max_depth=2, random_state=0)
    search = GridSearchCV(base_forest, grid, verbose=verbose, cv=3)
    return compute_accuracy_classifier(
        search, data_train, labels_train, data_test, labels_test
    )
Example #25
Source File: advanced_supvervised_model_trainer.py From healthcareai-py with MIT License | 5 votes |
def random_forest_classifier(self,
                             trees=200,
                             scoring_metric='roc_auc',
                             hyperparameter_grid=None,
                             randomized_search=True,
                             number_iteration_samples=5):
    """
    A light wrapper for Sklearn's random forest classifier that performs randomized search over an overridable
    default hyperparameter grid.

    Args:
        trees (int): number of trees to use if not performing a randomized grid search
        scoring_metric (str): Any sklearn scoring metric appropriate for classification
        hyperparameter_grid (dict): hyperparameters by name
        randomized_search (bool): True for randomized search (default)
        number_iteration_samples (int): Number of models to train during the randomized search for exploring the
            hyperparameter space. More may lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel:
    """
    self.validate_classification('Random Forest Classifier')

    if hyperparameter_grid is None:
        feature_count = len(self.X_test.columns)
        max_features = hcai_helpers.calculate_random_forest_mtry_hyperparameter(
            feature_count, self.model_type)
        hyperparameter_grid = {'n_estimators': [100, 200, 300], 'max_features': max_features}
        # With the default grid the sample count is pinned to 5.
        # NOTE(review): assumed to sit inside this branch; the flattened
        # source does not show the nesting - confirm.
        number_iteration_samples = 5

    algorithm = get_algorithm(RandomForestClassifier,
                              scoring_metric,
                              hyperparameter_grid,
                              randomized_search,
                              number_iteration_samples=number_iteration_samples,
                              n_estimators=trees)

    return self._create_trained_supervised_model(algorithm)
Example #26
Source File: transpile.py From go-ml-transpiler with Apache License 2.0 | 5 votes |
def _transpile_into(model, package_name, export_dir):
    """Transpile a fitted model into the new directory <export_dir>/<package_name>."""
    target_dir = os.path.join(export_dir, package_name)
    os.mkdir(target_dir)
    transpiler = Transpiler(model)
    transpiler.transpile(package_name=package_name, method_name="predict", export_method=True)
    transpiler.write(target_dir)


def main(export_dir):
    """Train an XGBoost and a random-forest classifier and transpile each one.

    Each model is written to its own sub-directory ("xgb", "rfc") of
    ``export_dir``; the sub-directories are created with ``os.mkdir``.
    """
    ## load dataset
    x, y = load_dataset(return_X_y=True)

    ## train and transpile xgb
    boosted = xgb.XGBClassifier(n_estimators=100, max_depth=7)
    boosted.fit(x, y)
    _transpile_into(boosted, "xgb", export_dir)
    print("xgb done.")

    ## train and transpile rfc
    forest = RFC(n_estimators=100, max_depth=7)
    forest.fit(x, y)
    _transpile_into(forest, "rfc", export_dir)
    print("rfc done.")
Example #27
Source File: common_utils.py From interpret-text with MIT License | 5 votes |
def create_random_forest_tfidf():
    """Return an unfitted TF-IDF (case-preserving) + 500-tree random forest pipeline."""
    steps = [
        ("vectorizer", TfidfVectorizer(lowercase=False)),
        ("rf", RandomForestClassifier(n_estimators=500, random_state=777)),
    ]
    return Pipeline(steps)
Example #28
Source File: malss.py From malss with MIT License | 5 votes |
def select_features(self):
    """Drop columns using a random forest and refresh the algorithm list.

    Warns and returns without doing anything when ``self.data`` is None.
    Otherwise a regressor or classifier forest (chosen by ``self.task``) is
    passed to ``self.data.drop_col``; if that removed at least one column the
    algorithms are re-chosen.
    """
    if self.data is None:
        warnings.warn("'drop_col' must be used after 'fit' has used.")
        return

    if self.task == 'regression':
        forest_cls = RandomForestRegressor
    else:
        forest_cls = RandomForestClassifier
    rf = forest_cls(random_state=0, oob_score=True,
                    n_estimators=50, n_jobs=self.n_jobs)

    columns_before = len(self.data.X.columns)
    self.data.drop_col(rf)
    if len(self.data.X.columns) < columns_before:
        self.algorithms = self.__choose_algorithm()
        # NOTE(review): assumed to belong to this branch; the flattened
        # source does not show the nesting - confirm.
        self.is_ready = True
Example #29
Source File: rfpimp.py From malss with MIT License | 5 votes |
def importances_raw(rf, X_train, y_train, n_samples=5000):
    """Return raw permutation importances using the OOB metric that fits ``rf``.

    Classifiers are scored with ``oob_classifier_accuracy``, regressors with
    ``oob_regression_r2_score``; any other model type yields None.
    """
    if isinstance(rf, RandomForestClassifier):
        metric = oob_classifier_accuracy
    elif isinstance(rf, RandomForestRegressor):
        metric = oob_regression_r2_score
    else:
        return None
    return permutation_importances_raw(rf, X_train, y_train, metric, n_samples)
Example #30
Source File: rfpimp.py From malss with MIT License | 5 votes |
def oob_dropcol_importances(rf, X_train, y_train):
    """
    Compute drop-column feature importances for scikit-learn.

    Given a RandomForestClassifier or RandomForestRegressor in rf
    and training X and y data, return a data frame with columns
    Feature and Importance sorted in reverse order by importance.

    A clone of rf is trained once to get the baseline score and then
    again, once per feature to compute the drop in out of bag (OOB)
    score.

    return: A data frame with Feature, Importance columns

    SAMPLE CODE

    rf = RandomForestRegressor(n_estimators=100, n_jobs=-1, oob_score=True)
    X_train, y_train = ..., ...
    rf.fit(X_train, y_train)
    imp = oob_dropcol_importances(rf, X_train, y_train)
    """
    def oob_score_of(features):
        # Train a fresh clone with a fixed seed so that all scores are comparable.
        model = clone(rf)
        model.random_state = 999
        model.fit(features, y_train)
        return model.oob_score_

    baseline = oob_score_of(X_train)
    drops = np.array([
        baseline - oob_score_of(X_train.drop(col, axis=1))
        for col in X_train.columns
    ])
    table = pd.DataFrame(data={'Feature': X_train.columns, 'Importance': drops})
    return table.set_index('Feature').sort_values('Importance', ascending=False)