Python sklearn.ensemble.IsolationForest() Examples
The following are 30 code examples of sklearn.ensemble.IsolationForest().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
Example #1
Source File: isoforest.py From Deep-SAD-PyTorch with MIT License | 6 votes |
def __init__(self, hybrid=False, n_estimators=100, max_samples='auto', contamination=0.1, n_jobs=-1, seed=None,
             **kwargs):
    """Set up the wrapper and build the underlying IsolationForest.

    The hyperparameters are stored on the instance and forwarded to
    ``IsolationForest``; ``seed`` is passed as ``random_state`` and any
    extra ``kwargs`` are forwarded unchanged.
    """
    # Keep the hyperparameters around for later inspection.
    self.n_estimators = n_estimators
    self.max_samples = max_samples
    self.contamination = contamination
    self.n_jobs = n_jobs
    self.seed = seed

    forest_params = {
        'n_estimators': n_estimators,
        'max_samples': max_samples,
        'contamination': contamination,
        'n_jobs': n_jobs,
        'random_state': seed,
    }
    self.model = IsolationForest(**forest_params, **kwargs)

    self.hybrid = hybrid
    # Autoencoder network, only relevant when running as a hybrid model.
    self.ae_net = None

    # Result slots start out empty.
    self.results = dict.fromkeys(('train_time', 'test_time', 'test_auc', 'test_scores'))
Example #2
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_iforest_error():
    """Check that invalid ``max_samples`` values are rejected or warned about."""
    X = iris.data

    # Values outside the accepted range must raise at fit time.
    for out_of_range in (-1, 0.0, 2.0):
        assert_raises(ValueError, IsolationForest(max_samples=out_of_range).fit, X)

    # The dataset has fewer than 256 samples: an explicit max_samples larger
    # than n_samples should warn, while leaving it unset should not.
    oversized = IsolationForest(max_samples=1000)
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         oversized.fit, X)
    for quiet_value in ('auto', np.int64(2)):
        assert_no_warnings(IsolationForest(max_samples=quiet_value).fit, X)

    # Unknown strings and floats above 1 are errors as well.
    for invalid in ('foobar', 1.5):
        assert_raises(ValueError, IsolationForest(max_samples=invalid).fit, X)
Example #3
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_iforest_sparse():
    """Verify that sparse and dense inputs give identical predictions."""
    rng = check_random_state(0)
    X_train, X_test, _, _ = train_test_split(boston.data[:50],
                                             boston.target[:50],
                                             random_state=rng)
    param_grid = ParameterGrid({"max_samples": [0.5, 1.0],
                                "bootstrap": [True, False]})

    def fit_predict(train, test, settings):
        # Same estimator configuration for both the sparse and dense runs.
        forest = IsolationForest(n_estimators=10, random_state=1, **settings)
        return forest.fit(train).predict(test)

    for to_sparse in (csc_matrix, csr_matrix):
        sparse_train = to_sparse(X_train)
        sparse_test = to_sparse(X_test)

        for settings in param_grid:
            sparse_pred = fit_predict(sparse_train, sparse_test, settings)
            dense_pred = fit_predict(X_train, X_test, settings)
            assert_array_equal(sparse_pred, dense_pred)
Example #4
Source File: iso_forest.py From safekit with MIT License | 6 votes |
def sample_hyps_iso_forest(nest, contam, boot):
    """
    Build an unfitted IsolationForest from the given hyperparameters.

    :param nest: Value passed as ``n_estimators``.
    :param contam: Value passed as ``contamination``.
    :param boot: Value passed as ``bootstrap``.
    :return: An IsolationForest object with specified hyperparameters, used to detect anomaly.
    """
    return IsolationForest(
        n_estimators=nest,
        max_samples='auto',
        contamination=contam,
        max_features=1.0,  # use all features
        bootstrap=boot,
        n_jobs=-1,  # use all cores
        verbose=0,
    )
Example #5
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_deprecation():
    """Check the deprecation warnings raised by fit and by ``threshold_``."""
    X = [[0.0], [1.0]]

    contamination_msg = ('default contamination parameter 0.1 will change '
                         'in version 0.22 to "auto"')
    behaviour_msg = ('behaviour="old" is deprecated and will be removed '
                     'in version 0.22')
    threshold_msg = ("threshold_ attribute is deprecated in 0.20 and will"
                     " be removed in 0.22.")

    # Fitting a default-configured estimator emits both FutureWarnings.
    clf = IsolationForest()
    assert_warns_message(FutureWarning, contamination_msg, clf.fit, X)
    assert_warns_message(FutureWarning, behaviour_msg, clf.fit, X)

    # Reading threshold_ on a fitted estimator emits a DeprecationWarning.
    clf = IsolationForest().fit(X)
    assert_warns_message(DeprecationWarning, threshold_msg,
                         getattr, clf, "threshold_")
Example #6
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    # The former `X_train = np.r_[X + 2, X - 2]` was a dead store that was
    # overwritten immediately; only the first 100 rows are used for training.
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # Negate the decision function so that higher values mean "more abnormal",
    # matching the 1 = outlier labels in y_test.
    y_pred = - clf.decision_function(X_test)

    # check that the outliers are ranked above the inliers (ROC AUC > 0.98)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Example #7
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_iforest_sparse():
    """Verify that sparse and dense inputs give identical predictions."""
    rng = check_random_state(0)
    X_train, X_test, _, _ = train_test_split(boston.data[:50],
                                             boston.target[:50],
                                             random_state=rng)
    param_grid = ParameterGrid({"max_samples": [0.5, 1.0],
                                "bootstrap": [True, False]})

    def fit_predict(train, test, settings):
        # Same estimator configuration for both the sparse and dense runs.
        forest = IsolationForest(n_estimators=10, random_state=1, **settings)
        return forest.fit(train).predict(test)

    for to_sparse in (csc_matrix, csr_matrix):
        sparse_train = to_sparse(X_train)
        sparse_test = to_sparse(X_test)

        for settings in param_grid:
            sparse_pred = fit_predict(sparse_train, sparse_test, settings)
            dense_pred = fit_predict(X_train, X_test, settings)
            assert_array_equal(sparse_pred, dense_pred)
Example #8
Source File: outliers.py From visualqc with Apache License 2.0 | 6 votes |
def run_isolation_forest(features, id_list, fraction_of_outliers=.3):
    """Flag outlying IDs using an Isolation Forest fitted on ``features``.

    The forest is fitted with a fixed random seed, every row is scored with
    ``decision_function``, and the IDs whose score falls below the
    ``100 * fraction_of_outliers`` percentile of the scores are returned.
    ``id_list`` is indexed with a boolean mask.
    """
    forest = IsolationForest(max_samples=features.shape[0],
                             contamination=fraction_of_outliers,
                             random_state=np.random.RandomState(1984))
    forest.fit(features)

    scores = forest.decision_function(features)
    cutoff = stats.scoreatpercentile(scores, 100 * fraction_of_outliers)
    is_outlier = scores < cutoff

    return id_list[is_outlier]
Example #9
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    # The former `X_train = np.r_[X + 2, X - 2]` was a dead store that was
    # overwritten immediately; only the first 100 rows are used for training.
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # Negate the decision function so that higher values mean "more abnormal",
    # matching the 1 = outlier labels in y_test.
    y_pred = - clf.decision_function(X_test)

    # check that the outliers are ranked above the inliers (ROC AUC > 0.98)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Example #10
Source File: test_outlier_remover.py From scikit-lego with MIT License | 5 votes |
def test_pipeline_integration():
    """Run two OutlierRemover steps followed by KMeans inside a Pipeline."""
    np.random.seed(42)
    samples = np.random.normal(0, 1, (2000, 2))
    dataset = np.concatenate([samples])

    steps = [
        ("isolation_forest_remover", OutlierRemover(outlier_detector=IsolationForest())),
        ("gmm_remover", OutlierRemover(outlier_detector=GMMOutlierDetector())),
        ("kmeans", KMeans()),
    ]
    pipeline = Pipeline(steps)

    # Fitting and transforming must both complete without raising.
    pipeline.fit(dataset)
    pipeline.transform(dataset)
Example #11
Source File: random_split_trees.py From ad_examples with MIT License | 5 votes |
def fit(self, X, y=None, sample_weight=None):
    """Fit a fresh IsolationForest configured from this object's attributes.

    After fitting, the forest's ``estimators_`` and ``estimators_features_``
    are mirrored onto this object and ``updated`` is reset to ``False``.
    """
    forest = IsolationForest(
        n_estimators=self.n_estimators,
        max_samples=self.max_samples,
        contamination=self.contamination,
        max_features=self.max_features,
        bootstrap=self.bootstrap,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
        verbose=self.verbose,
    )
    self.ifor = forest
    forest.fit(X, y, sample_weight)

    # Expose the fitted trees and their feature subsets directly.
    self.estimators_ = forest.estimators_
    self.estimators_features_ = forest.estimators_features_
    self.updated = False
Example #12
Source File: multiview_forest.py From ad_examples with MIT License | 5 votes |
def _multiview_fit(self, X, y, feature_partitions, n_estimators_view):
    """Fit one isolation forest per feature view and collect wrapped trees.

    ``feature_partitions`` gives the number of consecutive columns of ``X``
    in each view and ``n_estimators_view`` the number of trees for that view.
    Returns a list with one entry per view; each entry is the list of
    ``IForestMultiviewTree`` objects built for that view (empty when the view
    is assigned no trees).
    """
    total_features = X.shape[1]
    logger.debug("IForestMultiview n_estimators_view: %s" % str(list(n_estimators_view)))

    estimators_group = []
    view_start = 0
    for view_width, view_n_trees in zip(feature_partitions, n_estimators_view):
        view_end = view_start + view_width
        X_view = X[:, view_start:view_end]

        view_estimators = []
        if view_n_trees > 0:
            # Build an isolation forest that only sees this view's columns.
            view_forest = IsolationForest(
                n_estimators=view_n_trees,
                max_samples=self.max_samples,
                contamination=self.contamination,
                max_features=self.max_features,
                bootstrap=self.bootstrap,
                n_jobs=self.n_jobs,
                random_state=self.random_state,
                verbose=self.verbose,
            )
            view_forest.fit(X_view, y, sample_weight=None)

            for fitted in view_forest.estimators_:
                # Per the original author's note, the IsolationForest trees
                # expose read-only properties, so each one is copied into the
                # custom tree structure where it can be modified.
                wrapped = IForestMultiviewTree(n_features=total_features,
                                               ifor_tree=fitted.tree_)
                # Shift node feature indexes from view-local to global columns.
                wrapped.tree_.feature += view_start
                view_estimators.append(wrapped)

        estimators_group.append(view_estimators)
        view_start = view_end

    return estimators_group
Example #13
Source File: test_iso_gan.py From ad_examples with MIT License | 5 votes |
def get_iso_model(x, y, opts):
    """Fit an IsolationForest on ``x`` and return it with its column of scores.

    ``y`` is accepted but not used. The forest is seeded from
    ``opts.randseed``. The second return value is the ``decision_function``
    output for ``x`` reshaped to a single column.
    """
    iso_model = IsolationForest(
        n_estimators=100,
        max_samples=256,
        contamination=0.1,
        random_state=opts.randseed,
    )
    iso_model.fit(x)

    scores = iso_model.decision_function(x)
    score_column = np.reshape(scores, (-1, 1))
    return iso_model, score_column
Example #14
Source File: IsolationForest.py From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    """Validate the user-supplied parameters and build the estimator.

    Parameters are read from ``options['params']`` and converted with
    ``convert_params``. ``anomaly_score`` is popped into
    ``self.return_scores`` (default ``True``); the remaining parameters are
    range-checked and forwarded to ``_IsolationForest``.

    Raises:
        RuntimeError: if ``n_estimators``, ``max_samples``, ``contamination``
            or ``max_features`` is outside its accepted range.
    """
    self.handle_options(options)

    out_params = convert_params(
        options.get('params', {}),
        ints=['n_estimators', 'n_jobs', 'random_state', 'verbose'],
        floats=['max_samples', 'contamination', 'max_features'],
        bools=['bootstrap'],
    )

    self.return_scores = out_params.pop('anomaly_score', True)

    # whitelist n_estimators > 0
    if 'n_estimators' in out_params and out_params['n_estimators'] <= 0:
        msg = 'Invalid value error: n_estimators must be greater than 0 and an integer, but found n_estimators="{}".'
        raise RuntimeError(msg.format(out_params['n_estimators']))

    # whitelist max_samples in (0.0, 1.0].
    # The previous test (`< 0 and > 1`) could never be true, so invalid
    # values slipped through to the estimator unchecked.
    if 'max_samples' in out_params and not (0.0 < out_params['max_samples'] <= 1.0):
        msg = 'Invalid value error: max_samples must be greater than 0 and a float, but found max_samples="{}".'
        raise RuntimeError(msg.format(out_params['max_samples']))

    # whitelist contamination should be in (0.0, 0.5] as error raised by sklearn for values out of range
    if 'contamination' in out_params and not (0.0 < out_params['contamination'] <= 0.5):
        msg = (
            'Invalid value error: Valid values for contamination are in (0.0, 0.5], '
            'but found contamination="{}".'
        )
        raise RuntimeError(msg.format(out_params['contamination']))

    # whitelist max_features in (0.0, 1.0] (same unreachable-condition fix
    # as for max_samples above).
    if 'max_features' in out_params and not (0.0 < out_params['max_features'] <= 1.0):
        msg = 'Invalid value error: max_features must be greater than 0, but found max_features="{}".'
        raise RuntimeError(msg.format(out_params['max_features']))

    self.estimator = _IsolationForest(**out_params)
Example #15
Source File: test_outlier_remover.py From scikit-lego with MIT License | 5 votes |
def test_estimator_checks(test_fn):
    """Run ``test_fn`` against OutlierRemover wrapping each detector type."""
    # GMM first, then IsolationForest; each remover is built right before
    # its check runs.
    for detector_cls in (GMMOutlierDetector, IsolationForest):
        remover = OutlierRemover(outlier_detector=detector_cls(), refit=True)
        test_fn(OutlierRemover.__name__, remover)
Example #16
Source File: model.py From batea with GNU General Public License v2.0 | 5 votes |
def build_model(self, outlier_ratio=0.1, n_estimators=100, max_samples='auto'):
    """Create the IsolationForest and store it on ``self.model``.

    ``outlier_ratio`` is passed to the estimator as ``contamination``.
    """
    # NOTE(review): `behaviour` was deprecated in scikit-learn 0.22 and
    # removed in 0.24 -- confirm the pinned scikit-learn still accepts it.
    forest_params = dict(
        contamination=outlier_ratio,
        n_estimators=n_estimators,
        max_samples=max_samples,
        behaviour='new',
    )
    self.model = IsolationForest(**forest_params)
Example #17
Source File: test_outlier_remover.py From scikit-lego with MIT License | 5 votes |
def test_estimator_checks(test_fn):
    """Run ``test_fn`` against OutlierRemover wrapping each detector type."""
    # GMM first, then IsolationForest; each remover is built right before
    # its check runs.
    for detector_cls in (GMMOutlierDetector, IsolationForest):
        remover = OutlierRemover(outlier_detector=detector_cls(), refit=True)
        test_fn(OutlierRemover.__name__, remover)
Example #18
Source File: test_outlier_remover.py From scikit-lego with MIT License | 5 votes |
def test_pipeline_integration():
    """Run two OutlierRemover steps followed by KMeans inside a Pipeline."""
    np.random.seed(42)
    samples = np.random.normal(0, 1, (2000, 2))
    dataset = np.concatenate([samples])

    steps = [
        ("isolation_forest_remover", OutlierRemover(outlier_detector=IsolationForest())),
        ("gmm_remover", OutlierRemover(outlier_detector=GMMOutlierDetector())),
        ("kmeans", KMeans()),
    ]
    pipeline = Pipeline(steps)

    # Fitting and transforming must both complete without raising.
    pipeline.fit(dataset)
    pipeline.transform(dataset)
Example #19
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_max_samples_attribute():
    """Check the fitted ``max_samples_`` for default, oversized and float inputs."""
    X = iris.data
    n_samples = X.shape[0]

    # Default setting: max_samples_ equals the number of samples here.
    default_clf = IsolationForest().fit(X)
    assert_equal(default_clf.max_samples_, n_samples)

    # Oversized integer: warns and falls back to n_samples.
    oversized_clf = IsolationForest(max_samples=500)
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         oversized_clf.fit, X)
    assert_equal(oversized_clf.max_samples_, n_samples)

    # Float: interpreted as a fraction of n_samples.
    fractional_clf = IsolationForest(max_samples=0.4).fit(X)
    assert_equal(fractional_clf.max_samples_, 0.4*n_samples)
Example #20
Source File: isoForest.py From Deep-SVDD with MIT License | 5 votes |
def initialize_isoForest(self, seed=0, **kwargs):
    """Create the IsolationForest from this object's settings.

    ``seed`` is passed as ``random_state``, all cores are used
    (``n_jobs=-1``), and extra ``kwargs`` are forwarded unchanged.
    """
    forest_params = dict(
        n_estimators=self.n_estimators,
        max_samples=self.max_samples,
        contamination=self.contamination,
        n_jobs=-1,
        random_state=seed,
    )
    self.isoForest = IsolationForest(**forest_params, **kwargs)
Example #21
Source File: test_ensemble.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    """Check that ``ModelFrame.ensemble`` exposes the ``ensemble`` classes."""
    df = pdml.ModelFrame([])

    # Checked in this order; each accessor attribute must be the very same
    # object as the one on the `ensemble` module.
    estimator_names = (
        'AdaBoostClassifier',
        'AdaBoostRegressor',
        'BaggingClassifier',
        'BaggingRegressor',
        'ExtraTreesClassifier',
        'ExtraTreesRegressor',
        'GradientBoostingClassifier',
        'GradientBoostingRegressor',
        'IsolationForest',
        'RandomForestClassifier',
        'RandomTreesEmbedding',
        'RandomForestRegressor',
        'VotingClassifier',
    )
    for name in estimator_names:
        self.assertIs(getattr(df.ensemble, name), getattr(ensemble, name))
Example #22
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_iforest():
    """Smoke-test fit and predict over a small grid of parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    param_grid = ParameterGrid({
        "n_estimators": [3],
        "max_samples": [0.5, 1.0, 3],
        "bootstrap": [True, False],
    })

    # Warnings are silenced; the test only requires that nothing raises.
    with ignore_warnings():
        for settings in param_grid:
            forest = IsolationForest(random_state=rng, **settings)
            forest.fit(X_train).predict(X_test)
Example #23
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_recalculate_max_depth():
    """Check max_depth recalculation when max_samples is reset to n_samples"""
    X = iris.data
    forest = IsolationForest().fit(X)

    # Every tree is expected to be capped at ceil(log2(n_samples)).
    expected_depth = int(np.ceil(np.log2(X.shape[0])))
    for tree in forest.estimators_:
        assert_equal(tree.max_depth, expected_depth)
Example #24
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_iforest_error():
    """Check that deficient input raises or warns as expected."""
    X = iris.data

    # Out-of-range max_samples values must raise at fit time.
    for out_of_range in (-1, 0.0, 2.0):
        assert_raises(ValueError, IsolationForest(max_samples=out_of_range).fit, X)

    # The dataset has fewer than 256 samples: an explicit max_samples larger
    # than n_samples should warn, while leaving it unset should not.
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         IsolationForest(max_samples=1000).fit, X)

    # assert_no_warnings is not used here because it enables a
    # PendingDeprecationWarning triggered by scipy.sparse's use of
    # np.matrix (see issue #11251); only UserWarnings are counted instead.
    for quiet_value in ('auto', np.int64(2)):
        with pytest.warns(None) as record:
            IsolationForest(max_samples=quiet_value).fit(X)
        user_warnings = [each for each in record
                         if issubclass(each.category, UserWarning)]
        assert len(user_warnings) == 0

    for invalid in ('foobar', 1.5):
        assert_raises(ValueError, IsolationForest(max_samples=invalid).fit, X)

    # Predicting with a different number of features than at fit time fails.
    fitted = IsolationForest().fit(X)
    assert_raises(ValueError, fitted.predict, X[:, 1:])

    # threshold_ is not available when behaviour is not 'old'.
    msg = "threshold_ attribute does not exist when behaviour != 'old'"
    assert_raises_regex(AttributeError, msg, getattr,
                        IsolationForest(behaviour='new'), 'threshold_')
Example #25
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_iforest_subsampled_features():
    """Non-regression test for #5732, which failed at predict."""
    rng = check_random_state(0)
    split = train_test_split(boston.data[:50], boston.target[:50],
                             random_state=rng)
    X_train, X_test, y_train, _ = split

    # Fit with a feature subsample, then make sure predict still works.
    forest = IsolationForest(max_features=0.8)
    forest.fit(X_train, y_train)
    forest.predict(X_test)
Example #26
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_max_samples_consistency():
    """Validated max_samples in iforest and BaseBagging must be identical."""
    fitted = IsolationForest().fit(iris.data)
    assert_equal(fitted.max_samples_, fitted._max_samples)
Example #27
Source File: test_iforest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_iforest_works():
    """Check that the two obvious outliers in a toy sample are detected."""
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]

    # Fit the isolation forest with a contamination of 2 out of 8 samples.
    forest = IsolationForest(random_state=rng, contamination=0.25)
    forest.fit(X)

    # Negated decision function: larger values mean more abnormal.
    anomaly = - forest.decision_function(X)
    labels = forest.predict(X)

    # Both outliers must score above every inlier...
    outlier_floor = np.min(anomaly[-2:])
    inlier_ceiling = np.max(anomaly[:-2])
    assert_greater(outlier_floor, inlier_ceiling)

    # ...and be the only samples labelled -1.
    assert_array_equal(labels, 6 * [1] + 2 * [-1])
Example #28
Source File: isolation_forest.py From monasca-analytics with Apache License 2.0 | 5 votes |
def __init__(self, _id, _config):
    """Initialise the base class and read ``nb_samples`` from ``_config``."""
    super(IsolationForest, self).__init__(_id, _config)
    nb_samples = _config['nb_samples']
    # Stored as an int regardless of how the configuration encoded it.
    self._nb_samples = int(nb_samples)
Example #29
Source File: isolation_forest.py From monasca-analytics with Apache License 2.0 | 5 votes |
def get_default_config():
    """Return the default configuration: module name and sample count."""
    default_config = {}
    default_config['module'] = IsolationForest.__name__
    default_config['nb_samples'] = N_SAMPLES
    return default_config
Example #30
Source File: isolation_forest.py From monasca-analytics with Apache License 2.0 | 5 votes |
def _get_best_detector(self, train):
    """Fit a default-configured isolation forest on ``train`` and return it."""
    fitted_forest = ensemble.IsolationForest()
    fitted_forest.fit(train)
    return fitted_forest