Python sklearn.datasets.fetch_kddcup99() Examples
The following are 4 code examples of sklearn.datasets.fetch_kddcup99().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: test_kddcup99.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_percent10():
    """Check the shapes returned by ``fetch_kddcup99`` for each subset.

    The test is skipped when the first (non-downloading) fetch raises
    ``IOError``. It then verifies the data/target shapes of the default
    call, that a shuffled fetch has the same shapes, the shapes of the
    'SA', 'SF', 'http' and 'smtp' subsets, and finally runs
    ``check_return_X_y`` against the 'smtp' bunch.
    """
    try:
        # Only this first call forbids downloading; a failure here means
        # the dataset is not available locally.
        data = fetch_kddcup99(download_if_missing=False)
    except IOError:
        raise SkipTest("kddcup99 dataset can not be loaded.")

    full_rows, full_cols = 494021, 41
    assert_equal(data.data.shape, (full_rows, full_cols))
    assert_equal(data.target.shape, (full_rows,))

    # Shuffling must leave the shapes untouched.
    data_shuffled = fetch_kddcup99(shuffle=True, random_state=0)
    assert_equal(data.data.shape, data_shuffled.data.shape)
    assert_equal(data.target.shape, data_shuffled.target.shape)

    # (subset name, expected number of rows, expected number of columns)
    expected_by_subset = (
        ('SA', 100655, 41),
        ('SF', 73237, 4),
        ('http', 58725, 3),
        ('smtp', 9571, 3),
    )
    for subset_name, n_rows, n_cols in expected_by_subset:
        data = fetch_kddcup99(subset_name)
        assert_equal(data.data.shape, (n_rows, n_cols))
        assert_equal(data.target.shape, (n_rows,))

    # ``data`` is the 'smtp' bunch at this point (last loop iteration).
    fetch_func = partial(fetch_kddcup99, 'smtp')
    check_return_X_y(data, fetch_func)
Example #2
Source File: test_kddcup99.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_shuffle():
    """Check that a shuffled 'SA' fetch has a normal sample near its end.

    The test is skipped when the dataset cannot be loaded without
    downloading (``IOError``). It passes when at least one of the last
    100 targets equals ``b'normal.'``.
    """
    fetch_options = dict(
        random_state=0,
        subset='SA',
        shuffle=True,
        percent10=True,
        download_if_missing=False,
    )
    try:
        dataset = fetch_kddcup99(**fetch_options)
    except IOError:
        raise SkipTest("kddcup99 dataset can not be loaded.")

    # NOTE(review): presumably the unshuffled 'SA' subset ends with
    # anomalies only, so a normal label here indicates shuffling — confirm.
    trailing_labels = dataset.target[-100:]
    assert any(trailing_labels == b'normal.')
Example #3
Source File: kdd.py From mixed-anomaly with Apache License 2.0 | 5 votes |
def load_train_test_data(small: bool, train_normal_only: bool) -> Tuple[Tuple[pd.DataFrame, np.ndarray], Tuple[pd.DataFrame, np.ndarray]]:
    """Load the KDD Cup 99 'SA' subset and split it into train and test sets.

    Parameters
    ----------
    small
        Forwarded to ``fetch_kddcup99`` as ``percent10``.
    train_normal_only
        If true, the last ``2 * number_anomalies`` rows form the test set
        and everything before them forms the training set. If false, the
        same number of test rows is drawn at random (without replacement,
        using NumPy's global random state) and the rest is used for
        training.

    Returns
    -------
    ((features_train, target_train), (features_test, target_test))
        Features are DataFrames with the 41 named columns, where
        ``protocol_type``, ``flag`` and ``service`` are cast to the
        ``category`` dtype. Targets are integer arrays holding 1 where the
        label is ``b'normal.'`` and 0 otherwise.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``train_normal_only`` is false
        and the requested test size exceeds the number of rows.
    """
    X, y = fetch_kddcup99(subset='SA', percent10=small, return_X_y=True)
    columns = [
        "duration", "protocol_type", "service", "flag", "src_bytes",
        "dst_bytes", "land", "wrong_fragment", "urgent", "hot",
        "num_failed_logins", "logged_in", "num_compromised", "root_shell",
        "su_attempted", "num_root", "num_file_creations", "num_shells",
        "num_access_files", "num_outbound_cmds", "is_host_login",
        "is_guest_login", "count", "srv_count", "serror_rate",
        "srv_serror_rate", "rerror_rate", "srv_rerror_rate", "same_srv_rate",
        "diff_srv_rate", "srv_diff_host_rate", "dst_host_count",
        "dst_host_srv_count", "dst_host_same_srv_rate",
        "dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
        "dst_host_srv_diff_host_rate", "dst_host_serror_rate",
        "dst_host_srv_serror_rate", "dst_host_rerror_rate",
        "dst_host_srv_rerror_rate",
    ]
    categorical_columns = ["protocol_type", "flag", "service"]

    features = pd.DataFrame(X, columns=columns)
    # 1 marks a normal sample, 0 marks an anomaly.
    target = (y == b'normal.') * 1
    for categorical_column in categorical_columns:
        features[categorical_column] = features[categorical_column].astype('category')

    number_anomalies = int(np.sum(1 - target))
    # The test set is sized to twice the number of anomalies.
    number_test_samples = 2 * number_anomalies

    if train_normal_only:
        # Use an explicit split position instead of negative slicing:
        # with zero anomalies, ``[:-0]`` would yield an empty training set
        # and ``[-0:]`` would put every row into the test set.
        # NOTE(review): this relies on the anomalies sitting at the end of
        # the fetched 'SA' data so the training part is anomaly-free —
        # confirm against fetch_kddcup99.
        split = max(len(features) - number_test_samples, 0)
        features_train, features_test = features.iloc[:split], features.iloc[split:]
        target_train, target_test = target[:split], target[split:]
    else:
        # An integer population is equivalent to sampling from
        # ``range(len(features))`` without materialising the range.
        test_indices = np.random.choice(len(features), size=number_test_samples, replace=False)
        features_train, features_test = features.drop(test_indices), features.loc[test_indices]
        target_train, target_test = np.delete(target, test_indices), target[test_indices]

    return (features_train, target_train), (features_test, target_test)
# features, target= load_train_test_data(small=True, train_normal_only=True)
# print(features.columns)
Example #4
Source File: test_kddcup99.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_percent10():
    """Check the shapes returned by ``fetch_kddcup99`` for each subset.

    The test is skipped when the first (non-downloading) fetch raises
    ``IOError``. It then verifies the data/target shapes of the default
    call, that a shuffled fetch has the same shapes, and the shapes of
    the 'SA', 'SF', 'http' and 'smtp' subsets.
    """
    try:
        # Only this first call forbids downloading; a failure here means
        # the dataset is not available locally.
        data = fetch_kddcup99(download_if_missing=False)
    except IOError:
        raise SkipTest("kddcup99 dataset can not be loaded.")

    n_samples, n_features = 494021, 41
    assert_equal(data.data.shape, (n_samples, n_features))
    assert_equal(data.target.shape, (n_samples,))

    # Shuffling must leave the shapes untouched.
    data_shuffled = fetch_kddcup99(shuffle=True, random_state=0)
    assert_equal(data.data.shape, data_shuffled.data.shape)
    assert_equal(data.target.shape, data_shuffled.target.shape)

    # Expected (rows, columns) of the data matrix for each named subset,
    # checked in this order.
    expected_shapes = [
        ('SA', (100655, 41)),
        ('SF', (73237, 4)),
        ('http', (58725, 3)),
        ('smtp', (9571, 3)),
    ]
    for subset, data_shape in expected_shapes:
        data = fetch_kddcup99(subset)
        assert_equal(data.data.shape, data_shape)
        assert_equal(data.target.shape, (data_shape[0],))