Python sklearn.preprocessing.Binarizer() Examples
The following are 12 code examples of sklearn.preprocessing.Binarizer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
Example #1
Source File: plot.py From graspy with Apache License 2.0 | 6 votes |
def _transform(arr, method): if method is not None: if method in ["log", "log10"]: # arr = np.log(arr, where=(arr > 0)) # hacky, but np.log(arr, where=arr>0) is really buggy arr = arr.copy() if method == "log": arr[arr > 0] = np.log(arr[arr > 0]) else: arr[arr > 0] = np.log10(arr[arr > 0]) elif method in ["zero-boost", "simple-all", "simple-nonzero"]: arr = pass_to_ranks(arr, method=method) elif method == "binarize": transformer = Binarizer().fit(arr) arr = transformer.transform(arr) else: msg = "Transform must be one of {log, log10, binarize, zero-boost, simple-all, \ simple-nonzero, not {}.".format( method ) raise ValueError(msg) return arr
Example #2
Source File: _supported_operators.py From sklearn-onnx with MIT License | 6 votes |
def get_model_alias(model_type):
    """
    Look up the conversion-framework alias registered for a model.

    :param model_type: A scikit-learn object (e.g., SGDClassifier and Binarizer)
    :return: A string which stands for the type of the input model in
        our conversion framework
    :raises RuntimeError: if no alias is registered for ``model_type``
    """
    alias = _get_sklearn_operator_name(model_type)
    if alias is not None:
        return alias
    message = ("Unable to find alias for model '{}'. "
               "The converter is likely missing.")
    raise RuntimeError(message.format(type(model_type)))


# registered converters
Example #3
Source File: test_sklearn_binarizer_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_binarizer(self):
    """Convert a Binarizer (threshold 0.5) to ONNX and dump model and data."""
    samples = np.array(
        [[1., -1., 2.],
         [2., 0., 0.],
         [0., 1., -1.]],
        dtype=np.float32,
    )
    binarizer = Binarizer(threshold=0.5)
    # The declared input tensor has exactly the shape of the sample matrix.
    input_spec = [("input", FloatTensorType(samples.shape))]
    onnx_model = convert_sklearn(binarizer, "scikit-learn binarizer", input_spec)
    self.assertTrue(onnx_model is not None)
    dump_data_and_model(
        samples,
        binarizer,
        onnx_model,
        basename="SklearnBinarizer-SkipDim1",
    )
Example #4
Source File: test_onnx_helper.py From sklearn-onnx with MIT License | 6 votes |
def test_onnx_helper_load_save(self):
    """Round-trip a scaler + binarizer pipeline through save/load helpers.

    The pipeline is converted to ONNX, written to disk, read back, and a
    second model is derived with ``select_model_inputs_outputs``. Both models
    are then run on the training data and must yield (2, 2) outputs.
    """
    pipeline = make_pipeline(StandardScaler(), Binarizer(threshold=0.5))
    X = numpy.array([[0.1, 1.1], [0.2, 2.2]])
    pipeline.fit(X)

    input_types = [("input", FloatTensorType([None, 2]))]
    onnx_model = convert_sklearn(pipeline, "binarizer", input_types)

    path = "temp_onnx_helper_load_save.onnx"
    save_onnx_model(onnx_model, path)
    loaded = load_onnx_model(path)
    selected = select_model_inputs_outputs(loaded, "variable")
    assert selected.graph is not None

    run_loaded = self.get_model(loaded)
    run_selected = self.get_model(selected)
    # The converted model declares a float tensor input, so feed float32.
    X32 = X.astype(numpy.float32)
    out_loaded = run_loaded(X32)
    out_selected = run_selected(X32)
    assert out_loaded.shape == (2, 2)
    assert out_selected.shape == (2, 2)
Example #5
Source File: test_onnx_helper.py From sklearn-onnx with MIT License | 6 votes |
def test_onnx_helper_load_save_init(self):
    """Round-trip a three-step pipeline through the ONNX save/load helpers.

    Same flow as the plain load/save test, but the pipeline chains a
    Binarizer, a dense OneHotEncoder and a StandardScaler, and is fitted on
    four samples; both executed models must yield (4, 2) outputs.
    """
    # NOTE(review): the ``sparse`` keyword depends on the installed
    # scikit-learn version - confirm before upgrading.
    steps = (
        Binarizer(),
        OneHotEncoder(sparse=False, handle_unknown='ignore'),
        StandardScaler(),
    )
    pipeline = make_pipeline(*steps)
    X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
    pipeline.fit(X)

    input_types = [("input", FloatTensorType([None, 2]))]
    onnx_model = convert_sklearn(pipeline, "pipe3", input_types)

    path = "temp_onnx_helper_load_save.onnx"
    save_onnx_model(onnx_model, path)
    loaded = load_onnx_model(path)
    selected = select_model_inputs_outputs(loaded, "variable")
    assert selected.graph is not None

    run_loaded = self.get_model(loaded)
    run_selected = self.get_model(selected)
    X32 = X.astype(numpy.float32)
    out_loaded = run_loaded(X32)
    out_selected = run_selected(X32)
    assert out_loaded.shape == (4, 2)
    assert out_selected.shape == (4, 2)
Example #6
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    """Each ``df.preprocessing`` attribute must be the matching ``pp`` class."""
    df = pdml.ModelFrame([])
    expected_names = (
        'Binarizer',
        'FunctionTransformer',
        'Imputer',
        'KernelCenterer',
        'LabelBinarizer',
        'LabelEncoder',
        'MultiLabelBinarizer',
        'MaxAbsScaler',
        'MinMaxScaler',
        'Normalizer',
        'OneHotEncoder',
        'PolynomialFeatures',
        'RobustScaler',
        'StandardScaler',
    )
    for name in expected_names:
        # Identity, not equality: the accessor must expose the very same object.
        self.assertIs(getattr(df.preprocessing, name), getattr(pp, name))
Example #7
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_transform_1d_frame_int(self):
    """Check transform / fit_transform on a single-column integer ModelFrame.

    For every model name under test, a fresh accessor-built estimator and a
    fresh plain ``pp`` estimator are handed to the ``_assert_transform`` and
    then the ``_assert_fit_transform`` helper.
    """
    values = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
    idx = pd.Index('a b c d e f g h i'.split(' '))
    df = pdml.ModelFrame(values, index=idx, columns=['X'])
    self.assertEqual(len(df.columns), 1)

    # The plain estimators are given the data as a 2d column.
    arr = values.reshape(-1, 1)

    models = ['Binarizer', 'Imputer', 'StandardScaler']
    if not pd.compat.PY3:
        # MinMaxScaler raises TypeError in ufunc on Python 3, so it is
        # only exercised on Python 2.
        models.append('MinMaxScaler')

    for name in models:
        for check in (self._assert_transform, self._assert_fit_transform):
            # Fresh, unfitted estimators for each check.
            wrapped = getattr(df.preprocessing, name)()
            plain = getattr(pp, name)()
            check(df, arr, wrapped, plain)
Example #8
Source File: vectorization.py From atap with Apache License 2.0 | 5 votes |
def sklearn_one_hot_vectorize(corpus):
    """Demonstrate one-hot vectorization of ``corpus`` with scikit-learn.

    The corpus is count-vectorized, then the dense count matrix is passed
    through a default ``Binarizer``. The length of the first row is printed
    before and after binarization; nothing is returned.
    """
    # The Sklearn one hot vectorize method
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.preprocessing import Binarizer

    counts = CountVectorizer().fit_transform(corpus)
    # Densify once and reuse the array for both the print and the binarizer.
    dense_counts = counts.toarray()
    print(len(dense_counts[0]))

    vectors = Binarizer().fit_transform(dense_counts)
    print(len(vectors[0]))
Example #9
Source File: _supported_operators.py From sklearn-onnx with MIT License | 5 votes |
def _get_sklearn_operator_name(model_type):
    """
    Return the operator name registered for the input argument, if any.

    :param model_type: A scikit-learn object (e.g., SGDClassifier and Binarizer)
    :return: A string which stands for the type of the input model in
        our conversion framework, or None when ``model_type`` is not a key
        of ``sklearn_operator_name_map``
    """
    if model_type in sklearn_operator_name_map:
        return sklearn_operator_name_map[model_type]
    # No proper operator name found, it means a local operator.
    return None
Example #10
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_transform_series_int(self):
    """Check fit/transform and fit_transform on an integer ModelSeries.

    For each model name, the series result must be a ``ModelSeries`` whose
    values match the flattened output of the plain ``pp`` estimator applied
    to the same data as a 2d column.
    """
    values = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
    s = pdml.ModelSeries(values, index='a b c d e f g h i'.split(' '))

    # The plain estimators are given the data as a 2d column.
    arr = values.reshape(-1, 1)

    models = ['Binarizer', 'Imputer', 'StandardScaler']
    if not pd.compat.PY3:
        # MinMaxScaler raises TypeError in ufunc on Python 3, so it is
        # only exercised on Python 2.
        models.append('MinMaxScaler')

    for name in models:
        # Separate fit, then transform.
        wrapped = getattr(s.preprocessing, name)()
        plain = getattr(pp, name)()
        s.fit(wrapped)
        plain.fit(arr)
        result = s.transform(wrapped)
        expected = plain.transform(arr).flatten()
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        # Combined fit_transform on fresh estimators.
        wrapped = getattr(s.preprocessing, name)()
        plain = getattr(pp, name)()
        result = s.fit_transform(wrapped)
        expected = plain.fit_transform(arr).flatten()
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example #11
Source File: myalexnet_feature.py From visual-search with MIT License | 4 votes |
def main():
    """Extract fc6 features for all input images and save them to disk.

    Steps:
      1. Build the model via ``initModel()`` and initialise its variables.
      2. Load the image names from ``args.input_data_dir`` and write them,
         one per line, to ``args.output_image_name_file``.
      3. Run the images through the network in batches of 100, stack the
         per-batch outputs and store them with ``np.save`` in
         ``args.output_feature_file``.
      4. Print the elapsed time of the extraction in seconds.

    Raises:
        ValueError: from ``np.vstack`` when there are no images to process.
    """
    x, fc6 = initModel()
    init = tf.global_variables_initializer()

    # Use the session as a context manager so it is closed even if
    # feature extraction fails part-way through.
    with tf.Session() as sess:
        sess.run(init)

        img_names = load_image_names(args.input_data_dir)
        with open(args.output_image_name_file, 'w') as img_names_file:
            img_names_file.writelines(name + '\n' for name in img_names)

        t = time.time()

        # Too many images to process at once, so they are fed in batches.
        batch_size = 100
        features = []
        # Slicing clamps at the end of the sequence, so the last (possibly
        # shorter) batch needs no special case.
        batch_starts = range(0, len(img_names), batch_size)
        for batch_idx, start in enumerate(batch_starts):
            print('batch: %d' % batch_idx)
            img_batch = load_images(img_names[start:start + batch_size])
            features.append(sess.run(fc6, feed_dict={x: img_batch}))
        features = np.vstack(features)

        # NOTE: the features are stored as-is; no Binarizer is applied here.
        # np.save writes bytes, so the file must be opened in binary mode.
        # It is opened only after extraction succeeded, so a failed run does
        # not truncate an existing feature file.
        with open(args.output_feature_file, 'wb') as output_file:
            np.save(output_file, features)

        print(time.time() - t)
Example #12
Source File: visual_search.py From visual-search with MIT License | 4 votes |
def main():
    """Search the stored features for a query image and plot the top matches.

    The query image and four variants (watermark, rotate, crop, mirror) are
    normalised, passed through ``extract_feature`` and binarized. Cosine
    distances to the features loaded from ``fc6.npy`` are computed, and for
    each variant the 10 closest dataset images are printed and drawn next to
    it in one row of a 5 x 11 grid. The figure is saved next to the query
    image with the suffix ``_search_result.jpg``.
    """
    t = time.time()

    query_img = imread(args.img_file_path)
    imgs = [
        query_img,
        watermark(query_img),
        rotate(query_img),
        crop(query_img),
        mirror(query_img),
    ]
    imgs_norm = image_normalize(imgs)
    dataset_features = np.load('fc6.npy')

    query_start = time.time()
    query_features = extract_feature(imgs_norm)
    query_features = preprocessing.Binarizer().fit(query_features).transform(query_features)
    print(dataset_features)

    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html#scipy.spatial.distance.cdist
    dis = distance.cdist(dataset_features, query_features, 'cosine')
    print(dis.shape)
    # Sort along each column:
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html
    inds_all = argsort(dis, axis=0)
    print('query cost: %f, dataset: %d, query: %d' % (time.time() - query_start, len(dataset_features), len(imgs)))

    img_names = load_image_names()
    fig, axes = plt.subplots(5, 11, figsize=(22, 10), subplot_kw={'xticks': [], 'yticks': []})
    fig.subplots_adjust(hspace=0.15, wspace=0.01, left=.02, right=.98, top=.92, bottom=.08)
    titles = ['original', 'watermark', 'rotate', 'crop', 'mirror']

    for row, (variant, label) in enumerate(zip(imgs, titles)):
        inds = inds_all[:, row]

        # Collect and report the 10 nearest dataset images for this variant.
        topK = []
        for k in range(10):
            match_name = img_names[inds[k]]
            topK.append(match_name)
            print(inds[k], dis[inds[k], row], match_name)

        # First column of the row shows the query variant itself.
        original = axes[row, 0]
        original.set_title(label)
        original.imshow(variant)

        # Remaining columns show the matches with their rank and distance.
        for j, match_path in enumerate(topK):
            ax = axes[row, j + 1]
            ax.imshow(imread(match_path))
            ax.set_title('%d : %f' % (j + 1, dis[inds[j], row]))

    savePath = args.img_file_path + '_search_result.jpg'
    plt.savefig(savePath)
    print(time.time() - t)
    # os.system('open -a Preview.app -F ' + savePath)