Python sklearn.preprocessing Examples
The following are 30 code examples of the sklearn.preprocessing module. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn, or try the search function.
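Before the examples, here is a minimal, self-contained sketch of the fit/transform pattern shared by most classes in this module (the toy data is made up for illustration):

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])  # toy data

scaler = StandardScaler()
scaler.fit(X)                    # learn per-column mean and std
X_scaled = scaler.transform(X)   # apply: (x - mean) / std
print(X_scaled.mean(axis=0))     # ~0 per column
print(X_scaled.std(axis=0))      # ~1 per column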
Example #1
Source File: performance_test.py From HumanRecognition with MIT License | 6 votes

def convert_inst_scores_to_cls_scores(similarity, testset0, testset1, num_identity, lbl_map):
    cls_scores1 = []
    for i in range(len(similarity)):
        cls_score = np.zeros(num_identity)
        for j in range(len(similarity[i])):
            id = lbl_map[testset1[j].identity_id]
            cls_score[id] = max(cls_score[id], similarity[i][j])
        cls_scores1.append(cls_score)
    cls_scores1 = np.array(cls_scores1)
    cls_scores1 = sklearn.preprocessing.normalize(cls_scores1)
    similarity = np.transpose(similarity)
    cls_scores2 = []
    for i in range(len(similarity)):
        cls_score = np.zeros(num_identity)
        for j in range(len(similarity[i])):
            id = lbl_map[testset0[j].identity_id]
            cls_score[id] = max(cls_score[id], similarity[i][j])
        cls_scores2.append(cls_score)
    cls_scores2 = np.array(cls_scores2)
    cls_scores2 = sklearn.preprocessing.normalize(cls_scores2)
    return cls_scores1, cls_scores2
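For context: sklearn.preprocessing.normalize, as used twice above, rescales each row to unit L2 norm so the per-identity scores become comparable across rows. A minimal sketch:

import numpy as np
import sklearn.preprocessing

scores = np.array([[3.0, 4.0], [1.0, 0.0]])
normed = sklearn.preprocessing.normalize(scores)  # L2 norm by default
print(normed)  # [[0.6, 0.8], [1.0, 0.0]] -- each row now has unit length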
Example #2
Source File: preprocessor.py From qlik-py-tools with MIT License | 6 votes

def get_scaler(df, missing="zeros", scaler="standard", **kwargs):
    """
    Fit a sklearn scaler on a Data Frame and return the scaler.
    Valid options for the scaler are: standard, minmax, maxabs, robust, quantile
    Missing values must be dealt with before the scaling is applied.
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """
    scalers = {'standard': 'StandardScaler', 'minmax': 'MinMaxScaler', 'maxabs': 'MaxAbsScaler',
               'robust': 'RobustScaler', 'quantile': 'QuantileTransformer'}
    s = getattr(preprocessing, scalers[scaler])
    s = s(**kwargs)
    df = Preprocessor.fillna(df, missing=missing)
    return s.fit(df)
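A hypothetical usage sketch; it assumes a numeric pandas DataFrame df and that the project's Preprocessor.fillna helper is in scope:

scaler = get_scaler(df, missing="mean", scaler="minmax")  # fit on training data
df_scaled = scaler.transform(df)  # the fitted scaler can be reused on new data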
Example #3
Source File: sklearn_intent_classifier.py From Rasa_NLU_Chi with Apache License 2.0 | 6 votes

def __init__(self,
             component_config=None,  # type: Dict[Text, Any]
             clf=None,  # type: sklearn.model_selection.GridSearchCV
             le=None  # type: sklearn.preprocessing.LabelEncoder
             ):
    # type: (...) -> None
    """Construct a new intent classifier using the sklearn framework."""
    from sklearn.preprocessing import LabelEncoder

    super(SklearnIntentClassifier, self).__init__(component_config)

    if le is not None:
        self.le = le
    else:
        self.le = LabelEncoder()
    self.clf = clf

    _sklearn_numpy_warning_fix()
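The LabelEncoder created here maps string intent labels to integer ids and back. A minimal standalone sketch:

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
ids = le.fit_transform(["greet", "bye", "greet"])  # e.g. [1, 0, 1]
print(le.classes_)                # array(['bye', 'greet'], ...) -- sorted
print(le.inverse_transform(ids))  # back to the original strings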
Example #4
Source File: test_gReadUntil.py From RUscripts with MIT License | 6 votes

def squiggle_search2_old(squiggle, kmerhash, seqlen):
    result = []
    for ref in kmerhash:
        #print "ss2", ref
        queryarray = sklearn.preprocessing.scale(np.array(squiggle), axis=0, with_mean=True, with_std=True, copy=True)
        dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref]['Fprime'])
        result.append((dist, ref, "F", path[1][0], ref, path[1][-1]))
        dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref]['Rprime'])
        result.append((dist, ref, "R", path[1][0], ref, path[1][-1]))
    #('J02459', 41.017514495176989, 'F', 10003, 'J02459', 10198)
    #distanceR,seqmatchnameR,frR,rsR,reR,qsR,qeR=sorted(result,key=lambda result: result[0])[0]
    #return seqmatchnameR,distanceR,frR,rsR,reR,qsR,qeR
    best = sorted(result, key=lambda result: result[0])[0]
    return best[1], best[0], best[2], best[3], best[4], best[5]

######################################################################
######################################################################
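sklearn.preprocessing.scale, used above to z-normalise the query squiggle before dynamic time warping, is the one-shot functional form of StandardScaler. A minimal sketch:

import numpy as np
import sklearn.preprocessing

squiggle = np.array([480.0, 520.0, 500.0, 490.0])
z = sklearn.preprocessing.scale(squiggle)  # zero mean, unit variance
print(z.mean(), z.std())                   # ~0.0, ~1.0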
Example #5
Source File: clf_helpers.py From ibeis with Apache License 2.0 | 6 votes

def encoded_1d(samples):
    """ Returns a unique label for each combination of samples """
    # from sklearn.preprocessing import MultiLabelBinarizer
    encoded_2d = samples.encoded_2d()
    class_space = [v.n_classes for k, v in samples.items()]
    offsets = np.array([1] + np.cumprod(class_space).tolist()[:-1])[None, :]
    encoded_1d = (offsets * encoded_2d).sum(axis=1)
    # e = MultiLabelBinarizer()
    # bin_coeff = e.fit_transform(encoded_2d)
    # bin_basis = (2 ** np.arange(bin_coeff.shape[1]))[None, :]
    # # encoded_1d = (bin_coeff * bin_basis).sum(axis=1)
    # encoded_1d = (bin_coeff * bin_basis[::-1]).sum(axis=1)
    # # vt.unique_rows(sklearn.preprocessing.MultiLabelBinarizer().fit_transform(encoded_2d))
    # [v.encoded_df.values for k, v in samples.items()]
    # encoded_df_1d = pd.concat([v.encoded_df for k, v in samples.items()], axis=1)
    return encoded_1d
Example #6
Source File: _utils.py From qlik-py-tools with MIT License | 6 votes

def scale(df, missing="zeros", scaler="robust", **kwargs):
    """
    Scale values in a Data Frame using the relevant sklearn preprocessing method.
    Valid options for the scaler are: standard, minmax, maxabs, robust, quantile
    Missing values must be dealt with before the scaling is applied.
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """
    scalers = {'standard': 'StandardScaler', 'minmax': 'MinMaxScaler', 'maxabs': 'MaxAbsScaler',
               'robust': 'RobustScaler', 'quantile': 'QuantileTransformer'}
    s = getattr(preprocessing, scalers[scaler])
    s = s(**kwargs)
    df = fillna(df, method=missing)
    df = pd.DataFrame(s.fit_transform(df), index=df.index, columns=df.columns)
    return df
Example #7
Source File: sklearn_intent_classifier.py From rasa_nlu with Apache License 2.0 | 6 votes

def __init__(self,
             component_config: Dict[Text, Any] = None,
             clf: 'sklearn.model_selection.GridSearchCV' = None,
             le: Optional['sklearn.preprocessing.LabelEncoder'] = None
             ) -> None:
    """Construct a new intent classifier using the sklearn framework."""
    from sklearn.preprocessing import LabelEncoder

    super(SklearnIntentClassifier, self).__init__(component_config)

    if le is not None:
        self.le = le
    else:
        self.le = LabelEncoder()
    self.clf = clf

    _sklearn_numpy_warning_fix()
Example #8
Source File: sklearn_intent_classifier.py From rasa-for-botfront with Apache License 2.0 | 6 votes

def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["SklearnIntentClassifier"] = None,
    **kwargs: Any,
) -> "SklearnIntentClassifier":
    from sklearn.preprocessing import LabelEncoder

    classifier_file = os.path.join(model_dir, meta.get("classifier"))
    encoder_file = os.path.join(model_dir, meta.get("encoder"))

    if os.path.exists(classifier_file):
        classifier = io_utils.json_unpickle(classifier_file)
        classes = io_utils.json_unpickle(encoder_file)
        encoder = LabelEncoder()
        encoder.classes_ = classes
        return cls(meta, classifier, encoder)
    else:
        return cls(meta)
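Restoring the encoder by assigning classes_ directly, as above, means only the class array needs to be persisted, not the whole object. A minimal sketch of that round trip:

import numpy as np
from sklearn.preprocessing import LabelEncoder

original = LabelEncoder().fit(["bye", "greet"])
saved_classes = original.classes_        # this array is all the state needed

restored = LabelEncoder()
restored.classes_ = np.array(saved_classes)
print(restored.transform(["greet"]))     # [1] -- same mapping as the original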
Example #9
Source File: sklearn_intent_classifier.py From rasa-for-botfront with Apache License 2.0 | 6 votes

def __init__(
    self,
    component_config: Optional[Dict[Text, Any]] = None,
    clf: "sklearn.model_selection.GridSearchCV" = None,
    le: Optional["sklearn.preprocessing.LabelEncoder"] = None,
) -> None:
    """Construct a new intent classifier using the sklearn framework."""
    from sklearn.preprocessing import LabelEncoder

    super().__init__(component_config)

    if le is not None:
        self.le = le
    else:
        self.le = LabelEncoder()
    self.clf = clf
Example #10
Source File: deep_learning.py From EliteQuant_Python with Apache License 2.0 | 5 votes

def normalize_data(df):
    min_max_scaler = sklearn.preprocessing.MinMaxScaler()
    df['open'] = min_max_scaler.fit_transform(df.open.values.reshape(-1, 1))
    df['high'] = min_max_scaler.fit_transform(df.high.values.reshape(-1, 1))
    df['low'] = min_max_scaler.fit_transform(df.low.values.reshape(-1, 1))
    df['close'] = min_max_scaler.fit_transform(df['close'].values.reshape(-1, 1))
    return df

# function to create train, validation, test data given stock data and sequence length
# use previous 19 days to predict today
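MinMaxScaler rescales each column to [0, 1] by default; note that because fit_transform is called once per column above, each price series is scaled by its own min and max. A minimal sketch:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

prices = np.array([10.0, 15.0, 20.0]).reshape(-1, 1)
scaled = MinMaxScaler().fit_transform(prices)
print(scaled.ravel())  # [0.0, 0.5, 1.0] -- (x - min) / (max - min)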
Example #11
Source File: ordinal_encoder.py From lale with Apache License 2.0 | 5 votes

def __init__(self, categories='auto', dtype='float64', handle_unknown='ignore', encode_unknown_with='auto'):
    self._hyperparams = {
        'categories': categories,
        'dtype': dtype}
    self.handle_unknown = handle_unknown
    self.encode_unknown_with = encode_unknown_with
    self._wrapped_model = sklearn.preprocessing.OrdinalEncoder(**self._hyperparams)
    self.unknown_categories_mapping = []  # used during inverse transform to keep track of mapping of unknown categories
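The lale wrapper above delegates to sklearn.preprocessing.OrdinalEncoder. A minimal sketch of the underlying encoder alone (assuming sklearn >= 0.20, where it was introduced):

import numpy as np
from sklearn.preprocessing import OrdinalEncoder

enc = OrdinalEncoder()
X = np.array([["red"], ["green"], ["red"]])
print(enc.fit_transform(X))  # [[1.], [0.], [1.]] -- categories in sorted order
print(enc.categories_)       # [array(['green', 'red'], ...)]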
Example #12
Source File: _utils.py From qlik-py-tools with MIT License | 5 votes

def get_scaler(df, missing="zeros", scaler="StandardScaler", **kwargs):
    """
    Fit a sklearn scaler on a Data Frame and return the scaler.
    Valid options for the scaler are: StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, QuantileTransformer
    Missing values must be dealt with before the scaling is applied.
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """
    s = getattr(preprocessing, scaler)
    s = s(**kwargs)
    df = fillna(df, method=missing)
    return s.fit(df)
Example #13
Source File: preproc.py From fathom with Apache License 2.0 | 5 votes

def mfcc_features(filename):
    """Preprocessing per CTC paper.

    (These are not the simpler linear spectrogram features alone as in Deep Speech.)

    Properties:
    - 10ms frames with 5ms overlap
    - 12 MFCCs with 26 filter banks
    - replace first MFCC with energy (TODO: log-energy)
    - add first-order derivatives for all of the above
    - total: 26 coefficients
    """
    d, sr = librosa.load(filename)

    frame_length_seconds = 0.010
    frame_overlap_seconds = 0.005

    mfccs = librosa.feature.mfcc(d, sr, n_mfcc=1 + 12,
                                 n_fft=int(frame_overlap_seconds * sr),
                                 hop_length=int(frame_overlap_seconds * sr))

    # energy (TODO: log?)
    energy = librosa.feature.rmse(d,
                                  n_fft=int(frame_overlap_seconds * sr),
                                  hop_length=int(frame_overlap_seconds * sr))
    mfccs[0] = energy  # replace first MFCC with energy, per convention

    deltas = librosa.feature.delta(mfccs, order=1)
    mfccs_plus_deltas = np.vstack([mfccs, deltas])

    coeffs = sklearn.preprocessing.scale(mfccs_plus_deltas, axis=1)
    return coeffs
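The final preprocessing step standardises along axis=1, i.e. each coefficient row is given zero mean and unit variance across frames. A minimal sketch of that axis behaviour:

import numpy as np
import sklearn.preprocessing

feats = np.array([[1.0, 2.0, 3.0],
                  [10.0, 20.0, 30.0]])      # 2 channels x 3 frames
scaled = sklearn.preprocessing.scale(feats, axis=1)
print(scaled.mean(axis=1))  # ~[0, 0] -- each row standardised independently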
Example #14
Source File: min_max_scaler.py From lale with Apache License 2.0 | 5 votes

def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.preprocessing.MinMaxScaler(**self._hyperparams)
Example #15
Source File: test_gReadUntil.py From RUscripts with MIT License | 5 votes

def process_ref_fasta_orig(ref_fasta, model_kmer_means, seqlen, kmerlen):
    print "processing the reference fasta."
    kmer_len = kmerlen
    kmer_means = dict()
    for record in SeqIO.parse(ref_fasta, 'fasta'):
        kmer_means[record.id] = dict()
        kmer_means[record.id]["F"] = list()
        kmer_means[record.id]["R"] = list()
        kmer_means[record.id]["Fprime"] = list()
        kmer_means[record.id]["Rprime"] = list()
        print "ID", record.id
        print "length", len(record.seq)
        print "FORWARD STRAND"
        seq = record.seq
        for x in range(len(seq) + 1 - kmer_len):
            kmer = str(seq[x:x + kmer_len])
            kmer_means[record.id]["F"].append(float(model_kmer_means[kmer]))
            #if model_kmer_means[kmer]:
            #print x, kmer, model_kmer_means[kmer]
        print "REVERSE STRAND"
        seq = revcomp = record.seq.reverse_complement()
        for x in range(len(seq) + 1 - kmer_len):
            kmer = str(seq[x:x + kmer_len])
            kmer_means[record.id]["R"].append(float(model_kmer_means[kmer]))
        kmer_means[record.id]["Fprime"] = sklearn.preprocessing.scale(kmer_means[record.id]["F"], axis=0, with_mean=True, with_std=True, copy=True)
        kmer_means[record.id]["Rprime"] = sklearn.preprocessing.scale(kmer_means[record.id]["R"], axis=0, with_mean=True, with_std=True, copy=True)
    return kmer_means

#######################################################################
Example #16
Source File: search_fusion_weights.py From HumanRecognition with MIT License | 5 votes

def cal_feature_similarity(detections_0, detections_1, feature_name):
    features_0 = get_features(detections_0, feature_name)
    features_1 = get_features(detections_1, feature_name)
    features_0 = sklearn.preprocessing.normalize(features_0)
    features_1 = sklearn.preprocessing.normalize(features_1)
    similarity = sklearn.metrics.pairwise.cosine_similarity(features_0, features_1)
    similarity = 1.0 / (1 + np.exp(-(config.beta0 + config.beta1 * similarity)))
    return similarity
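Because the feature rows are L2-normalised first, the cosine similarity above reduces to a plain dot product, which the logistic function then maps into (0, 1). A minimal sketch of the equivalence:

import numpy as np
import sklearn.preprocessing
from sklearn.metrics.pairwise import cosine_similarity

a = sklearn.preprocessing.normalize(np.array([[1.0, 2.0]]))
b = sklearn.preprocessing.normalize(np.array([[2.0, 1.0]]))
print(np.allclose(cosine_similarity(a, b), a @ b.T))  # True -- unit rows make cosine a dot product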
Example #17
Source File: performance_test.py From HumanRecognition with MIT License | 5 votes

def cal_feature_similarity(detections_0, detections_1, feature_name):
    features_0 = get_features(detections_0, feature_name)
    features_1 = get_features(detections_1, feature_name)
    features_0 = sklearn.preprocessing.normalize(features_0)
    features_1 = sklearn.preprocessing.normalize(features_1)
    similarity = sklearn.metrics.pairwise.cosine_similarity(features_0, features_1)
    similarity = 1.0 / (1 + np.exp(-(config.beta0 + config.beta1 * similarity)))
    return similarity
Example #18
Source File: min_max_scaler.py From lale with Apache License 2.0 | 5 votes

def partial_fit(self, X, y=None):
    if not hasattr(self, "_wrapped_model"):
        self._wrapped_model = sklearn.preprocessing.MinMaxScaler(**self._hyperparams)
    self._wrapped_model.partial_fit(X)
    return self
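partial_fit updates the scaler's running min/max statistics one batch at a time, which is what makes this wrapper usable on data that arrives incrementally. A minimal sketch:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.partial_fit(np.array([[1.0], [2.0]]))  # first batch
scaler.partial_fit(np.array([[10.0]]))        # second batch widens the range
print(scaler.data_min_, scaler.data_max_)     # [1.] [10.]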
Example #19
Source File: one_hot_encoder.py From lale with Apache License 2.0 | 5 votes

def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.preprocessing.OneHotEncoder(**self._hyperparams)
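For context, a minimal sketch of what the wrapped OneHotEncoder does (using .toarray() so it works whether the output is sparse or dense across sklearn versions):

import numpy as np
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder()
X = np.array([["red"], ["green"], ["red"]])
print(enc.fit_transform(X).toarray())
# [[0. 1.]
#  [1. 0.]
#  [0. 1.]] -- one indicator column per category, in sorted order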
Example #20
Source File: unit_tests.py From pynisher with MIT License | 5 votes

def svc_example(n_samples=10000, n_features=4):
    from sklearn.svm import LinearSVC
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.datasets import make_classification

    X, Y = make_classification(n_samples, n_features)
    #pp = PolynomialFeatures(degree=3)
    #X = pp.fit_transform(X)
    m = LinearSVC()
    m.fit(X, Y)
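The commented-out lines are the sklearn.preprocessing part of this test: PolynomialFeatures would expand the raw features with polynomial terms before fitting. A minimal sketch of that expansion:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0, 3.0]])
pp = PolynomialFeatures(degree=2)
print(pp.fit_transform(X))  # [[1., 2., 3., 4., 6., 9.]] -- bias, x1, x2, x1^2, x1*x2, x2^2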
Example #21
Source File: gen_video_feature.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    input_count = len(buffer)
    if use_flip:
        input_count *= 2
    network_count = input_count
    if input_count % ctx_num != 0:
        network_count = (input_count // ctx_num + 1) * ctx_num

    input_blob = np.zeros((network_count, 3, image_shape[1], image_shape[2]), dtype=np.float32)
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    _embedding = _embedding[0:input_count]
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #22
Source File: gen_image_feature.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #23
Source File: gen_glint.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = face_preprocess.read_image(item[0], mode='rgb')
        img = face_preprocess.preprocess(img, bbox=None, landmark=item[1], image_size='%d,%d' % (image_shape[1], image_shape[2]))
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #24
Source File: gen_megaface.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(imgs, nets):
    count = len(imgs)
    data = mx.nd.zeros(shape=(count * 2, 3, imgs[0].shape[0], imgs[0].shape[1]))
    for idx, img in enumerate(imgs):
        img = img[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        for flipid in [0, 1]:
            _img = np.copy(img)
            if flipid == 1:
                _img = _img[:, :, ::-1]
            _img = nd.array(_img)
            data[count * flipid + idx] = _img

    F = []
    for net in nets:
        db = mx.io.DataBatch(data=(data,))
        net.model.forward(db, is_train=False)
        x = net.model.get_outputs()[0].asnumpy()
        embedding = x[0:count, :] + x[count:, :]
        embedding = sklearn.preprocessing.normalize(embedding)
        #print('emb', embedding.shape)
        F.append(embedding)
    F = np.concatenate(F, axis=1)
    F = sklearn.preprocessing.normalize(F)
    #print('F', F.shape)
    return F
Example #25
Source File: gen_megaface.py From insightface with MIT License | 5 votes

def get_feature(imgs, nets):
    count = len(imgs)
    data = mx.nd.zeros(shape=(count * 2, 3, imgs[0].shape[0], imgs[0].shape[1]))
    for idx, img in enumerate(imgs):
        img = img[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        for flipid in [0, 1]:
            _img = np.copy(img)
            if flipid == 1:
                _img = _img[:, :, ::-1]
            _img = nd.array(_img)
            data[count * flipid + idx] = _img

    F = []
    for net in nets:
        db = mx.io.DataBatch(data=(data,))
        net.model.forward(db, is_train=False)
        x = net.model.get_outputs()[0].asnumpy()
        embedding = x[0:count, :] + x[count:, :]
        embedding = sklearn.preprocessing.normalize(embedding)
        #print('emb', embedding.shape)
        F.append(embedding)
    F = np.concatenate(F, axis=1)
    F = sklearn.preprocessing.normalize(F)
    #print('F', F.shape)
    return F
Example #26
Source File: classifiers.py From seizure-prediction with MIT License | 5 votes

def predict_proba(self, X):
    predictions = self.predict(X)
    predictions = sklearn.preprocessing.scale(predictions)
    predictions = 1.0 / (1.0 + np.exp(-0.5 * predictions))
    return np.vstack((1.0 - predictions, predictions)).T
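This converts raw decision scores into pseudo-probabilities: scale z-scores them, and a logistic with slope 0.5 squashes them into (0, 1). A minimal sketch on toy scores:

import numpy as np
import sklearn.preprocessing

raw = np.array([-2.0, 0.0, 2.0])
z = sklearn.preprocessing.scale(raw)
proba = 1.0 / (1.0 + np.exp(-0.5 * z))
print(proba)  # monotone in the raw scores, squashed into (0, 1)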
Example #27
Source File: gen_video_feature.py From insightface with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    input_count = len(buffer)
    if use_flip:
        input_count *= 2
    network_count = input_count
    if input_count % ctx_num != 0:
        network_count = (input_count // ctx_num + 1) * ctx_num

    input_blob = np.zeros((network_count, 3, image_shape[1], image_shape[2]), dtype=np.float32)
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    _embedding = _embedding[0:input_count]
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #28
Source File: gen_image_feature.py From insightface with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #29
Source File: gen_glint.py From insightface with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = face_preprocess.read_image(item[0], mode='rgb')
        img = face_preprocess.preprocess(img, bbox=None, landmark=item[1], image_size='%d,%d' % (image_shape[1], image_shape[2]))
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #30
Source File: classical.py From netharn with Apache License 2.0 | 4 votes

def _make_est_func(self):
    import sklearn
    from sklearn import multiclass  # NOQA
    from sklearn import ensemble  # NOQA
    from sklearn import neural_network  # NOQA
    from sklearn import svm  # NOQA
    from sklearn import preprocessing  # NOQA
    from sklearn import pipeline  # NOQA
    from functools import partial

    wrap_type = self.wrap_type
    est_type = self.est_type

    multiclass_wrapper = {
        None: ub.identity,
        'OVR': sklearn.multiclass.OneVsRestClassifier,
        'OVO': sklearn.multiclass.OneVsOneClassifier,
    }[wrap_type]
    est_class = {
        'RF': sklearn.ensemble.RandomForestClassifier,
        'SVC': sklearn.svm.SVC,
        'Logit': partial(sklearn.linear_model.LogisticRegression, solver='lbfgs'),
        'MLP': sklearn.neural_network.MLPClassifier,
    }[est_type]

    est_kw = self.est_kw

    try:
        from sklearn.impute import SimpleImputer
        Imputer = SimpleImputer
        import numpy as np
        NAN = np.nan
    except Exception:
        from sklearn.preprocessing import Imputer
        NAN = 'NaN'

    if est_type == 'MLP':
        def make_estimator():
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', Imputer(missing_values=NAN, strategy='mean')),
                # ('scale', sklearn.preprocessing.StandardScaler),
                ('est', est_class(**est_kw)),
            ])
            return multiclass_wrapper(pipe)
    elif est_type == 'Logit':
        def make_estimator():
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', Imputer(missing_values=NAN, strategy='mean')),
                ('est', est_class(**est_kw)),
            ])
            return multiclass_wrapper(pipe)
    else:
        def make_estimator():
            return multiclass_wrapper(est_class(**est_kw))

    return make_estimator
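The MLP and Logit branches wrap the estimator in a Pipeline so that mean imputation is always applied before the model. A minimal standalone sketch of that pattern, assuming a recent sklearn with SimpleImputer:

import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

pipe = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
    ('est', LogisticRegression(solver='lbfgs')),
])
X = np.array([[1.0, np.nan], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
y = np.array([0, 0, 1, 1])
pipe.fit(X, y)          # NaNs are mean-imputed before the estimator sees them
print(pipe.predict(X))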