Python sklearn.preprocessing Examples
The following are 30 code examples of the sklearn.preprocessing module. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn, or try the search function.
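Before the examples, here is a minimal, self-contained sketch of the fit/transform pattern shared by most classes in this module (the toy data is made up for illustration):

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])  # toy data

scaler = StandardScaler()
scaler.fit(X)                    # learn per-column mean and std
X_scaled = scaler.transform(X)   # apply: (x - mean) / std
print(X_scaled.mean(axis=0))     # ~0 per column
print(X_scaled.std(axis=0))      # ~1 per column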
Example #1
Source File: performance_test.py From HumanRecognition with MIT License | 6 votes

def convert_inst_scores_to_cls_scores(similarity, testset0, testset1, num_identity, lbl_map):
    cls_scores1 = []
    for i in range(len(similarity)):
        cls_score = np.zeros(num_identity)
        for j in range(len(similarity[i])):
            id = lbl_map[testset1[j].identity_id]
            cls_score[id] = max(cls_score[id], similarity[i][j])
        cls_scores1.append(cls_score)
    cls_scores1 = np.array(cls_scores1)
    cls_scores1 = sklearn.preprocessing.normalize(cls_scores1)
    similarity = np.transpose(similarity)
    cls_scores2 = []
    for i in range(len(similarity)):
        cls_score = np.zeros(num_identity)
        for j in range(len(similarity[i])):
            id = lbl_map[testset0[j].identity_id]
            cls_score[id] = max(cls_score[id], similarity[i][j])
        cls_scores2.append(cls_score)
    cls_scores2 = np.array(cls_scores2)
    cls_scores2 = sklearn.preprocessing.normalize(cls_scores2)
    return cls_scores1, cls_scores2
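For context: sklearn.preprocessing.normalize, as used twice above, rescales each row to unit L2 norm so the per-identity scores become comparable across rows. A minimal sketch:

import numpy as np
import sklearn.preprocessing

scores = np.array([[3.0, 4.0], [1.0, 0.0]])
normed = sklearn.preprocessing.normalize(scores)  # L2 norm by default
print(normed)  # [[0.6, 0.8], [1.0, 0.0]] -- each row now has unit length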
Example #2
Source File: preprocessor.py From qlik-py-tools with MIT License | 6 votes

def get_scaler(df, missing="zeros", scaler="standard", **kwargs):
    """
    Fit a sklearn scaler on a Data Frame and return the scaler.
    Valid options for the scaler are: standard, minmax, maxabs, robust, quantile
    Missing values must be dealt with before the scaling is applied.
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """
    scalers = {'standard': 'StandardScaler', 'minmax': 'MinMaxScaler', 'maxabs': 'MaxAbsScaler',
               'robust': 'RobustScaler', 'quantile': 'QuantileTransformer'}
    s = getattr(preprocessing, scalers[scaler])
    s = s(**kwargs)
    df = Preprocessor.fillna(df, missing=missing)
    return s.fit(df)
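A hypothetical usage sketch; it assumes a numeric pandas DataFrame df and that the project's Preprocessor.fillna helper is in scope:

scaler = get_scaler(df, missing="mean", scaler="minmax")  # fit on training data
df_scaled = scaler.transform(df)  # the fitted scaler can be reused on new data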
Example #3
Source File: sklearn_intent_classifier.py From Rasa_NLU_Chi with Apache License 2.0 | 6 votes

def __init__(self,
             component_config=None,  # type: Dict[Text, Any]
             clf=None,  # type: sklearn.model_selection.GridSearchCV
             le=None  # type: sklearn.preprocessing.LabelEncoder
             ):
    # type: (...) -> None
    """Construct a new intent classifier using the sklearn framework."""
    from sklearn.preprocessing import LabelEncoder

    super(SklearnIntentClassifier, self).__init__(component_config)

    if le is not None:
        self.le = le
    else:
        self.le = LabelEncoder()
    self.clf = clf

    _sklearn_numpy_warning_fix()
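The LabelEncoder created here maps string intent labels to integer ids and back. A minimal standalone sketch:

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
ids = le.fit_transform(["greet", "bye", "greet"])  # e.g. [1, 0, 1]
print(le.classes_)                # array(['bye', 'greet'], ...) -- sorted
print(le.inverse_transform(ids))  # back to the original strings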
Example #4
Source File: test_gReadUntil.py From RUscripts with MIT License | 6 votes

def squiggle_search2_old(squiggle, kmerhash, seqlen):
    result = []
    for ref in kmerhash:
        #print "ss2", ref
        queryarray = sklearn.preprocessing.scale(np.array(squiggle), axis=0, with_mean=True, with_std=True, copy=True)
        dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref]['Fprime'])
        result.append((dist, ref, "F", path[1][0], ref, path[1][-1]))
        dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref]['Rprime'])
        result.append((dist, ref, "R", path[1][0], ref, path[1][-1]))
    #('J02459', 41.017514495176989, 'F', 10003, 'J02459', 10198)
    #distanceR,seqmatchnameR,frR,rsR,reR,qsR,qeR=sorted(result,key=lambda result: result[0])[0]
    #return seqmatchnameR,distanceR,frR,rsR,reR,qsR,qeR
    best = sorted(result, key=lambda result: result[0])[0]
    return best[1], best[0], best[2], best[3], best[4], best[5]

######################################################################
######################################################################
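sklearn.preprocessing.scale, used above to z-normalise the query squiggle before dynamic time warping, is the one-shot functional form of StandardScaler. A minimal sketch:

import numpy as np
import sklearn.preprocessing

squiggle = np.array([480.0, 520.0, 500.0, 490.0])
z = sklearn.preprocessing.scale(squiggle)  # zero mean, unit variance
print(z.mean(), z.std())                   # ~0.0, ~1.0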
Example #5
Source File: clf_helpers.py From ibeis with Apache License 2.0 | 6 votes

def encoded_1d(samples):
    """ Returns a unique label for each combination of samples """
    # from sklearn.preprocessing import MultiLabelBinarizer
    encoded_2d = samples.encoded_2d()
    class_space = [v.n_classes for k, v in samples.items()]
    offsets = np.array([1] + np.cumprod(class_space).tolist()[:-1])[None, :]
    encoded_1d = (offsets * encoded_2d).sum(axis=1)
    # e = MultiLabelBinarizer()
    # bin_coeff = e.fit_transform(encoded_2d)
    # bin_basis = (2 ** np.arange(bin_coeff.shape[1]))[None, :]
    # # encoded_1d = (bin_coeff * bin_basis).sum(axis=1)
    # encoded_1d = (bin_coeff * bin_basis[::-1]).sum(axis=1)
    # # vt.unique_rows(sklearn.preprocessing.MultiLabelBinarizer().fit_transform(encoded_2d))
    # [v.encoded_df.values for k, v in samples.items()]
    # encoded_df_1d = pd.concat([v.encoded_df for k, v in samples.items()], axis=1)
    return encoded_1d
Example #6
Source File: _utils.py From qlik-py-tools with MIT License | 6 votes

def scale(df, missing="zeros", scaler="robust", **kwargs):
    """
    Scale values in a Data Frame using the relevant sklearn preprocessing method.
    Valid options for the scaler are: standard, minmax, maxabs, robust, quantile
    Missing values must be dealt with before the scaling is applied.
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """
    scalers = {'standard': 'StandardScaler', 'minmax': 'MinMaxScaler', 'maxabs': 'MaxAbsScaler',
               'robust': 'RobustScaler', 'quantile': 'QuantileTransformer'}
    s = getattr(preprocessing, scalers[scaler])
    s = s(**kwargs)
    df = fillna(df, method=missing)
    df = pd.DataFrame(s.fit_transform(df), index=df.index, columns=df.columns)
    return df
Example #7
Source File: sklearn_intent_classifier.py From rasa_nlu with Apache License 2.0 | 6 votes

def __init__(self,
             component_config: Dict[Text, Any] = None,
             clf: 'sklearn.model_selection.GridSearchCV' = None,
             le: Optional['sklearn.preprocessing.LabelEncoder'] = None
             ) -> None:
    """Construct a new intent classifier using the sklearn framework."""
    from sklearn.preprocessing import LabelEncoder

    super(SklearnIntentClassifier, self).__init__(component_config)

    if le is not None:
        self.le = le
    else:
        self.le = LabelEncoder()
    self.clf = clf

    _sklearn_numpy_warning_fix()
Example #8
Source File: sklearn_intent_classifier.py From rasa-for-botfront with Apache License 2.0 | 6 votes

def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["SklearnIntentClassifier"] = None,
    **kwargs: Any,
) -> "SklearnIntentClassifier":
    from sklearn.preprocessing import LabelEncoder

    classifier_file = os.path.join(model_dir, meta.get("classifier"))
    encoder_file = os.path.join(model_dir, meta.get("encoder"))

    if os.path.exists(classifier_file):
        classifier = io_utils.json_unpickle(classifier_file)
        classes = io_utils.json_unpickle(encoder_file)
        encoder = LabelEncoder()
        encoder.classes_ = classes
        return cls(meta, classifier, encoder)
    else:
        return cls(meta)
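Restoring the encoder by assigning classes_ directly, as above, means only the class array needs to be persisted, not the whole object. A minimal sketch of that round trip:

import numpy as np
from sklearn.preprocessing import LabelEncoder

original = LabelEncoder().fit(["bye", "greet"])
saved_classes = original.classes_        # this array is all the state needed

restored = LabelEncoder()
restored.classes_ = np.array(saved_classes)
print(restored.transform(["greet"]))     # [1] -- same mapping as the original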
Example #9
Source File: sklearn_intent_classifier.py From rasa-for-botfront with Apache License 2.0 | 6 votes

def __init__(
    self,
    component_config: Optional[Dict[Text, Any]] = None,
    clf: "sklearn.model_selection.GridSearchCV" = None,
    le: Optional["sklearn.preprocessing.LabelEncoder"] = None,
) -> None:
    """Construct a new intent classifier using the sklearn framework."""
    from sklearn.preprocessing import LabelEncoder

    super().__init__(component_config)

    if le is not None:
        self.le = le
    else:
        self.le = LabelEncoder()
    self.clf = clf
Example #10
Source File: deep_learning.py From EliteQuant_Python with Apache License 2.0 | 5 votes

def normalize_data(df):
    min_max_scaler = sklearn.preprocessing.MinMaxScaler()
    df['open'] = min_max_scaler.fit_transform(df.open.values.reshape(-1, 1))
    df['high'] = min_max_scaler.fit_transform(df.high.values.reshape(-1, 1))
    df['low'] = min_max_scaler.fit_transform(df.low.values.reshape(-1, 1))
    df['close'] = min_max_scaler.fit_transform(df['close'].values.reshape(-1, 1))
    return df

# function to create train, validation, test data given stock data and sequence length
# use previous 19 days to predict today
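MinMaxScaler rescales each column to [0, 1] by default; note that because fit_transform is called once per column above, each price series is scaled by its own min and max. A minimal sketch:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

prices = np.array([10.0, 15.0, 20.0]).reshape(-1, 1)
scaled = MinMaxScaler().fit_transform(prices)
print(scaled.ravel())  # [0.0, 0.5, 1.0] -- (x - min) / (max - min)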
Example #11
Source File: ordinal_encoder.py From lale with Apache License 2.0 | 5 votes

def __init__(self, categories='auto', dtype='float64', handle_unknown='ignore', encode_unknown_with='auto'):
    self._hyperparams = {
        'categories': categories,
        'dtype': dtype}
    self.handle_unknown = handle_unknown
    self.encode_unknown_with = encode_unknown_with
    self._wrapped_model = sklearn.preprocessing.OrdinalEncoder(**self._hyperparams)
    self.unknown_categories_mapping = []  # used during inverse transform to keep track of mapping of unknown categories
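The lale wrapper above delegates to sklearn.preprocessing.OrdinalEncoder. A minimal sketch of the underlying encoder alone (assuming sklearn >= 0.20, where it was introduced):

import numpy as np
from sklearn.preprocessing import OrdinalEncoder

enc = OrdinalEncoder()
X = np.array([["red"], ["green"], ["red"]])
print(enc.fit_transform(X))  # [[1.], [0.], [1.]] -- categories in sorted order
print(enc.categories_)       # [array(['green', 'red'], ...)]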
Example #12
Source File: _utils.py From qlik-py-tools with MIT License | 5 votes

def get_scaler(df, missing="zeros", scaler="StandardScaler", **kwargs):
    """
    Fit a sklearn scaler on a Data Frame and return the scaler.
    Valid options for the scaler are: StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, QuantileTransformer
    Missing values must be dealt with before the scaling is applied.
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """
    s = getattr(preprocessing, scaler)
    s = s(**kwargs)
    df = fillna(df, method=missing)
    return s.fit(df)
Example #13
Source File: preproc.py From fathom with Apache License 2.0 | 5 votes

def mfcc_features(filename):
    """Preprocessing per CTC paper.

    (These are not the simpler linear spectrogram features alone as in Deep Speech.)

    Properties:
    - 10ms frames with 5ms overlap
    - 12 MFCCs with 26 filter banks
    - replace first MFCC with energy (TODO: log-energy)
    - add first-order derivatives for all of the above
    - total: 26 coefficients
    """
    d, sr = librosa.load(filename)

    frame_length_seconds = 0.010
    frame_overlap_seconds = 0.005

    mfccs = librosa.feature.mfcc(d, sr, n_mfcc=1 + 12,
                                 n_fft=int(frame_overlap_seconds * sr),
                                 hop_length=int(frame_overlap_seconds * sr))

    # energy (TODO: log?)
    energy = librosa.feature.rmse(d,
                                  n_fft=int(frame_overlap_seconds * sr),
                                  hop_length=int(frame_overlap_seconds * sr))
    mfccs[0] = energy  # replace first MFCC with energy, per convention

    deltas = librosa.feature.delta(mfccs, order=1)
    mfccs_plus_deltas = np.vstack([mfccs, deltas])

    coeffs = sklearn.preprocessing.scale(mfccs_plus_deltas, axis=1)
    return coeffs
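The final preprocessing step standardises along axis=1, i.e. each coefficient row is given zero mean and unit variance across frames. A minimal sketch of that axis behaviour:

import numpy as np
import sklearn.preprocessing

feats = np.array([[1.0, 2.0, 3.0],
                  [10.0, 20.0, 30.0]])      # 2 channels x 3 frames
scaled = sklearn.preprocessing.scale(feats, axis=1)
print(scaled.mean(axis=1))  # ~[0, 0] -- each row standardised independently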
Example #14
Source File: min_max_scaler.py From lale with Apache License 2.0 | 5 votes

def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.preprocessing.MinMaxScaler(**self._hyperparams)
Example #15
Source File: test_gReadUntil.py From RUscripts with MIT License | 5 votes

def process_ref_fasta_orig(ref_fasta, model_kmer_means, seqlen, kmerlen):
    print "processing the reference fasta."
    kmer_len = kmerlen
    kmer_means = dict()
    for record in SeqIO.parse(ref_fasta, 'fasta'):
        kmer_means[record.id] = dict()
        kmer_means[record.id]["F"] = list()
        kmer_means[record.id]["R"] = list()
        kmer_means[record.id]["Fprime"] = list()
        kmer_means[record.id]["Rprime"] = list()
        print "ID", record.id
        print "length", len(record.seq)
        print "FORWARD STRAND"
        seq = record.seq
        for x in range(len(seq) + 1 - kmer_len):
            kmer = str(seq[x:x + kmer_len])
            kmer_means[record.id]["F"].append(float(model_kmer_means[kmer]))
            #if model_kmer_means[kmer]:
            #print x, kmer, model_kmer_means[kmer]
        print "REVERSE STRAND"
        seq = revcomp = record.seq.reverse_complement()
        for x in range(len(seq) + 1 - kmer_len):
            kmer = str(seq[x:x + kmer_len])
            kmer_means[record.id]["R"].append(float(model_kmer_means[kmer]))
        kmer_means[record.id]["Fprime"] = sklearn.preprocessing.scale(kmer_means[record.id]["F"], axis=0, with_mean=True, with_std=True, copy=True)
        kmer_means[record.id]["Rprime"] = sklearn.preprocessing.scale(kmer_means[record.id]["R"], axis=0, with_mean=True, with_std=True, copy=True)
    return kmer_means

#######################################################################
Example #16
Source File: search_fusion_weights.py From HumanRecognition with MIT License | 5 votes

def cal_feature_similarity(detections_0, detections_1, feature_name):
    features_0 = get_features(detections_0, feature_name)
    features_1 = get_features(detections_1, feature_name)
    features_0 = sklearn.preprocessing.normalize(features_0)
    features_1 = sklearn.preprocessing.normalize(features_1)
    similarity = sklearn.metrics.pairwise.cosine_similarity(features_0, features_1)
    similarity = 1.0 / (1 + np.exp(-(config.beta0 + config.beta1 * similarity)))
    return similarity
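Because the feature rows are L2-normalised first, the cosine similarity above reduces to a plain dot product, which the logistic function then maps into (0, 1). A minimal sketch of the equivalence:

import numpy as np
import sklearn.preprocessing
from sklearn.metrics.pairwise import cosine_similarity

a = sklearn.preprocessing.normalize(np.array([[1.0, 2.0]]))
b = sklearn.preprocessing.normalize(np.array([[2.0, 1.0]]))
print(np.allclose(cosine_similarity(a, b), a @ b.T))  # True -- unit rows make cosine a dot product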
Example #17
Source File: performance_test.py From HumanRecognition with MIT License | 5 votes

def cal_feature_similarity(detections_0, detections_1, feature_name):
    features_0 = get_features(detections_0, feature_name)
    features_1 = get_features(detections_1, feature_name)
    features_0 = sklearn.preprocessing.normalize(features_0)
    features_1 = sklearn.preprocessing.normalize(features_1)
    similarity = sklearn.metrics.pairwise.cosine_similarity(features_0, features_1)
    similarity = 1.0 / (1 + np.exp(-(config.beta0 + config.beta1 * similarity)))
    return similarity
Example #18
Source File: min_max_scaler.py From lale with Apache License 2.0 | 5 votes

def partial_fit(self, X, y=None):
    if not hasattr(self, "_wrapped_model"):
        self._wrapped_model = sklearn.preprocessing.MinMaxScaler(**self._hyperparams)
    self._wrapped_model.partial_fit(X)
    return self
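partial_fit updates the scaler's running min/max statistics one batch at a time, which is what makes this wrapper usable on data that arrives incrementally. A minimal sketch:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.partial_fit(np.array([[1.0], [2.0]]))  # first batch
scaler.partial_fit(np.array([[10.0]]))        # second batch widens the range
print(scaler.data_min_, scaler.data_max_)     # [1.] [10.]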
Example #19
Source File: one_hot_encoder.py From lale with Apache License 2.0 | 5 votes

def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.preprocessing.OneHotEncoder(**self._hyperparams)
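For context, a minimal sketch of what the wrapped OneHotEncoder does (using .toarray() so it works whether the output is sparse or dense across sklearn versions):

import numpy as np
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder()
X = np.array([["red"], ["green"], ["red"]])
print(enc.fit_transform(X).toarray())
# [[0. 1.]
#  [1. 0.]
#  [0. 1.]] -- one indicator column per category, in sorted order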
Example #20
Source File: unit_tests.py From pynisher with MIT License | 5 votes

def svc_example(n_samples=10000, n_features=4):
    from sklearn.svm import LinearSVC
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.datasets import make_classification

    X, Y = make_classification(n_samples, n_features)
    #pp = PolynomialFeatures(degree=3)
    #X = pp.fit_transform(X)
    m = LinearSVC()
    m.fit(X, Y)
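The commented-out lines are the sklearn.preprocessing part of this test: PolynomialFeatures would expand the raw features with polynomial terms before fitting. A minimal sketch of that expansion:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0, 3.0]])
pp = PolynomialFeatures(degree=2)
print(pp.fit_transform(X))  # [[1., 2., 3., 4., 6., 9.]] -- bias, x1, x2, x1^2, x1*x2, x2^2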
Example #21
Source File: gen_video_feature.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    input_count = len(buffer)
    if use_flip:
        input_count *= 2
    network_count = input_count
    if input_count % ctx_num != 0:
        network_count = (input_count // ctx_num + 1) * ctx_num

    input_blob = np.zeros((network_count, 3, image_shape[1], image_shape[2]), dtype=np.float32)
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    _embedding = _embedding[0:input_count]
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #22
Source File: gen_image_feature.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #23
Source File: gen_glint.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = face_preprocess.read_image(item[0], mode='rgb')
        img = face_preprocess.preprocess(img, bbox=None, landmark=item[1], image_size='%d,%d' % (image_shape[1], image_shape[2]))
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #24
Source File: gen_megaface.py From 1.FaceRecognition with MIT License | 5 votes

def get_feature(imgs, nets):
    count = len(imgs)
    data = mx.nd.zeros(shape=(count * 2, 3, imgs[0].shape[0], imgs[0].shape[1]))
    for idx, img in enumerate(imgs):
        img = img[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        for flipid in [0, 1]:
            _img = np.copy(img)
            if flipid == 1:
                _img = _img[:, :, ::-1]
            _img = nd.array(_img)
            data[count * flipid + idx] = _img

    F = []
    for net in nets:
        db = mx.io.DataBatch(data=(data,))
        net.model.forward(db, is_train=False)
        x = net.model.get_outputs()[0].asnumpy()
        embedding = x[0:count, :] + x[count:, :]
        embedding = sklearn.preprocessing.normalize(embedding)
        #print('emb', embedding.shape)
        F.append(embedding)
    F = np.concatenate(F, axis=1)
    F = sklearn.preprocessing.normalize(F)
    #print('F', F.shape)
    return F
Example #25
Source File: gen_megaface.py From insightface with MIT License | 5 votes

def get_feature(imgs, nets):
    count = len(imgs)
    data = mx.nd.zeros(shape=(count * 2, 3, imgs[0].shape[0], imgs[0].shape[1]))
    for idx, img in enumerate(imgs):
        img = img[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        for flipid in [0, 1]:
            _img = np.copy(img)
            if flipid == 1:
                _img = _img[:, :, ::-1]
            _img = nd.array(_img)
            data[count * flipid + idx] = _img

    F = []
    for net in nets:
        db = mx.io.DataBatch(data=(data,))
        net.model.forward(db, is_train=False)
        x = net.model.get_outputs()[0].asnumpy()
        embedding = x[0:count, :] + x[count:, :]
        embedding = sklearn.preprocessing.normalize(embedding)
        #print('emb', embedding.shape)
        F.append(embedding)
    F = np.concatenate(F, axis=1)
    F = sklearn.preprocessing.normalize(F)
    #print('F', F.shape)
    return F
Example #26
Source File: classifiers.py From seizure-prediction with MIT License | 5 votes

def predict_proba(self, X):
    predictions = self.predict(X)
    predictions = sklearn.preprocessing.scale(predictions)
    predictions = 1.0 / (1.0 + np.exp(-0.5 * predictions))
    return np.vstack((1.0 - predictions, predictions)).T
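This converts raw decision scores into pseudo-probabilities: scale z-scores them, and a logistic with slope 0.5 squashes them into (0, 1). A minimal sketch on toy scores:

import numpy as np
import sklearn.preprocessing

raw = np.array([-2.0, 0.0, 2.0])
z = sklearn.preprocessing.scale(raw)
proba = 1.0 / (1.0 + np.exp(-0.5 * z))
print(proba)  # monotone in the raw scores, squashed into (0, 1)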
Example #27
Source File: gen_video_feature.py From insightface with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    input_count = len(buffer)
    if use_flip:
        input_count *= 2
    network_count = input_count
    if input_count % ctx_num != 0:
        network_count = (input_count // ctx_num + 1) * ctx_num

    input_blob = np.zeros((network_count, 3, image_shape[1], image_shape[2]), dtype=np.float32)
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    _embedding = _embedding[0:input_count]
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #28
Source File: gen_image_feature.py From insightface with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = cv2.imread(item)[:, :, ::-1]  # to rgb
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #29
Source File: gen_glint.py From insightface with MIT License | 5 votes

def get_feature(buffer):
    global emb_size
    if use_flip:
        input_blob = np.zeros((len(buffer) * 2, 3, image_shape[1], image_shape[2]))
    else:
        input_blob = np.zeros((len(buffer), 3, image_shape[1], image_shape[2]))
    idx = 0
    for item in buffer:
        img = face_preprocess.read_image(item[0], mode='rgb')
        img = face_preprocess.preprocess(img, bbox=None, landmark=item[1], image_size='%d,%d' % (image_shape[1], image_shape[2]))
        img = np.transpose(img, (2, 0, 1))
        attempts = [0, 1] if use_flip else [0]
        for flipid in attempts:
            _img = np.copy(img)
            if flipid == 1:
                do_flip(_img)
            input_blob[idx] = _img
            idx += 1
    data = mx.nd.array(input_blob)
    db = mx.io.DataBatch(data=(data,))
    net.model.forward(db, is_train=False)
    _embedding = net.model.get_outputs()[0].asnumpy()
    if emb_size == 0:
        emb_size = _embedding.shape[1]
        print('set emb_size to ', emb_size)
    embedding = np.zeros((len(buffer), emb_size), dtype=np.float32)
    if use_flip:
        embedding1 = _embedding[0::2]
        embedding2 = _embedding[1::2]
        embedding = embedding1 + embedding2
    else:
        embedding = _embedding
    embedding = sklearn.preprocessing.normalize(embedding)
    return embedding
Example #30
Source File: classical.py From netharn with Apache License 2.0 | 4 votes

def _make_est_func(self):
    import sklearn
    from sklearn import multiclass  # NOQA
    from sklearn import ensemble  # NOQA
    from sklearn import neural_network  # NOQA
    from sklearn import svm  # NOQA
    from sklearn import preprocessing  # NOQA
    from sklearn import pipeline  # NOQA
    from functools import partial

    wrap_type = self.wrap_type
    est_type = self.est_type

    multiclass_wrapper = {
        None: ub.identity,
        'OVR': sklearn.multiclass.OneVsRestClassifier,
        'OVO': sklearn.multiclass.OneVsOneClassifier,
    }[wrap_type]
    est_class = {
        'RF': sklearn.ensemble.RandomForestClassifier,
        'SVC': sklearn.svm.SVC,
        'Logit': partial(sklearn.linear_model.LogisticRegression, solver='lbfgs'),
        'MLP': sklearn.neural_network.MLPClassifier,
    }[est_type]

    est_kw = self.est_kw

    try:
        from sklearn.impute import SimpleImputer
        Imputer = SimpleImputer
        import numpy as np
        NAN = np.nan
    except Exception:
        from sklearn.preprocessing import Imputer
        NAN = 'NaN'

    if est_type == 'MLP':
        def make_estimator():
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', Imputer(missing_values=NAN, strategy='mean')),
                # ('scale', sklearn.preprocessing.StandardScaler),
                ('est', est_class(**est_kw)),
            ])
            return multiclass_wrapper(pipe)
    elif est_type == 'Logit':
        def make_estimator():
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', Imputer(missing_values=NAN, strategy='mean')),
                ('est', est_class(**est_kw)),
            ])
            return multiclass_wrapper(pipe)
    else:
        def make_estimator():
            return multiclass_wrapper(est_class(**est_kw))

    return make_estimator
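The MLP and Logit branches wrap the estimator in a Pipeline so that mean imputation is always applied before the model. A minimal standalone sketch of that pattern, assuming a recent sklearn with SimpleImputer:

import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

pipe = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
    ('est', LogisticRegression(solver='lbfgs')),
])
X = np.array([[1.0, np.nan], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
y = np.array([0, 0, 1, 1])
pipe.fit(X, y)          # NaNs are mean-imputed before the estimator sees them
print(pipe.predict(X))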