Python sklearn.preprocessing.normalize() Examples
The following are 30 code examples of sklearn.preprocessing.normalize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
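Before the project examples, here is a minimal, self-contained sketch of the function's basic behavior (this sketch is editor-added, not from any of the projects below): normalize() scales each sample vector to unit norm, with norm selecting l1/l2/max and axis choosing rows or columns.

import numpy as np
from sklearn.preprocessing import normalize

X = np.array([[3.0, 4.0],
              [1.0, 1.0]])

# Default: scale each row (axis=1) to unit L2 norm.
print(normalize(X))             # [[0.6, 0.8], [0.7071..., 0.7071...]]

# L1 norm: each row's absolute values sum to 1.
print(normalize(X, norm='l1'))  # [[0.4285..., 0.5714...], [0.5, 0.5]]

# axis=0 normalizes columns instead of rows.
print(normalize(X, axis=0))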
Example #1
Source File: ml_elm.py From Python-ELM with MIT License | 8 votes |
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import train_test_split

    db_name = 'diabetes'
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)
    tmp = data_set.target
    tmpL = [1 if i == "tested_positive" else -1 for i in tmp]
    data_set.target = tmpL

    X_train, X_test, y_train, y_test = train_test_split(
        data_set.data, data_set.target, test_size=0.4)

    # MLELM and ELM are defined elsewhere in the surrounding ml_elm.py module.
    mlelm = MLELM(hidden_units=(10, 30, 200)).fit(X_train, y_train)
    elm = ELM(200).fit(X_train, y_train)
    print("MLELM Accuracy %0.3f " % mlelm.score(X_test, y_test))
    print("ELM Accuracy %0.3f " % elm.score(X_test, y_test))
Example #2
Source File: literal_encoder.py From MultiKE with MIT License | 6 votes |
def __init__(self, word_vec_list, args, input_dimension=1500, hidden_dimensions=None):
    self.session = load_session()
    self.args = args
    self.weights, self.biases = {}, {}
    self.input_dimension = input_dimension
    if hidden_dimensions is None:
        hidden_dimensions = [1024, 512, self.args.dim]
    self.hidden_dimensions = hidden_dimensions
    self.layer_num = len(self.hidden_dimensions)
    self.encoder_output = None
    self.decoder_output = None
    self.decoder_op = None

    self.word_vec_list = np.reshape(word_vec_list, [len(word_vec_list), input_dimension])
    if self.args.encoder_normalize:
        self.word_vec_list = preprocessing.normalize(self.word_vec_list)

    self._init_graph()
    self._loss_optimizer()
    tf.global_variables_initializer().run(session=self.session)
Example #3
Source File: test_pairwise.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cosine_similarity():
    # Test the cosine_similarity.
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    for X_, Y_ in ((X, None), (X, Y), (Xcsr, None), (Xcsr, Ycsr)):
        # Test that the cosine kernel is equal to a linear kernel when data
        # has been previously normalized by L2-norm.
        K1 = pairwise_kernels(X_, Y=Y_, metric="cosine")
        X_ = normalize(X_)
        if Y_ is not None:
            Y_ = normalize(Y_)
        K2 = pairwise_kernels(X_, Y=Y_, metric="linear")
        assert_array_almost_equal(K1, K2)
Example #4
Source File: grarep.py From OpenNE with MIT License | 6 votes |
def train(self):
    self.adj = self.getAdjMat()
    self.node_size = self.adj.shape[0]
    self.Ak = np.matrix(np.identity(self.node_size))
    self.RepMat = np.zeros((self.node_size, int(self.dim * self.Kstep)))
    for i in range(self.Kstep):
        print('Kstep =', i)
        self.Ak = np.dot(self.Ak, self.adj)
        probTranMat = self.GetProbTranMat(self.Ak)
        Rk = self.GetRepUseSVD(probTranMat, 0.5)
        Rk = normalize(Rk, axis=1, norm='l2')
        self.RepMat[:, self.dim * i:self.dim * (i + 1)] = Rk[:, :]
    # get embeddings
    self.vectors = {}
    look_back = self.g.look_back_list
    for i, embedding in enumerate(self.RepMat):
        self.vectors[look_back[i]] = embedding
Example #5
Source File: vectorizer.py From robotreviewer with GNU General Public License v3.0 | 6 votes |
def transform(self, X_si, high=None, low=None, limit=None):
    """
    Same as HashingVectorizer transform, except allows for
    interaction list, which is an iterable the same length as X
    filled with True/False. This method adds an empty row to
    docs labelled as False.
    """
    analyzer = self.build_analyzer()
    X = self._get_hasher().transform(
        analyzer(self._deal_with_input(doc)) for doc in X_si)
    X.data.fill(1)

    if self.norm is not None:
        X = normalize(X, norm=self.norm, copy=False)
    if low:
        X = self._limit_features(X, low=low)
    return X
Example #6
Source File: neu.py From karateclub with GNU General Public License v3.0 | 6 votes |
def _update_embedding(self, graph, original_embedding):
    r"""Performs the Network Embedding Update on the original embedding.

    Args:
        original_embedding (Numpy array): An array containing an embedding.
        graph (NetworkX graph): The embedded graph.

    Return types:
        embedding (Numpy array): An array containing the updated embedding.
    """
    embedding = self._normalize_embedding(original_embedding)
    adjacency = nx.adjacency_matrix(graph, nodelist=range(graph.number_of_nodes()))
    normalized_adjacency = normalize(adjacency, norm='l1', axis=1)
    for _ in range(self.iterations):
        embedding = (embedding
                     + self.L1 * (normalized_adjacency @ embedding)
                     + self.L2 * (normalized_adjacency @ (normalized_adjacency @ embedding)))
    return embedding
Example #7
Source File: prone.py From nodevectors with MIT License | 6 votes |
def pre_factorization(G, n_components, exponent):
    """
    Network Embedding as Sparse Matrix Factorization
    """
    C1 = preprocessing.normalize(G, "l1")
    # Prepare negative samples
    neg = np.array(C1.sum(axis=0))[0] ** exponent
    neg = neg / neg.sum()
    neg = sparse.diags(neg, format="csr")
    neg = G.dot(neg)
    # Set negative elements to 1 -> 0 when log
    C1.data[C1.data <= 0] = 1
    neg.data[neg.data <= 0] = 1
    C1.data = np.log(C1.data)
    neg.data = np.log(neg.data)
    C1 -= neg
    features_matrix = ProNE.tsvd_rand(C1, n_components=n_components)
    return features_matrix
Example #8
Source File: streaming_random_patches.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _predict_proba(self, X):
    y_proba = np.asarray([0.])
    for i in range(len(self.ensemble)):
        y_proba_temp = self.ensemble[i].predict_proba(X)
        if np.sum(y_proba_temp) > 0.0:
            y_proba_temp = normalize(y_proba_temp, norm='l1')[0].copy()
            acc = self.ensemble[i].performance_evaluator.accuracy_score()
            if not self.disable_weighted_vote and acc > 0.0:
                y_proba_temp *= acc
            # Check array length consistency
            if len(y_proba_temp) != len(y_proba):
                if len(y_proba_temp) > len(y_proba):
                    y_proba.resize((len(y_proba_temp), ), refcheck=False)
                else:
                    y_proba_temp.resize((len(y_proba), ), refcheck=False)
            # Add values
            y_proba += y_proba_temp
    return y_proba
Example #9
Source File: process.py From geosketch with MIT License | 6 votes |
def load_names(data_names, norm=True, log1p=False, verbose=True):
    # Load datasets.
    datasets = []
    genes_list = []
    n_cells = 0
    for name in data_names:
        X_i, genes_i = load_data(name)
        if norm:
            X_i = normalize(X_i, axis=1)
        if log1p:
            X_i = np.log1p(X_i)
        X_i = csr_matrix(X_i)

        datasets.append(X_i)
        genes_list.append(genes_i)
        n_cells += X_i.shape[0]
        if verbose:
            print('Loaded {} with {} genes and {} cells'
                  .format(name, X_i.shape[1], X_i.shape[0]))
    if verbose:
        print('Found {} cells among all datasets'.format(n_cells))
    return datasets, genes_list, n_cells
Example #10
Source File: eval.py From SARC with MIT License | 6 votes |
def parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help='pol or main', type=str)
    parser.add_argument('-n', '--n', default=1, help='Number of grams', type=int)
    parser.add_argument('--min_count', default=1, help='Min count', type=int)
    parser.add_argument('--embedding', default=CCGLOVE, help='embedding file', type=str)
    parser.add_argument('--weights', default=None,
                        help='weights to use for ngrams (e.g. sif, None)', type=str)
    parser.add_argument('-norm', '--normalize', action='store_true',
                        help='Normalize vectors')
    parser.add_argument('-l', '--lower', action='store_true',
                        help='Whether or not to lowercase text')
    parser.add_argument('-e', '--embed', action='store_true',
                        help='Use embeddings instead of bong')
    return parser.parse_args()
Example #11
Source File: MultiKE_Late.py From MultiKE with MIT License | 6 votes |
def _compute_weight(embeds1, embeds2, embeds3):
    def min_max_normalization(mat):
        min_ = np.min(mat)
        max_ = np.max(mat)
        return (mat - min_) / (max_ - min_)

    other_embeds = (embeds1 + embeds2 + embeds3) / 3
    # other_embeds = (embeds2 + embeds3) / 2
    other_embeds = preprocessing.normalize(other_embeds)
    embeds1 = preprocessing.normalize(embeds1)
    # sim_mat = sim(embeds1, other_embeds, metric='cosine')
    sim_mat = np.matmul(embeds1, other_embeds.T)
    # sim_mat = 1 - euclidean_distances(embeds1, other_embeds)
    weights = np.diag(sim_mat)
    # print(weights.shape, np.mean(weights))
    # weights = min_max_normalization(weights)
    print(weights.shape, np.mean(weights))
    return np.mean(weights)
Example #12
Source File: MultiKE_Late.py From MultiKE with MIT License | 6 votes |
def test(model, embed_choice='avg', w=(1, 1, 1)):
    if embed_choice == 'nv':
        ent_embeds = model.name_embeds.eval(session=model.session)
    elif embed_choice == 'rv':
        ent_embeds = model.rv_ent_embeds.eval(session=model.session)
    elif embed_choice == 'av':
        ent_embeds = model.av_ent_embeds.eval(session=model.session)
    elif embed_choice == 'final':
        ent_embeds = model.ent_embeds.eval(session=model.session)
    elif embed_choice == 'avg':
        ent_embeds = w[0] * model.name_embeds.eval(session=model.session) + \
                     w[1] * model.rv_ent_embeds.eval(session=model.session) + \
                     w[2] * model.av_ent_embeds.eval(session=model.session)
    else:  # wavg
        ent_embeds = model.ent_embeds
    print(embed_choice, 'test results:')
    embeds1 = ent_embeds[model.kgs.test_entities1,]
    embeds2 = ent_embeds[model.kgs.test_entities2,]
    hits1_12, mrr_12 = eva.valid(embeds1, embeds2, None, model.args.top_k,
                                 model.args.test_threads_num, normalize=True)
    del embeds1, embeds2
    gc.collect()
    return mrr_12
Example #13
Source File: STFIWF.py From 2016CCF-sougou with Apache License 2.0 | 6 votes |
def _char_wb_ngrams(self, text_document):
    """Whitespace sensitive char-n-gram tokenization.

    Tokenize text_document into a sequence of character n-grams
    excluding any whitespace (operating only inside word boundaries)"""
    # normalize white spaces
    text_document = self._white_spaces.sub(" ", text_document)

    min_n, max_n = self.ngram_range
    ngrams = []
    for w in text_document.split():
        w = ' ' + w + ' '
        w_len = len(w)
        for n in xrange(min_n, max_n + 1):  # xrange: Python 2 (use range on Python 3)
            offset = 0
            ngrams.append(w[offset:offset + n])
            while offset + n < w_len:
                offset += 1
                ngrams.append(w[offset:offset + n])
            if offset == 0:  # count a short word (w_len < n) only once
                break
    return ngrams
Example #14
Source File: MultiKE_Late.py From MultiKE with MIT License | 6 votes |
def valid(model, embed_choice='avg', w=(1, 1, 1)):
    if embed_choice == 'nv':
        ent_embeds = model.name_embeds.eval(session=model.session)
    elif embed_choice == 'rv':
        ent_embeds = model.rv_ent_embeds.eval(session=model.session)
    elif embed_choice == 'av':
        ent_embeds = model.av_ent_embeds.eval(session=model.session)
    elif embed_choice == 'final':
        ent_embeds = model.ent_embeds.eval(session=model.session)
    elif embed_choice == 'avg':
        ent_embeds = w[0] * model.name_embeds.eval(session=model.session) + \
                     w[1] * model.rv_ent_embeds.eval(session=model.session) + \
                     w[2] * model.av_ent_embeds.eval(session=model.session)
    else:  # 'final'
        ent_embeds = model.ent_embeds
    print(embed_choice, 'valid results:')
    embeds1 = ent_embeds[model.kgs.valid_entities1,]
    embeds2 = ent_embeds[model.kgs.valid_entities2 + model.kgs.test_entities2,]
    hits1_12, mrr_12 = eva.valid(embeds1, embeds2, None, model.args.top_k,
                                 model.args.test_threads_num, normalize=True)
    del embeds1, embeds2
    gc.collect()
    return mrr_12
Example #15
Source File: data_model.py From MultiKE with MIT License | 6 votes |
def _generate_name_vectors_mat(self):
    name_ordered_list = list()
    num = len(self.entities)
    print("total entities:", num)
    entity_id_uris_dic = dict(zip(self.kgs.kg1.entities_id_dict.values(),
                                  self.kgs.kg1.entities_id_dict.keys()))
    entity_id_uris_dic2 = dict(zip(self.kgs.kg2.entities_id_dict.values(),
                                   self.kgs.kg2.entities_id_dict.keys()))
    entity_id_uris_dic.update(entity_id_uris_dic2)
    print('total entities ids:', len(entity_id_uris_dic))
    assert len(entity_id_uris_dic) == num
    for i in range(num):
        assert i in entity_id_uris_dic
        entity_uri = entity_id_uris_dic.get(i)
        assert entity_uri in self.entity_local_name_dict
        entity_name = self.entity_local_name_dict.get(entity_uri)
        entity_name_index = self.literal_id_dic.get(entity_name)
        name_ordered_list.append(entity_name_index)
    print('name_ordered_list', len(name_ordered_list))
    name_mat = self.literal_vectors_mat[name_ordered_list, ]
    print("entity name embeddings mat:", type(name_mat), name_mat.shape)
    if self.args.literal_normalize:
        name_mat = preprocessing.normalize(name_mat)
    self.local_name_vectors = name_mat
Example #16
Source File: STFIWF.py From 2016CCF-sougou with Apache License 2.0 | 6 votes |
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart

    Warning: the python-level loop and join operations make this
    implementation 20 times slower than the strip_accents_ascii basic
    normalization.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    normalized = unicodedata.normalize('NFKD', s)
    if normalized == s:
        return s
    else:
        return ''.join([c for c in normalized
                        if not unicodedata.combining(c)])
Example #17
Source File: process.py From scanorama with MIT License | 6 votes |
def load_names(data_names, norm=True, log1p=False, verbose=True):
    # Load datasets.
    datasets = []
    genes_list = []
    n_cells = 0
    for name in data_names:
        X_i, genes_i = load_data(name)
        if norm:
            X_i = normalize(X_i, axis=1)
        if log1p:
            X_i = np.log1p(X_i)
        X_i = csr_matrix(X_i)

        datasets.append(X_i)
        genes_list.append(genes_i)
        n_cells += X_i.shape[0]
        if verbose:
            print('Loaded {} with {} genes and {} cells'
                  .format(name, X_i.shape[1], X_i.shape[0]))
    if verbose:
        print('Found {} cells among all datasets'.format(n_cells))
    return datasets, genes_list, n_cells
Example #18
Source File: other.py From StageDP with MIT License | 6 votes |
def vectorize(features, vocab):
    """Transform a features list into a numeric vector with a given vocab

    :type vocab: dict
    :param vocab: vocab for distributional representation
    """
    vec = lil_matrix((1, len(vocab)))
    for feat in features:
        try:
            fidx = vocab[feat]
            vec[0, fidx] += 1.0
        except KeyError:
            pass
    # Normalization
    vec = normalize(vec)
    return vec
Example #19
Source File: data_model.py From MultiKE with MIT License | 5 votes |
def _generate_attribute_value_vectors(self):
    self.literal_set = set(self.literal_list)
    values_set = set()
    cleaned_attribute_triples_list1, _, _ = clear_attribute_triples(self.kgs.kg1.local_attribute_triples_list)
    cleaned_attribute_triples_list2, _, _ = clear_attribute_triples(self.kgs.kg2.local_attribute_triples_list)
    attribute_triples_list1, attribute_triples_list2 = set(), set()

    for h, a, v in cleaned_attribute_triples_list1:
        if v in self.literal_set:
            values_set.add(v)
            attribute_triples_list1.add((h, a, v))

    for h, a, v in cleaned_attribute_triples_list2:
        if v in self.literal_set:
            values_set.add(v)
            attribute_triples_list2.add((h, a, v))
    print("selected attribute triples", len(attribute_triples_list1), len(attribute_triples_list2))

    values_id_dic = dict()
    values_list = list(values_set)
    num = len(values_list)
    for i in range(num):
        values_id_dic[values_list[i]] = i
    id_attribute_triples1 = set([(h, a, int(values_id_dic[v])) for (h, a, v) in attribute_triples_list1])
    id_attribute_triples2 = set([(h, a, int(values_id_dic[v])) for (h, a, v) in attribute_triples_list2])
    self.kgs.kg1.set_attributes(id_attribute_triples1)
    self.kgs.kg2.set_attributes(id_attribute_triples2)
    sup_triples1, sup_triples2 = generate_sup_attribute_triples(self.kgs.train_links,
                                                                self.kgs.kg1.av_dict,
                                                                self.kgs.kg2.av_dict)
    self.kgs.kg1.add_sup_attribute_triples(sup_triples1)
    self.kgs.kg2.add_sup_attribute_triples(sup_triples2)

    num = len(values_id_dic)
    value_ordered_list = list()
    for i in range(num):
        value = values_list[i]
        value_index = self.literal_id_dic.get(value)
        value_ordered_list.append(value_index)
    print('value_ordered_list', len(value_ordered_list))
    value_vectors = self.literal_vectors_mat[value_ordered_list, ]
    print("value embeddings mat:", type(value_vectors), value_vectors.shape)
    if self.args.literal_normalize:
        value_vectors = preprocessing.normalize(value_vectors)
    self.value_vectors = value_vectors
Example #20
Source File: tree_tensor_network_mnist.py From Tree-Tensor-Networks-in-Machine-Learning with MIT License | 5 votes |
def contract_unit(self, tensor0, tensor1, tensor2, tensor3, tensor4, Num):
    temp = self.contract_local(tensor1, tensor2, tensor3, tensor4, Num)
    tensor_result = tn.contract(
        tensor0, temp, ["1", "2", "3", "4"], ["a", "b", "c", "d"])
    if len(tensor_result.shape) == 2:
        tensor_result.data = preprocessing.normalize(
            tensor_result.data, axis=0, norm='l2')  # normalization
    else:
        for i in range(tensor_result.shape[1]):  # normalization
            tensor_result.data[:, i, :] = preprocessing.normalize(
                tensor_result.data[:, i, :], axis=0, norm='l2')
    return tensor_result
Example #21
Source File: classifier.py From Video-Highlight-Detection with MIT License | 5 votes |
def fit(self, x, y):
    log.l.info('=======> fitting...')
    if self.if_grid_search:
        self.model = self._build_model(**self.model_best_params)
    if self.model_name == 'svm' and self.model_kernel == 'x2':
        x -= MIN_FEATURE
        x = preprocessing.normalize(x, norm='l1')
    self.model.fit(x, y)
Example #22
Source File: match_lost_kc.py From lostX with MIT License | 5 votes |
def normalize_wid(ftIn):
    # L2-normalize rows first (normalize's default axis=1), then columns (axis=0).
    return normalize(normalize(ftIn), axis=0)
Example #23
Source File: initializers.py From MultiKE with MIT License | 5 votes |
def random_unit_init(shape, name, is_l2_norm, dtype=None):
    with tf.name_scope('random_unit_init'):
        vectors = list()
        for i in range(shape[0]):
            vectors.append([random.gauss(0, 1) for j in range(shape[1])])
        embeddings = tf.Variable(preprocessing.normalize(np.matrix(vectors)),
                                 name=name, dtype=dtype)
        return tf.nn.l2_normalize(embeddings, 1) if is_l2_norm else embeddings
Example #24
Source File: tree_tensor_network_mnist.py From Tree-Tensor-Networks-in-Machine-Learning with MIT License | 5 votes |
def contract_special(self, tensor0, tensor1, lab1, tensor2, lab2, tensor3, lab3, Num):
    temp = self.contract_local3(tensor1, tensor2, tensor3, Num)
    tensor_result = tn.contract(
        tensor0, temp, [lab1, lab2, lab3], ["a", "b", "c"])
    tensor_result.data = tensor_result.data.transpose(1, 0, 2)
    tensor_result.labels[0], tensor_result.labels[1] = \
        tensor_result.labels[1], tensor_result.labels[0]
    for i in range(tensor_result.shape[1]):  # normalization
        tensor_result.data[:, i, :] = preprocessing.normalize(
            tensor_result.data[:, i, :], axis=0, norm='l2')
    return tensor_result
Example #25
Source File: text.py From pyxclib with MIT License | 5 votes |
def transform(self, raw_documents, num_threads=1):
    X = self._compute_countf(raw_documents)
    if self.sublinear_tf:
        np.log(X.data, X.data)
        X.data += 1
    if self.use_idf:
        X = X * self.idf
    if self.norm:
        X = normalize(X, norm=self.norm, copy=False)
    return X
Example #26
Source File: sparse.py From pyxclib with MIT License | 5 votes |
def normalize(X, norm='l2', copy=False):
    """Normalize sparse or dense matrix

    Arguments:
    ---------
    X: csr_matrix or csc_matrix
        sparse matrix
    norm: str, optional, default='l2'
        normalize with l1/l2
    copy: boolean, optional, default=False
        whether to copy data or not
    """
    features = sk_normalize(X, norm=norm, copy=copy)
    return features
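The wrapper above simply delegates to what is presumably scikit-learn's normalize imported under the alias sk_normalize. A minimal usage sketch (editor-added, not from the pyxclib source) showing the sparse-matrix case:

# Sketch assumes sk_normalize is sklearn.preprocessing.normalize.
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize as sk_normalize

X = csr_matrix([[3.0, 4.0], [1.0, 0.0]])
X_l2 = sk_normalize(X, norm='l2', copy=True)  # each row scaled to unit L2 norm
print(X_l2.toarray())  # [[0.6 0.8]
                       #  [1.  0. ]]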
Example #27
Source File: features.py From pyxclib with MIT License | 5 votes |
def normalize(self, norm='max', copy=False):
    self.X = scale(self.X, copy=copy, norm=norm)
Example #28
Source File: labels.py From pyxclib with MIT License | 5 votes |
def normalize(self, norm='max', copy=False):
    self.Y = scale(self.Y, copy=copy, norm=norm) if self._valid else None
Example #29
Source File: MultiKE_Late.py From MultiKE with MIT License | 5 votes |
def valid_WVA(model):
    nv_ent_embeds1 = tf.nn.embedding_lookup(
        model.name_embeds, model.kgs.valid_entities1).eval(session=model.session)
    rv_ent_embeds1 = tf.nn.embedding_lookup(
        model.rv_ent_embeds, model.kgs.valid_entities1).eval(session=model.session)
    av_ent_embeds1 = tf.nn.embedding_lookup(
        model.av_ent_embeds, model.kgs.valid_entities1).eval(session=model.session)
    weight11, weight21, weight31 = wva(nv_ent_embeds1, rv_ent_embeds1, av_ent_embeds1)

    test_list = model.kgs.valid_entities2 + model.kgs.test_entities2
    nv_ent_embeds2 = tf.nn.embedding_lookup(
        model.name_embeds, test_list).eval(session=model.session)
    rv_ent_embeds2 = tf.nn.embedding_lookup(
        model.rv_ent_embeds, test_list).eval(session=model.session)
    av_ent_embeds2 = tf.nn.embedding_lookup(
        model.av_ent_embeds, test_list).eval(session=model.session)
    weight12, weight22, weight32 = wva(nv_ent_embeds2, rv_ent_embeds2, av_ent_embeds2)

    weight1 = weight11 + weight12
    weight2 = weight21 + weight22
    weight3 = weight31 + weight32
    all_weight = weight1 + weight2 + weight3
    weight1 /= all_weight
    weight2 /= all_weight
    weight3 /= all_weight
    print('weights', weight1, weight2, weight3)

    embeds1 = weight1 * nv_ent_embeds1 + \
              weight2 * rv_ent_embeds1 + \
              weight3 * av_ent_embeds1
    embeds2 = weight1 * nv_ent_embeds2 + \
              weight2 * rv_ent_embeds2 + \
              weight3 * av_ent_embeds2
    print('wvag valid results:')
    hits1_12, mrr_12 = eva.valid(embeds1, embeds2, None, model.args.top_k,
                                 model.args.test_threads_num, normalize=True)
    del nv_ent_embeds1, rv_ent_embeds1, av_ent_embeds1
    del nv_ent_embeds2, rv_ent_embeds2, av_ent_embeds2
    del embeds1, embeds2
    gc.collect()
    return mrr_12
Example #30
Source File: helpers.py From user-behavior-anomaly-detector with MIT License | 5 votes |
def normalize(dataset):
    # Normalize X, shape (n_samples, n_features).
    # Note: the original line read `return normalize(dataset)`, which would
    # recurse into this very function because it shadows the imported name;
    # the qualified call below (assuming `from sklearn import preprocessing`
    # at module level) is the likely intent.
    return preprocessing.normalize(dataset)
    # datasetX = datasetX / float(self.settings.getint("Data", "vocabulary_size"))

## File operations ##