Python config.RANDOM_SEED Examples
The following are 18 code examples of config.RANDOM_SEED. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the config module.
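In these projects, RANDOM_SEED is a plain module-level constant defined in a project-wide config.py and passed to every component that accepts a seed. Below is a minimal sketch of that pattern; the file layout and the seed value are illustrative only, not copied from any of the projects listed here.

# config.py -- hypothetical minimal config module
RANDOM_SEED = 2016  # one fixed seed shared across the whole pipeline

# some_feature.py -- typical consumers of the constant
import numpy as np
from sklearn.decomposition import TruncatedSVD

import config

np.random.seed(config.RANDOM_SEED)  # seed NumPy's global RNG
svd = TruncatedSVD(n_components=100, random_state=config.RANDOM_SEED)  # reproducible SVD

Keeping the seed in one place means the SVD decompositions, K-fold splits, and TensorFlow initializers in the examples below can all be reproduced across runs.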
Example #1
Source File: extreme_ensemble_selection.py From kaggle-HomeDepot with MIT License
def main(options):
    # create sub folder
    subm_folder = "%s/ensemble_selection"%config.SUBM_DIR
    os_utils._create_dirs( [subm_folder] )
    subm_prefix = "%s/test.pred.[%s]" % (subm_folder, options.outfile)

    # get model list
    log_folder = "%s/level%d_models"%(config.LOG_DIR, options.level-1)
    model_list = get_model_list(log_folder, options.size)

    # get instance splitter
    if options.level not in [2, 3]:
        inst_splitter = None
    elif options.level == 2:
        inst_splitter = splitter_level2
    elif options.level == 3:
        inst_splitter = splitter_level3

    ees = ExtremeEnsembleSelection(
            model_folder=config.OUTPUT_DIR,
            model_list=model_list,
            subm_prefix=subm_prefix,
            weight_opt_max_evals=options.weight_opt_max_evals,
            w_min=-1.,
            w_max=1.,
            inst_subsample=options.inst_subsample,
            inst_subsample_replacement=options.inst_subsample_replacement,
            inst_splitter=inst_splitter,
            model_subsample=options.model_subsample,
            model_subsample_replacement=options.model_subsample_replacement,
            bagging_size=options.bagging_size,
            init_top_k=options.init_top_k,
            epsilon=options.epsilon,
            multiprocessing=False,
            multiprocessing_num_cores=config.NUM_CORES,
            enable_extreme=options.enable_extreme,
            random_seed=config.RANDOM_SEED
    )
    ees.go()
Example #2
Source File: feature_vector_space.py From kaggle-HomeDepot with MIT License
def transform(self):
    # ngrams
    obs_ngrams = list(map(lambda x: ngram_utils._ngrams(x.split(" "), self.obs_ngram, "_"), self.obs_corpus))
    target_ngrams = list(map(lambda x: ngram_utils._ngrams(x.split(" "), self.target_ngram, "_"), self.target_corpus))
    # cooccurrence ngrams
    cooc_terms = list(map(lambda lst1, lst2: self._get_cooc_terms(lst1, lst2, "X"), obs_ngrams, target_ngrams))
    ## tfidf
    tfidf = self._init_word_ngram_tfidf(ngram=1)
    X = tfidf.fit_transform(cooc_terms)
    ## svd
    svd = TruncatedSVD(n_components=self.svd_dim, n_iter=self.svd_n_iter, random_state=config.RANDOM_SEED)
    return svd.fit_transform(X)

# 2nd in CrowdFlower (preprocessing_mikhail.py)
Example #3
Source File: task.py From NFETC with MIT License
def _get_model(self):
    np.random.seed(config.RANDOM_SEED)
    kwargs = {
        "sequence_length": config.MAX_DOCUMENT_LENGTH,
        "mention_length": config.MENTION_SIZE,
        "num_classes": self.num_types,
        "vocab_size": self.embedding.vocab_size,
        "embedding_size": self.embedding.embedding_dim,
        "position_size": self.embedding.position_size,
        "pretrained_embedding": self.embedding.embedding,
        "wpe": np.random.random_sample((self.embedding.position_size, self.hparams.wpe_dim)),
        "type_info": self.type_info,
        "hparams": self.hparams
    }
    if "nfetc" in self.model_name:
        return NFETC(**kwargs)
    else:
        raise AttributeError("Invalid model name!")
Example #4
Source File: feature_vector_space.py From kaggle-HomeDepot with MIT License
def transform(self):
    ## get common vocabulary
    tfidf = self._init_char_ngram_tfidf(self.ngram)
    tfidf.fit(list(self.obs_corpus) + list(self.target_corpus))
    vocabulary = tfidf.vocabulary_
    ## obs tfidf
    tfidf = self._init_char_ngram_tfidf(self.ngram, vocabulary)
    X_obs = tfidf.fit_transform(self.obs_corpus)
    ## target tfidf
    tfidf = self._init_char_ngram_tfidf(self.ngram, vocabulary)
    X_target = tfidf.fit_transform(self.target_corpus)
    ## svd
    svd = TruncatedSVD(n_components=self.svd_dim, n_iter=self.svd_n_iter, random_state=config.RANDOM_SEED)
    svd.fit(scipy.sparse.vstack((X_obs, X_target)))
    X_obs = svd.transform(X_obs)
    X_target = svd.transform(X_target)
    ## cosine similarity
    sim = list(map(dist_utils._cosine_sim, X_obs, X_target))
    sim = np.asarray(sim).squeeze()
    return sim

# ------------------- Char Distribution Based features ------------------
# 2nd in CrowdFlower (preprocessing_stanislav.py)
Example #5
Source File: stratifiedKfold.py From RecommenderSystems with MIT License
def main():
    train_x, train_y = _load_data()
    print('loading data done!')

    folds = list(StratifiedKFold(n_splits=10, shuffle=True,
                                 random_state=config.RANDOM_SEED).split(train_x, train_y))
    fold_index = []
    for i, (train_id, valid_id) in enumerate(folds):
        fold_index.append(valid_id)
    print("fold num: %d" % (len(fold_index)))
    fold_index = np.array(fold_index)
    np.save(config.DATA_PATH + "fold_index.npy", fold_index)

    save_x_y(fold_index, train_x, train_y)
    print("save train_x_y done!")

    fold_index = np.load(config.DATA_PATH + "fold_index.npy")
    save_i(fold_index)
    print("save index done!")
Example #6
Source File: stratifiedKfold.py From AutoInt with MIT License
def main():
    train_x, train_y = _load_data()
    print('loading data done!')

    folds = list(StratifiedKFold(n_splits=10, shuffle=True,
                                 random_state=config.RANDOM_SEED).split(train_x, train_y))
    fold_index = []
    for i, (train_id, valid_id) in enumerate(folds):
        fold_index.append(valid_id)
    print("fold num: %d" % (len(fold_index)))
    fold_index = np.array(fold_index)
    np.save(config.DATA_PATH + "fold_index.npy", fold_index)

    save_x_y(fold_index, train_x, train_y)
    print("save train_x_y done!")

    fold_index = np.load(config.DATA_PATH + "fold_index.npy")
    save_i(fold_index)
    print("save index done!")
Example #7
Source File: feature_vector_space.py From kaggle-HomeDepot with MIT License
def transform(self):
    tfidf = self._init_word_ngram_tfidf(self.ngram)
    X = tfidf.fit_transform(self.obs_corpus)
    svd = TruncatedSVD(n_components=self.svd_dim,
                       n_iter=self.svd_n_iter, random_state=config.RANDOM_SEED)
    return svd.fit_transform(X)
Example #8
Source File: nfetc.py From NFETC with MIT License
def add_hidden_layer(self, x, idx):
    dim = self.feature_dim if idx == 0 else self.hidden_size
    with tf.variable_scope("hidden_%d" % idx):
        W = tf.get_variable("W", shape=[dim, self.hidden_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        b = tf.get_variable("b", shape=[self.hidden_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        h = tf.nn.xw_plus_b(x, W, b)
        h_norm = tf.layers.batch_normalization(h, training=self.phase)
        h_drop = tf.nn.dropout(tf.nn.relu(h_norm), self.dense_dropout, seed=config.RANDOM_SEED)
    return h_drop
Example #9
Source File: task.py From HRERE with MIT License
def _get_model(self):
    np.random.seed(config.RANDOM_SEED)
    kwargs = {
        "sequence_length": config.MAX_DOCUMENT_LENGTH,
        "num_classes": config.NUM_RELATION,
        "vocab_size": self.embedding.vocab_size,
        "embedding_size": self.embedding.embedding_dim,
        "position_size": self.embedding.position_size,
        "pretrained_embedding": self.embedding.embedding,
        "wpe": np.random.random_sample((self.embedding.position_size, self.hparams.wpe_size)),
        "hparams": self.hparams,
    }
    if "base" in self.model_name:
        return BiLSTM(**kwargs)
    elif "complex_hrere" in self.model_name:
        kwargs["entity_embedding1"] = self.entity_embedding1
        kwargs["entity_embedding2"] = self.entity_embedding2
        kwargs["relation_embedding1"] = self.relation_embedding1
        kwargs["relation_embedding2"] = self.relation_embedding2
        return ComplexHRERE(**kwargs)
    elif "real_hrere" in self.model_name:
        kwargs["entity_embedding"] = self.entity_embedding
        kwargs["relation_embedding"] = self.relation_embedding
        return RealHRERE(**kwargs)
    else:
        raise AttributeError("Invalid model name!")
Example #10
Source File: bilstm.py From HRERE with MIT License
def add_hidden_layer(self, x, idx):
    dim = self.output_dim if idx == 0 else self.hidden_size
    with tf.variable_scope("hidden_%d" % idx):
        W = tf.get_variable("W", shape=[dim, self.hidden_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        b = tf.get_variable("b", shape=[self.hidden_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        h = tf.nn.xw_plus_b(x, W, b)
        h_norm = tf.layers.batch_normalization(h, training=self.phase)
        h_drop = tf.nn.dropout(tf.nn.relu(h_norm), self.dense_dropout, seed=config.RANDOM_SEED)
    return h_drop
Example #11
Source File: real_hrere.py From HRERE with MIT License
def add_hidden_layer(self, x, idx):
    dim = self.output_dim if idx == 0 else self.hidden_size
    with tf.variable_scope("hidden_%d" % idx):
        W = tf.get_variable("W", shape=[dim, self.hidden_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        b = tf.get_variable("b", shape=[self.hidden_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        self.var_list2.append(W)
        self.var_list2.append(b)
        h = tf.nn.xw_plus_b(x, W, b)
        h_norm = tf.layers.batch_normalization(h, training=self.phase)
        h_drop = tf.nn.dropout(tf.nn.relu(h_norm), self.dense_dropout, seed=config.RANDOM_SEED)
    return h_drop
Example #12
Source File: splitter.py From kaggle-HomeDepot with MIT License
def __init__(self, dfTrain, dfTest, n_iter=5, random_state=config.RANDOM_SEED,
             verbose=False, plot=False, split_param=[0.5, 0.25, 0.5]):
    self.dfTrain = dfTrain
    self.dfTest = dfTest
    self.n_iter = n_iter
    self.random_state = random_state
    self.verbose = verbose
    self.plot = plot
    self.split_param = split_param
Example #13
Source File: feature_vector_space.py From kaggle-HomeDepot with MIT License
def transform(self):
    ## tfidf
    tfidf = self._init_word_ngram_tfidf(ngram=self.ngram)
    X_obs = tfidf.fit_transform(self.obs_corpus)
    X_target = tfidf.fit_transform(self.target_corpus)
    X_tfidf = scipy.sparse.hstack([X_obs, X_target]).tocsr()
    ## svd
    svd = TruncatedSVD(n_components=self.svd_dim, n_iter=self.svd_n_iter, random_state=config.RANDOM_SEED)
    X_svd = svd.fit_transform(X_tfidf)
    return X_svd

# -------------------------------- TSNE ------------------------------------------
# 2nd in CrowdFlower (preprocessing_mikhail.py)
Example #14
Source File: feature_vector_space.py From kaggle-HomeDepot with MIT License
def transform(self):
    tfidf = self._init_char_ngram_tfidf(self.ngram)
    X = tfidf.fit_transform(self.obs_corpus)
    svd = TruncatedSVD(n_components=self.svd_dim, n_iter=self.svd_n_iter, random_state=config.RANDOM_SEED)
    return svd.fit_transform(X)

# ------------------------ Cooccurrence LSA -------------------------------
# 1st in CrowdFlower
Example #15
Source File: bilstm.py From HRERE with MIT License
def add_prediction_op(self):
    self.add_embedding()

    with tf.name_scope("sentence_repr"):
        attention_w = tf.get_variable("attention_w", [self.state_size, 1])
        cell_forward = tf.contrib.rnn.LSTMCell(self.state_size)
        cell_backward = tf.contrib.rnn.LSTMCell(self.state_size)
        cell_forward = tf.contrib.rnn.DropoutWrapper(
            cell_forward, input_keep_prob=self.dense_dropout,
            output_keep_prob=self.rnn_dropout, seed=config.RANDOM_SEED)
        cell_backward = tf.contrib.rnn.DropoutWrapper(
            cell_backward, input_keep_prob=self.dense_dropout,
            output_keep_prob=self.rnn_dropout, seed=config.RANDOM_SEED)

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_forward, cell_backward, self.input_sentences,
            sequence_length=self.input_textlen_flatten, dtype=tf.float32)
        outputs_added = tf.nn.tanh(tf.add(outputs[0], outputs[1]))
        alpha = tf.nn.softmax(tf.reshape(tf.matmul(
            tf.reshape(outputs_added, [-1, self.state_size]), attention_w),
            [-1, self.sequence_length]))
        alpha = tf.expand_dims(alpha, 1)
        self.sen_repr = tf.squeeze(tf.matmul(alpha, outputs_added))

    self.output_features = self.sen_repr
    self.output_dim = self.state_size

    with tf.name_scope("sentence_att"):
        attention_A = tf.get_variable("attention_A", shape=[self.output_dim])
        query_r = tf.get_variable("query_r", shape=[self.output_dim, 1])
        sen_repre = tf.tanh(self.output_features)
        sen_alpha = tf.expand_dims(tf.nn.softmax(tf.reshape(tf.matmul(
            tf.multiply(sen_repre, attention_A), query_r), [-1, config.BAG_SIZE])), 1)
        sen_s = tf.reshape(tf.matmul(sen_alpha,
            tf.reshape(sen_repre, [-1, config.BAG_SIZE, self.output_dim])),
            [-1, self.output_dim])
        h_drop = tf.nn.dropout(tf.nn.relu(sen_s), self.dense_dropout, seed=config.RANDOM_SEED)
        h_drop.set_shape([None, self.output_dim])
        h_output = tf.layers.batch_normalization(h_drop, training=self.phase)

    for i in range(self.hidden_layers):
        h_output = self.add_hidden_layer(h_output, i)

    with tf.variable_scope("output"):
        W = tf.get_variable("W", shape=[self.hidden_size, self.num_classes],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        b = tf.get_variable("b", shape=[self.num_classes],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        self.scores = tf.nn.xw_plus_b(h_output, W, b, name="scores")
        self.probs = tf.nn.softmax(self.scores, name="probs")
        self.predictions = tf.argmax(self.probs, 1, name="predictions")
Example #16
Source File: task.py From HRERE with MIT License
def __init__(self, model_name, runs, params_dict, logger):
    print("Loading data...")
    words, positions, heads, tails, labels = pkl_utils._load(config.GROUPED_TRAIN_DATA)
    words_test, positions_test, heads_test, tails_test, labels_test = pkl_utils._load(config.GROUPED_TEST_DATA)  # noqa

    self.embedding = embedding_utils.Embedding(
        config.EMBEDDING_DATA,
        list([s for bags in words for s in bags]) +
        list([s for bags in words_test for s in bags]),
        config.MAX_DOCUMENT_LENGTH)

    print("Preprocessing data...")
    textlen = np.array([[self.embedding.len_transform(x) for x in y] for y in words])
    words = np.array([[self.embedding.text_transform(x) for x in y] for y in words])
    positions = np.array([[self.embedding.position_transform(x) for x in y] for y in positions])

    textlen_test = np.array([[self.embedding.len_transform(x) for x in y] for y in words_test])
    words_test = np.array([[self.embedding.text_transform(x) for x in y] for y in words_test])
    positions_test = np.array([[self.embedding.position_transform(x) for x in y] for y in positions_test])  # noqa

    ss = ShuffleSplit(n_splits=1, test_size=0.1, random_state=config.RANDOM_SEED)
    for train_index, valid_index in ss.split(np.zeros(len(labels)), labels):
        words_train, words_valid = words[train_index], words[valid_index]
        textlen_train, textlen_valid = textlen[train_index], textlen[valid_index]
        positions_train, positions_valid = positions[train_index], positions[valid_index]
        heads_train, heads_valid = heads[train_index], heads[valid_index]
        tails_train, tails_valid = tails[train_index], tails[valid_index]
        labels_train, labels_valid = labels[train_index], labels[valid_index]

    if "hrere" in model_name:
        self.full_set = list(zip(words, textlen, positions, heads, tails, labels))
        self.train_set = list(zip(words_train, textlen_train, positions_train, heads_train, tails_train, labels_train))  # noqa
        self.valid_set = list(zip(words_valid, textlen_valid, positions_valid, heads_valid, tails_valid, labels_valid))  # noqa
        self.test_set = list(zip(words_test, textlen_test, positions_test, heads_test, tails_test, labels_test))  # noqa
        if "complex" in model_name:
            self.entity_embedding1 = np.load(config.ENTITY_EMBEDDING1)
            self.entity_embedding2 = np.load(config.ENTITY_EMBEDDING2)
            self.relation_embedding1 = np.load(config.RELATION_EMBEDDING1)
            self.relation_embedding2 = np.load(config.RELATION_EMBEDDING2)
        else:
            self.entity_embedding = np.load(config.ENTITY_EMBEDDING)
            self.relation_embedding = np.load(config.RELATION_EMBEDDING)
    else:
        self.full_set = list(zip(words, textlen, positions, labels))
        self.train_set = list(zip(words_train, textlen_train, positions_train, labels_train))  # noqa
        self.valid_set = list(zip(words_valid, textlen_valid, positions_valid, labels_valid))  # noqa
        self.test_set = list(zip(words_test, textlen_test, positions_test, labels_test))  # noqa

    self.model_name = model_name
    self.runs = runs
    self.params_dict = params_dict
    self.hparams = AttrDict(params_dict)
    self.logger = logger

    self.model = self._get_model()
    self.saver = tf.train.Saver(tf.global_variables())
    checkpoint_dir = os.path.abspath(config.CHECKPOINT_DIR)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    self.checkpoint_prefix = os.path.join(checkpoint_dir, self.__str__())
Example #17
Source File: splitter.py From kaggle-HomeDepot with MIT License
def main():
    dfTrain = pd.read_csv(config.TRAIN_DATA, encoding="ISO-8859-1")
    dfTest = pd.read_csv(config.TEST_DATA, encoding="ISO-8859-1")

    # splits for level1
    splitter = HomedepotSplitter(dfTrain=dfTrain,
                                 dfTest=dfTest,
                                 n_iter=config.N_RUNS,
                                 random_state=config.RANDOM_SEED,
                                 verbose=True,
                                 plot=True,
                                 # tune these params to get a close distribution
                                 split_param=[0.5, 0.25, 0.5],
                                 )
    splitter.split()
    splitter.save("%s/splits_level1.pkl"%config.SPLIT_DIR)
    splits_level1 = splitter.splits

    ## splits for level2
    splits_level1 = pkl_utils._load("%s/splits_level1.pkl"%config.SPLIT_DIR)
    splits_level2 = [0]*config.N_RUNS
    for run, (trainInd, validInd) in enumerate(splits_level1):
        dfValid = dfTrain.iloc[validInd].copy()
        splitter2 = HomedepotSplitter(dfTrain=dfValid,
                                      dfTest=dfTest,
                                      n_iter=1,
                                      random_state=run,
                                      verbose=True,
                                      # tune these params to get a close distribution
                                      split_param=[0.5, 0.15, 0.6])
        splitter2.split()
        splits_level2[run] = splitter2.splits[0]
    pkl_utils._save("%s/splits_level2.pkl"%config.SPLIT_DIR, splits_level2)

    ## splits for level3
    splits_level2 = pkl_utils._load("%s/splits_level2.pkl"%config.SPLIT_DIR)
    splits_level3 = [0]*config.N_RUNS
    for run, (trainInd, validInd) in enumerate(splits_level2):
        dfValid = dfTrain.iloc[validInd].copy()
        splitter3 = HomedepotSplitter(dfTrain=dfValid,
                                      dfTest=dfTest,
                                      n_iter=1,
                                      random_state=run,
                                      verbose=True,
                                      # tune these params to get a close distribution
                                      split_param=[0.5, 0.15, 0.7])
        splitter3.split()
        splits_level3[run] = splitter3.splits[0]
    pkl_utils._save("%s/splits_level3.pkl"%config.SPLIT_DIR, splits_level3)
Example #18
Source File: nfetc.py From NFETC with MIT License
def add_prediction_op(self):
    self.add_embedding()

    with tf.name_scope("sentence_repr"):
        attention_w = tf.get_variable("attention_w", [self.state_size, 1])
        cell_forward = tf.contrib.rnn.LSTMCell(self.state_size)
        cell_backward = tf.contrib.rnn.LSTMCell(self.state_size)
        cell_forward = tf.contrib.rnn.DropoutWrapper(
            cell_forward, input_keep_prob=self.dense_dropout,
            output_keep_prob=self.rnn_dropout, seed=config.RANDOM_SEED)
        cell_backward = tf.contrib.rnn.DropoutWrapper(
            cell_backward, input_keep_prob=self.dense_dropout,
            output_keep_prob=self.rnn_dropout, seed=config.RANDOM_SEED)

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_forward, cell_backward, self.input_sentences,
            sequence_length=self.input_textlen, dtype=tf.float32)
        outputs_added = tf.nn.tanh(tf.add(outputs[0], outputs[1]))
        alpha = tf.nn.softmax(tf.reshape(tf.matmul(
            tf.reshape(outputs_added, [-1, self.state_size]), attention_w),
            [-1, self.sequence_length]))
        alpha = tf.expand_dims(alpha, 1)
        self.sen_repr = tf.squeeze(tf.matmul(alpha, outputs_added))

    with tf.name_scope("mention_repr"):
        cell = tf.contrib.rnn.LSTMCell(self.state_size)
        cell = tf.contrib.rnn.DropoutWrapper(
            cell, input_keep_prob=self.dense_dropout,
            output_keep_prob=self.rnn_dropout, seed=config.RANDOM_SEED)
        outputs, states = tf.nn.dynamic_rnn(
            cell, self.embedded_mentions,
            sequence_length=self.input_mentionlen, dtype=tf.float32)
        self.men_repr = self.extract_last_relevant(outputs, self.input_mentionlen)

    self.features = tf.concat([self.sen_repr, self.men_repr, self.mention_embedding], -1)
    self.feature_dim = self.state_size * 2 + self.embedding_size

    h_drop = tf.nn.dropout(tf.nn.relu(self.features), self.dense_dropout, seed=config.RANDOM_SEED)
    h_drop.set_shape([None, self.feature_dim])
    h_output = tf.layers.batch_normalization(h_drop, training=self.phase)
    for i in range(self.hidden_layers):
        h_output = self.add_hidden_layer(h_output, i)
    if self.hidden_layers == 0:
        self.hidden_size = self.feature_dim

    with tf.variable_scope("output"):
        W = tf.get_variable("W", shape=[self.hidden_size, self.num_classes],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        b = tf.get_variable("b", shape=[self.num_classes],
                initializer=tf.contrib.layers.xavier_initializer(seed=config.RANDOM_SEED))
        self.scores = tf.nn.xw_plus_b(h_output, W, b, name="scores")
        self.proba = tf.nn.softmax(self.scores, name="proba")
        self.adjusted_proba = tf.matmul(self.proba, self.tune)
        self.adjusted_proba = tf.clip_by_value(self.adjusted_proba, 1e-10, 1.0)
        self.predictions = tf.argmax(self.adjusted_proba, 1, name="predictions")