Python sklearn.utils.class_weight.compute_class_weight() Examples
The following are 21 code examples of sklearn.utils.class_weight.compute_class_weight().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
sklearn.utils.class_weight, or try the search function.
Example #1
Source File: cnn_class.py From eyenet with MIT License | 7 votes |
def split_data(self, y_file_path, X, test_data_size=0.2):
    """
    Split data into test and training data sets.

    Nothing is returned: the loaded data, the balanced class weights and the
    four split arrays are stored as attributes on the instance.

    INPUT
        y_file_path: path to CSV containing labels (its 'level' column is used)
        X: file holding the NumPy array of arrays; it is passed to np.load
        test_data_size: size of test/train split. Value from 0 to 1

    OUTPUT
        None. Sets self.X, self.y, self.weights, self.test_data_size and the
        four arrays self.X_train, self.X_test, self.y_train, self.y_test.
    """
    labels = pd.read_csv(y_file_path)

    self.X = np.load(X)
    self.y = np.array(labels['level'])

    # `classes` and `y` are keyword-only in current scikit-learn; passing them
    # by keyword also works on older releases.
    self.weights = class_weight.compute_class_weight(
        'balanced', classes=np.unique(self.y), y=self.y)

    self.test_data_size = test_data_size
    # Fixed random_state keeps the split reproducible between runs.
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        self.X, self.y,
        test_size=self.test_data_size,
        random_state=42)
Example #2
Source File: test_class_weight.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_compute_class_weight_balanced_negative():
    """Check 'balanced' class weights when some class labels are negative."""
    classes = np.array([-2, -1, 0])

    # Test with balanced class labels: every class occurs twice.
    y = np.asarray([-1, -1, 0, 0, -2, -2])
    # `classes` and `y` are keyword-only in current scikit-learn.
    cw = compute_class_weight("balanced", classes=classes, y=y)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])
    cw = compute_class_weight("balanced", classes=classes, y=y)
    assert_equal(len(cw), len(classes))
    # Shift labels by 2 so the negative values become valid bincount indices.
    class_counts = np.bincount(y + 2)
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2. / 3, 2., 1.])
Example #3
Source File: test_class_weight.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_compute_class_weight_dict():
    """Check compute_class_weight when the weights are given as a dict."""
    classes = np.arange(3)
    class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
    y = np.asarray([0, 0, 1, 2])
    # `classes` and `y` are keyword-only in current scikit-learn.
    cw = compute_class_weight(class_weights, classes=classes, y=y)

    # When the user specifies class weights, compute_class_weights should just
    # return them.
    assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)

    # When a class weight is specified that isn't in classes, a ValueError
    # should get raised
    msg = 'Class label 4 not present.'
    class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
    # The helper forwards keyword arguments to the callable under test.
    assert_raise_message(ValueError, msg, compute_class_weight,
                         class_weights, classes=classes, y=y)

    msg = 'Class label -1 not present.'
    class_weights = {-1: 5.0, 0: 1.0, 1: 2.0, 2: 3.0}
    assert_raise_message(ValueError, msg, compute_class_weight,
                         class_weights, classes=classes, y=y)
Example #4
Source File: test_class_weight.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_compute_class_weight_dict():
    """Check that dict class weights are returned as-is and validated."""
    classes = np.arange(3)
    y = np.asarray([0, 0, 1, 2])

    class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
    # Keyword arguments: `classes` and `y` are keyword-only in current
    # scikit-learn and accepted by keyword on older releases as well.
    cw = compute_class_weight(class_weights, classes=classes, y=y)

    # When the user specifies class weights, compute_class_weights should just
    # return them.
    assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)

    # When a class weight is specified that isn't in classes, a ValueError
    # should get raised
    msg = 'Class label 4 not present.'
    class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
    assert_raise_message(ValueError, msg, compute_class_weight,
                         class_weights, classes=classes, y=y)

    msg = 'Class label -1 not present.'
    class_weights = {-1: 5.0, 0: 1.0, 1: 2.0, 2: 3.0}
    assert_raise_message(ValueError, msg, compute_class_weight,
                         class_weights, classes=classes, y=y)
Example #5
Source File: conv_net.py From arxiv-twitterbot with MIT License | 6 votes |
def fit(self, batch_size, epochs, save_best_model_to_filepath=None):
    """
    Train ``self.model`` on the stored training data and return it.

    :param batch_size: batch size forwarded to ``self.model.fit``
    :param epochs: number of epochs forwarded to ``self.model.fit``
    :param save_best_model_to_filepath: optional checkpoint path. When given,
        the model with the highest ``val_acc`` is saved there during training
        and reloaded into ``self.model`` afterwards.
    :return: ``self.model`` after training
    """
    # Only build the checkpoint callback when there is a path to write to;
    # constructing it with a None filepath is not safe on every Keras version.
    callbacks = []
    if save_best_model_to_filepath is not None:
        callbacks.append(ModelCheckpoint(save_best_model_to_filepath,
                                         monitor='val_acc',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max'))

    # Class weights are only needed (and only computed) when balancing is on.
    weights = None
    if self.balance_classes:
        # `classes` and `y` are keyword-only in current scikit-learn.
        weights = class_weight.compute_class_weight(
            'balanced',
            classes=np.unique(self.train_labels),
            y=self.train_labels)
        # Boost the weight at index 1 by an extra factor of 5.
        weights[1] = weights[1] * 5
        # NOTE(review): Keras documents `class_weight` as a dict mapping class
        # index to weight; confirm an array is accepted by the Keras in use.

    # Fit the model
    self.model.fit(self.x_train, self.y_train,
                   batch_size=batch_size,
                   epochs=epochs,
                   class_weight=weights,
                   callbacks=callbacks,
                   validation_data=[self.x_test, self.y_test])

    if save_best_model_to_filepath:
        # Continue with the best checkpoint rather than the last epoch.
        self.model = load_model(save_best_model_to_filepath)
    return self.model
Example #6
Source File: cnn_class.py From AI_in_Medicine_Clinical_Imaging_Classification with MIT License | 6 votes |
def split_data(self, y_file_path, X, test_data_size=0.2):
    """
    Split data into test and training data sets.

    The method stores its results on the instance and returns nothing.

    INPUT
        y_file_path: path to CSV containing labels (the 'level' column is read)
        X: file containing the NumPy array of arrays, loaded via np.load
        test_data_size: size of test/train split. Value from 0 to 1

    OUTPUT
        None. Four arrays are set on self: X_train, X_test, y_train, and
        y_test (plus X, y, weights and test_data_size).
    """
    label_frame = pd.read_csv(y_file_path)

    self.X = np.load(X)
    self.y = np.array(label_frame['level'])

    # Pass `classes`/`y` by keyword: they are keyword-only in current
    # scikit-learn, and keywords are accepted by older versions too.
    present_classes = np.unique(self.y)
    self.weights = class_weight.compute_class_weight(
        'balanced', classes=present_classes, y=self.y)

    self.test_data_size = test_data_size
    # random_state is pinned so repeated runs give the same split.
    split = train_test_split(self.X, self.y,
                             test_size=self.test_data_size,
                             random_state=42)
    self.X_train, self.X_test, self.y_train, self.y_test = split
Example #7
Source File: dataFunctions.py From baseline with Apache License 2.0 | 6 votes |
def calculate_class_weights(params):
    """
    Computes the class weights for the training data and writes out to a json file

    :param params: global parameters, used to find location of the dataset and json file
        (reads ``params.num_labels``, ``params.files['training_struct']`` and
        ``params.files['class_weight']``)
    :return: None; the weights are written to ``params.files['class_weight']``
    """
    # Per-category tally; indexing it below also raises KeyError for any
    # category that is not an integer in [0, num_labels).
    counts = {}
    for i in range(0, params.num_labels):
        counts[i] = 0

    # Use a context manager so the input file is closed deterministically.
    with open(params.files['training_struct']) as struct_file:
        trainingData = json.load(struct_file)

    ytrain = []
    for i, currData in enumerate(trainingData):
        ytrain.append(currData['category'])
        counts[currData['category']] += 1
        print(i)

    # `classes` and `y` are keyword-only in current scikit-learn.
    classWeights = class_weight.compute_class_weight(
        'balanced', classes=np.unique(ytrain), y=np.array(ytrain))

    with open(params.files['class_weight'], 'w') as json_file:
        json.dump(classWeights.tolist(), json_file)
Example #8
Source File: zil.py From incremental_learning.pytorch with MIT License | 5 votes |
def get_class_weights_raw(self, targets):
    """Return 'balanced' class weights for ``targets`` as a float tensor.

    :param targets: array-like of class labels.
    :return: float tensor on ``self._device`` with one weight per distinct
        label, in the sorted order produced by ``np.unique``.
    """
    unique_targets = np.unique(targets)
    # `classes` and `y` are keyword-only in current scikit-learn; keywords
    # are accepted by older releases too.
    class_weights = compute_class_weight(
        'balanced', classes=unique_targets, y=targets)
    return torch.tensor(class_weights).to(self._device).float()

# -----------
# Constraints
# -----------
Example #9
Source File: test_class_weight.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_compute_class_weight_balanced_unordered():
    """Check 'balanced' class weights when ``classes`` is not sorted."""
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])

    # `classes` and `y` are keyword-only in current scikit-learn.
    cw = compute_class_weight("balanced", classes=classes, y=y)

    # Pick the counts in the same (unordered) order as `classes`.
    class_counts = np.bincount(y)[classes]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2., 1., 2. / 3])
Example #10
Source File: test_class_weight.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_compute_class_weight_not_present():
    """Check that mismatches between ``classes`` and ``y`` raise ValueError."""
    # `classes` and `y` are forwarded by keyword throughout: they are
    # keyword-only in current scikit-learn, so positional forwarding would
    # raise TypeError instead of the ValueError under test.

    # Raise error when y does not contain all class labels
    classes = np.arange(4)
    y = np.asarray([0, 0, 0, 1, 1, 2])
    assert_raises(ValueError, compute_class_weight, "balanced",
                  classes=classes, y=y)

    # Fix exception in error message formatting when missing label is a string
    # https://github.com/scikit-learn/scikit-learn/issues/8312
    assert_raise_message(ValueError,
                         'Class label label_not_present not present',
                         compute_class_weight,
                         {'label_not_present': 1.}, classes=classes, y=y)

    # Raise error when y has items not in classes
    classes = np.arange(2)
    assert_raises(ValueError, compute_class_weight, "balanced",
                  classes=classes, y=y)
    assert_raises(ValueError, compute_class_weight, {0: 1., 1: 2.},
                  classes=classes, y=y)
Example #11
Source File: test_class_weight.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_compute_class_weight():
    """Test (and demo) compute_class_weight with the 'balanced' heuristic."""
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)

    # `classes` and `y` are keyword-only in current scikit-learn.
    cw = compute_class_weight("balanced", classes=classes, y=y)

    # total effect of samples is preserved
    class_counts = np.bincount(y)[2:]  # labels start at 2, skip empty bins
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])

    # Rarer classes get larger weights. A plain assert avoids depending on
    # the `assert_true` helper.
    assert cw[0] < cw[1] < cw[2]
Example #12
Source File: classifier_selection.py From causallib with Apache License 2.0 | 5 votes |
def _select_classifier_from_list(candidates, X, A, n_splits=5, seed=None, loss_type='01'):
    """Return the candidate classifier with the best class-weighted score.

    Every candidate is scored on predicting ``A`` from ``X``, using
    cross-validated predictions when ``n_splits >= 2`` and in-sample
    predictions otherwise. Samples are weighted by the 'balanced' weight of
    their class.

    :param candidates: sequence of classifiers to choose from.
    :param X: feature matrix.
    :param A: array of class labels to predict.
    :param n_splits: number of K-fold splits; values below 2 disable
        cross-validation and each candidate is fitted on all of the data.
    :param seed: random state for the K-fold shuffling.
    :param loss_type: '01' for weighted accuracy; anything else uses the
        weighted log-likelihood of ``predict_proba``'s second column.
    :return: the element of ``candidates`` with the highest score.
    """
    accuracies = np.zeros(len(candidates))

    # Expand the per-class weights to one weight per sample. LabelEncoder maps
    # labels to indices in sorted order, matching the order of np.unique(A).
    # `classes` and `y` are keyword-only in current scikit-learn.
    class_weight = compute_class_weight(
        'balanced', classes=np.unique(A), y=A)[LabelEncoder().fit_transform(A)]

    use_cv = n_splits >= 2
    if use_cv:
        cv = KFold(n_splits=n_splits, shuffle=True, random_state=seed)

    for model_idx, m in enumerate(candidates):
        if use_cv:
            if loss_type == '01':
                pred = cross_val_predict(m, X=X, y=A, cv=cv,
                                         fit_params={'sample_weight': class_weight}).reshape(-1)
            else:
                ps = cross_val_predict(m, X=X, y=A, cv=cv,
                                       fit_params={'sample_weight': class_weight},
                                       method='predict_proba')
                pred = ps[:, 1]
        else:
            m.fit(X, A, sample_weight=class_weight)
            if loss_type == '01':
                pred = m.predict(X=X)
            else:
                pred = m.predict_proba(X=X)[:, 1]

        # Score each candidate inside the loop. Scoring once after the loop
        # would leave every candidate but the last at the initial 0.
        if loss_type == '01':
            accuracies[model_idx] = np.sum(class_weight[pred == A]) / np.sum(class_weight)
        else:
            # NOTE(review): only labels -1 and 1 contribute to the
            # log-likelihood; confirm A is encoded as {-1, 1}.
            logl = np.zeros(A.shape)
            logl[A == -1] = np.log(1.0 - pred[A == -1])
            logl[A == 1] = np.log(pred[A == 1])
            accuracies[model_idx] = np.sum(class_weight * logl) / np.sum(class_weight)

    i_best = np.argmax(accuracies)
    return candidates[i_best]
Example #13
Source File: generate_class_weights.py From Pytorch-Project-Template with MIT License | 5 votes |
def calculate_weigths_labels():
    """Compute per-class loss weights for the VOC training set and save them.

    Label occurrences are counted over the whole training loader, each class
    frequency ``f`` (as a fraction of all labels) is turned into the weight
    ``1 / log(1.02 + f)``, and the resulting array is saved to
    '../pretrained_weights/voc2012_256_class_weights' and printed.
    """
    class Config:
        mode = "train"
        num_classes = 21
        batch_size = 32
        max_epoch = 150
        validate_every = 2
        checkpoint_file = "checkpoint.pth.tar"
        data_loader = "VOCDataLoader"
        data_root = "../data/pascal_voc_seg/"
        data_loader_workers = 4
        pin_memory = True
        async_loading = True

    # Create an instance from the data loader
    from tqdm import tqdm
    data_loader = VOCDataLoader(Config)

    # Running count of label occurrences, one slot per class.
    z = np.zeros((Config.num_classes,))

    # Initialize tqdm
    tqdm_batch = tqdm(data_loader.train_loader, total=data_loader.train_iterations)
    for _, y in tqdm_batch:
        # bincount works directly on the flat integer array; no list needed.
        labels = y.numpy().astype(np.uint8).ravel()
        z += np.bincount(labels, minlength=Config.num_classes)
    tqdm_batch.close()

    total_frequency = np.sum(z)
    print(z)
    print(total_frequency)

    # Vectorised form of 1 / log(1.02 + frequency / total) for every class.
    ret = 1 / np.log(1.02 + (z / total_frequency))
    np.save('../pretrained_weights/voc2012_256_class_weights', ret)
    print(ret)
Example #14
Source File: zil.py From incremental_learning.pytorch with MIT License | 5 votes |
def get_class_weights(self, loader):
    """Return 'balanced' class weights for all targets yielded by ``loader``.

    :param loader: iterable of dicts, each holding a tensor under "targets".
    :return: float tensor on ``self._device`` with one weight per distinct
        target, in the sorted order produced by ``np.unique``.
    """
    # Gather every batch of targets into one flat NumPy array.
    targets = [input_dict["targets"] for input_dict in loader]
    targets = torch.cat(targets).cpu().numpy()

    unique_targets = np.unique(targets)
    # `classes` and `y` are keyword-only in current scikit-learn; keywords
    # are accepted by older releases too.
    class_weights = compute_class_weight(
        'balanced', classes=unique_targets, y=targets)

    return torch.tensor(class_weights).to(self._device).float()
Example #15
Source File: losses.py From pytorch_segmentation with MIT License | 5 votes |
def get_weights(target, num_classes=7):
    """Compute median-frequency class weights from a label tensor.

    Each class present in ``target`` gets the weight
    ``median(counts) / count``; classes that do not occur keep weight 1.

    :param target: integer tensor of class indices (any shape); every value
        must be a valid index below ``num_classes``.
    :param num_classes: length of the returned weight vector (default 7).
    :return: float32 tensor of shape ``(num_classes,)`` on ``target``'s device.
    """
    # reshape() also handles non-contiguous tensors, and detach() replaces
    # the legacy `.data` access.
    t_np = target.detach().reshape(-1).cpu().numpy()

    classes, counts = np.unique(t_np, return_counts=True)
    cls_w = np.median(counts) / counts

    weights = np.ones(num_classes)
    weights[classes] = cls_w

    # Follow the target's device instead of forcing CUDA, so this also works
    # on CPU-only hosts and the weights match the device of the data.
    return torch.from_numpy(weights).float().to(target.device)
Example #16
Source File: Keras_utils.py From coling2018_fake-news-challenge with Apache License 2.0 | 5 votes |
def calculate_class_weight(y_train, no_classes=2):
    """Return 'balanced' class weights as a dict keyed by class position.

    :param y_train: array-like of training labels.
    :param no_classes: number of entries to put in the dict; the weight list
        must hold at least this many classes.
    :return: dict mapping ``i`` (0 .. no_classes - 1) to the weight of the
        i-th distinct label in sorted order.
    """
    # https://datascience.stackexchange.com/questions/13490/how-to-set-class-weights-for-imbalanced-classes-in-keras
    from sklearn.utils import class_weight

    # `classes` and `y` are keyword-only in current scikit-learn; keywords
    # are accepted by older releases too.
    class_weight_list = class_weight.compute_class_weight(
        'balanced', classes=np.unique(y_train), y=y_train)

    class_weights = {i: class_weight_list[i] for i in range(no_classes)}

    print(class_weights)
    return class_weights
Example #17
Source File: test_class_weight.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_compute_class_weight_balanced_unordered():
    """Check that 'balanced' weights follow the given (unsorted) class order."""
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])

    # Keyword arguments: `classes` and `y` are keyword-only in current
    # scikit-learn.
    cw = compute_class_weight("balanced", classes=classes, y=y)

    # Reorder the bin counts to line up with `classes`.
    class_counts = np.bincount(y)[classes]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2., 1., 2. / 3])
Example #18
Source File: test_class_weight.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_compute_class_weight_balanced_negative():
    """Check that 'balanced' weights are correct for negative class labels."""
    # Test compute_class_weight when labels are negative
    classes = np.array([-2, -1, 0])

    # Test with balanced class labels.
    y = np.asarray([-1, -1, 0, 0, -2, -2])
    # Keyword arguments: `classes` and `y` are keyword-only in current
    # scikit-learn.
    cw = compute_class_weight("balanced", classes=classes, y=y)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])
    cw = compute_class_weight("balanced", classes=classes, y=y)
    assert_equal(len(cw), len(classes))
    # bincount needs non-negative values, hence the offset of 2.
    class_counts = np.bincount(y + 2)
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2. / 3, 2., 1.])
Example #19
Source File: test_class_weight.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_compute_class_weight_not_present():
    """Check the ValueError raised when ``classes`` and ``y`` disagree."""
    # `classes`/`y` are keyword-only in current scikit-learn. Forwarding them
    # positionally would raise TypeError and mask the ValueError under test,
    # so every call below forwards them by keyword.
    forwarded = dict(y=np.asarray([0, 0, 0, 1, 1, 2]))

    # Raise error when y does not contain all class labels
    forwarded['classes'] = np.arange(4)
    assert_raises(ValueError, compute_class_weight, "balanced", **forwarded)

    # Fix exception in error message formatting when missing label is a string
    # https://github.com/scikit-learn/scikit-learn/issues/8312
    assert_raise_message(ValueError,
                         'Class label label_not_present not present',
                         compute_class_weight,
                         {'label_not_present': 1.}, **forwarded)

    # Raise error when y has items not in classes
    forwarded['classes'] = np.arange(2)
    assert_raises(ValueError, compute_class_weight, "balanced", **forwarded)
    assert_raises(ValueError, compute_class_weight, {0: 1., 1: 2.}, **forwarded)
Example #20
Source File: test_class_weight.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_compute_class_weight():
    """Test (and demo) compute_class_weight on an imbalanced label vector."""
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)

    # Keyword arguments: `classes` and `y` are keyword-only in current
    # scikit-learn.
    cw = compute_class_weight("balanced", classes=classes, y=y)

    # total effect of samples is preserved
    class_counts = np.bincount(y)[2:]  # drop the empty bins for labels 0 and 1
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])

    # The less frequent a class, the larger its weight.
    assert cw[0] < cw[1] < cw[2]
Example #21
Source File: data_silo.py From FARM with Apache License 2.0 | 5 votes |
def calculate_class_weights(self, task_name, source="train"):
    """
    For imbalanced datasets, we can calculate class weights that can be used later in the
    loss function of the prediction head to upweight the loss of minorities.

    :param task_name: name of the task as used in the processor
    :type task_name: str
    :param source: which datasets to count labels from: "train" uses only the
        train set, "all" uses every dataset in ``self.data``
    :type source: str
    :return: float32 array of 'balanced' class weights, one per entry of the
        task's label list
    :raises Exception: if ``source`` is neither "train" nor "all"
    """
    # Validate `source` first so a bad value fails before any work is done.
    if source == "all":
        datasets = self.data.values()
    elif source == "train":
        datasets = [self.data["train"]]
    else:
        raise Exception("source argument expects one of [\"train\", \"all\"]")

    task = self.processor.tasks[task_name]
    tensor_name = task["label_tensor_name"]
    label_list = task["label_list"]
    tensor_idx = list(self.tensor_names).index(tensor_name)

    # we need at least ONE observation for each label to avoid division by zero in compute_class_weights.
    observed_labels = copy.deepcopy(label_list)

    for dataset in datasets:
        if "multilabel" in task["task_type"]:
            # Multilabel targets: every position equal to 1 is an observed label.
            for x in dataset:
                observed_labels += [label_list[label_id] for label_id in (x[tensor_idx] == 1).nonzero()]
        else:
            observed_labels += [label_list[x[tensor_idx].item()] for x in dataset]

    #TODO scale e.g. via logarithm to avoid crazy spikes for rare classes
    # `classes` and `y` are keyword-only in current scikit-learn; keywords
    # are accepted by older releases too.
    class_weights = compute_class_weight("balanced", classes=np.asarray(label_list), y=observed_labels)

    # conversion necessary to have class weights of same type as model weights
    class_weights = class_weights.astype(np.float32)
    return class_weights