Python keras.utils.to_categorical() Examples
The following are 23 code examples of keras.utils.to_categorical(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.utils, or try the search function.
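Before working through the examples, here is a minimal standalone sketch of what to_categorical does: it converts an array of integer class labels into a one-hot encoded array, appending a trailing axis of size num_classes.

import numpy as np
from keras.utils import to_categorical

labels = np.array([0, 2, 1, 2])                 # integer class labels
one_hot = to_categorical(labels, num_classes=3)

print(one_hot.shape)                            # (4, 3)
print(one_hot)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]
print(np.argmax(one_hot, axis=-1))              # recovers [0 2 1 2]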
Example #1
Source File: get_data.py From Audio-Vision with MIT License | 6 votes |
def get_train_data(input_img_h5, input_ques_h5, data_limit=215359):
    img_data = h5py.File(input_img_h5)
    ques_data = h5py.File(input_ques_h5)

    img_data = np.array(img_data['images_train'])
    img_pos_train = ques_data['img_pos_train'][:data_limit]
    train_img_data = np.array([img_data[_-1, :] for _ in img_pos_train])
    # Normalize the image features
    tem = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, np.transpose(np.tile(tem, (4096, 1))))

    # Shift the padding to the left side
    ques_train = np.array(ques_data['ques_train'])[:data_limit, :]
    ques_length_train = np.array(ques_data['ques_length_train'])[:data_limit]
    ques_train = right_align(ques_train, ques_length_train)

    train_X = [train_img_data, ques_train]
    # NOTE: the one-hots should have been constructed from an exhaustive list of
    # answers, because some answers may not appear in the dataset. As a temporary
    # fix, those answer indices are set to 1 in the validation set.
    train_y = to_categorical(ques_data['answers'])[:data_limit, :]

    return train_X, train_y
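The NOTE in this example points at a common pitfall: when num_classes is omitted, to_categorical infers it as max(label) + 1, so splits encoded separately can end up with different one-hot widths. A minimal sketch of the safer pattern, where num_answers is a hypothetical count of the full answer vocabulary:

# Sketch: pin the one-hot width by passing num_classes explicitly,
# computed once from the exhaustive list of answers.
num_answers = 1000  # hypothetical size of the full answer vocabulary
train_y = to_categorical(ques_data['answers'], num_classes=num_answers)[:data_limit, :]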
Example #2
Source File: np_utils_test.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels, one_hots, expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label)
Example #3
Source File: keras_bert_ner_bi_lstm.py From nlp_xiaojiang with MIT License | 6 votes |
def label_tagging(data_x_s, tag_label2index, len_max=32):
    """
    Pad and one-hot encode (to_categorical) the tag sequences, given the
    tag-to-index dictionary, the corpus tags and the maximum text length.
    :param data_x_s: list
    :param tag_label2index: dict
    :param len_max: int
    :return: list
    """
    tag_labels = []
    for data_x in data_x_s:
        if len(data_x) <= len_max - 2:
            tag_labels.append([tag_label2index['O']] +
                              [tag_label2index[i] for i in data_x] +
                              [tag_label2index['O'] for i in range(len_max - len(data_x) - 1)])
        else:
            tag_labels.append([tag_label2index['O']] +
                              [tag_label2index[i] for i in data_x[:len_max - 1]] +
                              [tag_label2index['O']])

    tag_labels_pad = pad_sequences(sequences=tag_labels, maxlen=len_max, dtype='int32',
                                   padding='post', truncating='post',
                                   value=tag_label2index['O'])
    one_hot_y = to_categorical(tag_labels_pad, num_classes=len(tag_label2index))

    label_num = len(set(["".join(str(i)) for i in tag_labels]))
    # tag_labels_pad_to = to_categorical(y=tag_labels_pad.tolist(), num_classes=label_num)
    return one_hot_y, label_num
Example #4
Source File: data_generator.py From imageatm with Apache License 2.0 | 6 votes |
def _data_generator(self, batch_samples: List[dict]) -> Tuple[np.array, np.array]:
    """Generates data from samples in specified batch."""
    # initialize images and labels tensors for faster processing
    dims = self.img_crop_dims if self.train else self.img_load_dims
    X = np.empty((len(batch_samples), *dims, 3))
    y = np.empty((len(batch_samples), self.n_classes))

    for i, sample in enumerate(batch_samples):
        # load and randomly augment image
        img_file = self.image_dir / sample['image_id']
        img = np.asarray(load_image(img_file, self.img_load_dims))
        if self.train:
            img = random_crop(img, self.img_crop_dims)
        X[i,] = img

        # TODO: more efficient by preprocessing
        y[i,] = to_categorical([sample['label']], num_classes=self.n_classes)

    # apply basenet specific preprocessing
    # input is a 4D numpy array of RGB values within [0, 255]
    X = self.basenet_preprocess(X)

    return X, y
Example #5
Source File: load_data.py From Image-Caption-Generator with MIT License | 6 votes |
def create_sequences(tokenizer, max_length, captions_list, image):
    # X1 : input for image features
    # X2 : input for text features
    # y  : output word
    X1, X2, y = list(), list(), list()
    vocab_size = len(tokenizer.word_index) + 1
    # Walk through each caption for the image
    for caption in captions_list:
        # Encode the sequence
        seq = tokenizer.texts_to_sequences([caption])[0]
        # Split one sequence into multiple X,y pairs
        for i in range(1, len(seq)):
            # Split into input and output pair
            in_seq, out_seq = seq[:i], seq[i]
            # Pad input sequence
            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
            # Encode output sequence
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            # Store
            X1.append(image)
            X2.append(in_seq)
            y.append(out_seq)
    return X1, X2, y

# Data generator, intended to be used in a call to model.fit_generator()
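As a rough illustration of the pattern above, a toy run of create_sequences, assuming a Keras Tokenizer fitted on a single caption and hypothetical pre-extracted image features:

from keras.preprocessing.text import Tokenizer
import numpy as np

tokenizer = Tokenizer()
tokenizer.fit_on_texts(["startseq a dog runs endseq"])
image_features = np.zeros(2048)  # hypothetical CNN feature vector

X1, X2, y = create_sequences(tokenizer, max_length=5,
                             captions_list=["startseq a dog runs endseq"],
                             image=image_features)
# Each X2[i] is the padded caption prefix up to position i, and each y[i]
# is a one-hot vector of size len(tokenizer.word_index) + 1 for the next word.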
Example #6
Source File: data_loader.py From bi-lstm-crf with Apache License 2.0 | 6 votes |
def generator_from_data(self, X, Y):
    steps = 0
    total_size = X.shape[0]
    while True:
        if steps >= self.shuffle_batch:
            indicates = list(range(total_size))
            np.random.shuffle(indicates)
            X = X[indicates]
            Y = Y[indicates]
            steps = 0

        sample_index = np.random.randint(0, total_size - self.batch_size)
        ret_x = X[sample_index:sample_index + self.batch_size]
        ret_y = Y[sample_index:sample_index + self.batch_size]
        if not self.sparse_target:
            ret_y = to_categorical(ret_y, num_classes=self.tgt_vocab_size + 1)
        else:
            ret_y = np.expand_dims(ret_y, 2)
        yield ret_x, ret_y
        steps += 1
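The sparse_target branch above reflects a general trade-off: instead of one-hot targets paired with categorical_crossentropy, you can keep integer labels with a trailing axis of size 1 and use sparse_categorical_crossentropy, which saves memory for large tag vocabularies. A minimal sketch of the two equivalent target layouts:

import numpy as np
from keras.utils import to_categorical

y = np.array([[1, 0, 2], [2, 2, 0]])  # (batch, timesteps) integer tags

# Dense targets, for loss='categorical_crossentropy'
y_dense = to_categorical(y, num_classes=3)  # shape (2, 3, 3)

# Sparse targets, for loss='sparse_categorical_crossentropy'
y_sparse = np.expand_dims(y, 2)             # shape (2, 3, 1)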
Example #7
Source File: 3leveldcnet.py From Multi-level-DCNet with GNU General Public License v3.0 | 6 votes |
def load_dataset():
    # Load the dataset from Keras
    from keras.datasets import cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    # Preprocess the dataset
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train = preprocess_input(x_train)
    x_test = preprocess_input(x_test)
    x_train = x_train.reshape(-1, 32, 32, 3).astype('float32')
    x_test = x_test.reshape(-1, 32, 32, 3).astype('float32')
    y_train = to_categorical(y_train.astype('float32'))
    y_test = to_categorical(y_test.astype('float32'))
    return (x_train, y_train), (x_test, y_test)
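For reference, a quick hedged check of the shapes this returns for CIFAR-10 (assuming load_dataset and a suitable preprocess_input are in scope):

(x_train, y_train), (x_test, y_test) = load_dataset()
print(x_train.shape, y_train.shape)  # (50000, 32, 32, 3) (50000, 10)
print(x_test.shape, y_test.shape)    # (10000, 32, 32, 3) (10000, 10)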
Example #8
Source File: data_loader.py From bi-lstm-crf with Apache License 2.0 | 6 votes |
def load_sents_from_file(self, file_path, encoding):
    with open(file_path, encoding=encoding) as f:
        sent, chunk = [], []
        for line in f:
            line = line[:-1]
            chars, tags = line.split(self.sent_delimiter)
            sent.append(chars.split(self.word_delimiter))
            chunk.append(tags.split(self.word_delimiter))
            if len(sent) >= self.batch_size:
                sent = self.src_tokenizer.texts_to_sequences(sent)
                chunk = self.tgt_tokenizer.texts_to_sequences(chunk)
                sent, chunk = self._pad_seq(sent, chunk)
                if not self.sparse_target:
                    chunk = to_categorical(chunk, num_classes=self.tgt_vocab_size + 1)
                yield sent, chunk
                sent, chunk = [], []
Example #9
Source File: data.py From icassp19 with MIT License | 6 votes |
def __getitem__(self, index):
    """
    Takes an index (batch number) and returns one batch of self.batch_size.
    :param index:
    :return:
    """
    # index is taken care of by the inherited Sequence
    indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

    # fetch labels for the batch
    y_int = np.empty((self.batch_size, 1), dtype='int')
    for tt in np.arange(self.batch_size):
        y_int[tt] = int(self.labels[indexes[tt]])
    y_cat = to_categorical(y_int, num_classes=self.n_classes)

    # fetch features for the batch and adjust the format for the CNN input:
    # (batch_size, 1, time, freq) for channels_first
    features = self.features[indexes, np.newaxis]
    return features, y_cat
Example #10
Source File: task_sentiment_virtual_adversarial_training.py From bert4keras with Apache License 2.0 | 6 votes |
def __iter__(self, random=False):
    batch_token_ids, batch_segment_ids, batch_labels = [], [], []
    for is_end, (text, label) in self.sample(random):
        token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
        batch_token_ids.append(token_ids)
        batch_segment_ids.append(segment_ids)
        batch_labels.append(label)
        if len(batch_token_ids) == self.batch_size or is_end:
            batch_token_ids = sequence_padding(batch_token_ids)
            batch_segment_ids = sequence_padding(batch_segment_ids)
            batch_labels = to_categorical(batch_labels, num_classes)
            yield [batch_token_ids, batch_segment_ids], batch_labels
            batch_token_ids, batch_segment_ids, batch_labels = [], [], []

# Convert the dataset
Example #11
Source File: data.py From five-video-classification-methods with MIT License | 5 votes |
def get_class_one_hot(self, class_str):
    """Given a class as a string, return its number in the classes list.
    This lets us encode and one-hot it for training."""
    # Encode it first.
    label_encoded = self.classes.index(class_str)

    # Now one-hot it.
    label_hot = to_categorical(label_encoded, len(self.classes))

    assert len(label_hot) == len(self.classes)

    return label_hot
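A short hedged usage sketch, where data is a hypothetical instance whose classes list holds three action names:

# data.classes == ['boxing', 'jogging', 'waving']  (hypothetical)
label_hot = data.get_class_one_hot('jogging')
# label_hot -> array([0., 1., 0.], dtype=float32)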
Example #12
Source File: mlearn.py From easy12306 with Artistic License 2.0 | 5 votes |
def load_data(fn='texts.npz', to=False):
    data = np.load(fn)
    texts, labels = data['texts'], data['labels']
    texts = texts / 255.0
    _, h, w = texts.shape
    texts.shape = (-1, h, w, 1)
    if to:
        labels = to_categorical(labels)
    n = int(texts.shape[0] * 0.9)  # 90% for training, 10% for testing
    return (texts[:n], labels[:n]), (texts[n:], labels[n:])
Example #13
Source File: pseudo_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def test_generator(self):
    while True:
        indices = np.arange(self.y_test.shape[0])
        np.random.shuffle(indices)
        for i in range(len(indices) // self.batch_size):
            current_indices = indices[i * self.batch_size:(i + 1) * self.batch_size]
            X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
            y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
            y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])]  # set the flag to 0
            yield X_batch, y_batch
Example #14
Source File: pseudo_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def train_generator(self):
    while True:
        X, y, flag = self.train_mixture()
        n_batch = X.shape[0] // self.batch_size
        for i in range(n_batch):
            X_batch = (X[i * self.batch_size:(i + 1) * self.batch_size] / 255.0).astype(np.float32)
            y_batch = to_categorical(y[i * self.batch_size:(i + 1) * self.batch_size], self.n_classes)
            y_batch = np.c_[y_batch, flag[i * self.batch_size:(i + 1) * self.batch_size]]
            yield X_batch, y_batch
Example #15
Source File: mobilenet_transfer_pseudo_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def test_generator(self):
    while True:
        indices = np.arange(self.y_test.shape[0])
        np.random.shuffle(indices)
        for i in range(len(indices) // self.batch_size):
            current_indices = indices[i * self.batch_size:(i + 1) * self.batch_size]
            X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
            y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
            y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])]  # set the flag to 0
            yield X_batch, y_batch
Example #16
Source File: pseudo_pretrain_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def train(n_labeled_data):
    model = create_cnn()
    pseudo = PseudoCallback(model, n_labeled_data, min(512, n_labeled_data))
    # pretrain
    model.compile("adam", loss="categorical_crossentropy", metrics=["acc"])
    model.fit(pseudo.X_train_labeled / 255.0, to_categorical(pseudo.y_train_labeled),
              batch_size=pseudo.batch_size, epochs=30,
              validation_data=(pseudo.X_test / 255.0, to_categorical(pseudo.y_test)))
    pseudo.y_train_unlabeled_prediction = np.argmax(
        model.predict(pseudo.X_train_unlabeled), axis=-1,).reshape(-1, 1)

    # main train
    model.compile("adam", loss=pseudo.loss_function, metrics=[pseudo.accuracy])
    if not os.path.exists("result_pseudo"):
        os.mkdir("result_pseudo")
    hist = model.fit_generator(pseudo.train_generator(),
                               steps_per_epoch=pseudo.train_steps_per_epoch,
                               validation_data=pseudo.test_generator(),
                               callbacks=[pseudo],
                               validation_steps=pseudo.test_stepes_per_epoch,
                               epochs=100).history
    hist["labeled_accuracy"] = pseudo.labeled_accuracy
    hist["unlabeled_accuracy"] = pseudo.unlabeled_accuracy
    with open(f"result_pseudo/history_{n_labeled_data:05}.dat", "wb") as fp:
        pickle.dump(hist, fp)
Example #17
Source File: pseudo_pretrain_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def test_generator(self):
    while True:
        indices = np.arange(self.y_test.shape[0])
        np.random.shuffle(indices)
        for i in range(len(indices) // self.batch_size):
            current_indices = indices[i * self.batch_size:(i + 1) * self.batch_size]
            X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
            y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
            y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])]  # set the flag to 0
            yield X_batch, y_batch
Example #18
Source File: pseudo_pretrain_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def train_generator(self):
    while True:
        X, y, flag = self.train_mixture()
        n_batch = X.shape[0] // self.batch_size
        for i in range(n_batch):
            X_batch = (X[i * self.batch_size:(i + 1) * self.batch_size] / 255.0).astype(np.float32)
            y_batch = to_categorical(y[i * self.batch_size:(i + 1) * self.batch_size], self.n_classes)
            y_batch = np.c_[y_batch, flag[i * self.batch_size:(i + 1) * self.batch_size]]
            yield X_batch, y_batch
Example #19
Source File: mobilenet_pseudo_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def test_generator(self):
    while True:
        indices = np.arange(self.y_test.shape[0])
        np.random.shuffle(indices)
        for i in range(len(indices) // self.batch_size):
            current_indices = indices[i * self.batch_size:(i + 1) * self.batch_size]
            X_batch = (self.X_test[current_indices] / 255.0).astype(np.float32)
            y_batch = to_categorical(self.y_test[current_indices], self.n_classes)
            y_batch = np.c_[y_batch, np.repeat(0.0, y_batch.shape[0])]  # set the flag to 0
            yield X_batch, y_batch
Example #20
Source File: mobilenet_pseudo_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def train_generator(self):
    while True:
        X, y, flag = self.train_mixture()
        n_batch = X.shape[0] // self.batch_size
        for i in range(n_batch):
            X_batch = (X[i * self.batch_size:(i + 1) * self.batch_size] / 255.0).astype(np.float32)
            y_batch = to_categorical(y[i * self.batch_size:(i + 1) * self.batch_size], self.n_classes)
            y_batch = np.c_[y_batch, flag[i * self.batch_size:(i + 1) * self.batch_size]]
            yield X_batch, y_batch
Example #21
Source File: get_data.py From Audio-Vision with MIT License | 5 votes |
def get_test_data(input_img_h5, input_ques_h5, metadata, val_annotations_path):
    img_data = h5py.File(input_img_h5)
    ques_data = h5py.File(input_ques_h5)

    with open(val_annotations_path, 'r') as an_file:
        annotations = json.loads(an_file.read())

    img_data = np.array(img_data['images_test'])
    img_pos_train = ques_data['img_pos_test']
    train_img_data = np.array([img_data[_-1, :] for _ in img_pos_train])
    tem = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, np.transpose(np.tile(tem, (4096, 1))))

    ques_train = np.array(ques_data['ques_test'])
    ques_length_train = np.array(ques_data['ques_length_test'])
    ques_train = right_align(ques_train, ques_length_train)

    # Convert every last index to 0, because the embeddings were built that way
    for _ in ques_train:
        if 12602 in _:
            _[_ == 12602] = 0

    val_X = [train_img_data, ques_train]

    ans_to_ix = {str(ans): int(i) for i, ans in metadata['ix_to_ans'].items()}
    ques_annotations = {}
    for _ in annotations['annotations']:
        idx = ans_to_ix.get(_['multiple_choice_answer'].lower())
        _['multiple_choice_answer_idx'] = 1 if idx in [None, 1000] else idx
        ques_annotations[_['question_id']] = _

    abs_val_y = [ques_annotations[ques_id]['multiple_choice_answer_idx']
                 for ques_id in ques_data['question_id_test']]
    abs_val_y = to_categorical(np.array(abs_val_y))

    multi_val_y = [list(set([ans_to_ix.get(_['answer'].lower())
                             for _ in ques_annotations[ques_id]['answers']]))
                   for ques_id in ques_data['question_id_test']]
    for i, _ in enumerate(multi_val_y):
        multi_val_y[i] = [1 if ans in [None, 1000] else ans for ans in _]

    return val_X, abs_val_y, multi_val_y
Example #22
Source File: data_processor.py From tying-wv-and-wc with MIT License | 5 votes |
def format(self, word_seq, vocab_size, sequence_size):
    words = []
    nexts = []
    sequence_count = (len(word_seq) - 1) // sequence_size
    for i in range(sequence_count):
        start = i * sequence_size
        words.append(word_seq[start:start + sequence_size])
        next_seq = word_seq[(start + 1):(start + 1 + sequence_size)]
        next_seq_as_one_hot = to_categorical(next_seq, vocab_size)  # to one-hot vectors
        nexts.append(next_seq_as_one_hot)

    words = np.array(words)
    nexts = np.array(nexts)
    return words, nexts
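To make the shapes concrete, a hedged toy run of the method above on a seven-token sequence (processor is a hypothetical instance of the surrounding class):

words, nexts = processor.format([4, 1, 7, 2, 5, 3, 6], vocab_size=8, sequence_size=3)
# sequence_count = (7 - 1) // 3 = 2
# words.shape == (2, 3)      -> [[4, 1, 7], [2, 5, 3]]
# nexts.shape == (2, 3, 8)   -> one-hot of the shifted targets [[1, 7, 2], [5, 3, 6]]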
Example #23
Source File: mobilenet_supervised_cifar.py From Pseudo-Label-Keras with MIT License | 5 votes |
def train(n_labeled_data):
    model = create_cnn()
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    y_test_true = np.ravel(y_test)
    X_train = X_train[indices[:n_labeled_data]] / 255.0
    X_test = X_test / 255.0
    y_train = to_categorical(y_train[indices[:n_labeled_data]], 10)
    y_test = to_categorical(y_test, 10)

    model.compile("adam", loss="categorical_crossentropy", metrics=["acc"])

    if not os.path.exists("result_mobilenet"):
        os.mkdir("result_mobilenet")

    hist = model.fit(X_train, y_train, batch_size=min(n_labeled_data, 256),
                     validation_data=(X_test, y_test), epochs=100).history

    with open(f"result_mobilenet/history_{n_labeled_data:05}.dat", "wb") as fp:
        pickle.dump(hist, fp)

    # t-SNE plot
    emb_model = Model(model.input, model.layers[-2].output)
    embedding = emb_model.predict(X_test)
    proj = TSNE(n_components=2).fit_transform(embedding)
    cmp = plt.get_cmap("tab10")
    plt.figure()
    for i in range(10):
        select_flag = y_test_true == i
        plt_latent = proj[select_flag, :]
        plt.scatter(plt_latent[:, 0], plt_latent[:, 1], color=cmp(i), marker=".")
    plt.savefig(f"result_mobilenet/embedding_{n_labeled_data:05}.png")