Python tflearn.data_utils.to_categorical() Examples

The following are 6 code examples of tflearn.data_utils.to_categorical(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tflearn.data_utils, or try the search function.
Example #1
Source File: data_helpers.py    From Text-Pairs-Relation-Classification with Apache License 2.0 6 votes vote down vote up
def pad_data(data, pad_seq_len):
    """
    Pad the front and behind token-index sequences and one-hot encode the labels.

    Args:
        data: Object exposing `front_tokenindex`, `behind_tokenindex` and `labels`
        pad_seq_len: Length forwarded to `pad_sequences` as `maxlen`
    Returns:
        A tuple `(front, behind, labels)`: both padded sequence sets
        (padding value 0.) followed by the two-class one-hot labels
    """
    def _pad(token_index):
        # Both sides of the pair are padded with exactly the same settings.
        return pad_sequences(token_index, maxlen=pad_seq_len, value=0.)

    front = _pad(data.front_tokenindex)
    behind = _pad(data.behind_tokenindex)
    labels = to_categorical(data.labels, nb_classes=2)
    return front, behind, labels
Example #2
Source File: utils.py    From URLNet with Apache License 2.0 5 votes vote down vote up
def _shuffle_and_split(samples, dev_pct):
    """Shuffle `samples` with the fixed seed 10 and hold out the trailing `dev_pct` fraction.

    Returns a `(train, test)` pair. When the held-out size rounds down to zero
    the test part is empty and every sample stays in the train part.
    """
    np.random.seed(10)
    order = np.random.permutation(np.arange(len(samples)))
    shuffled = samples[order]
    n_dev = int(dev_pct * float(len(samples)))
    if n_dev == 0:
        # `shuffled[:-0]` is `shuffled[:0]` (empty) and `shuffled[-0:]` is the
        # whole array, so a plain negative-index split would swap the two parts.
        return shuffled, shuffled[:0]
    return shuffled[:-n_dev], shuffled[-n_dev:]


def prep_train_test(pos_x, neg_x, dev_pct):
    """Split positive and negative samples into shuffled train/test sets.

    Each class is shuffled (seed 10) and its last `dev_pct` fraction is held
    out for testing. Positives are labelled 1 and negatives 0, the labels are
    one-hot encoded with two classes, and the combined train and test sets are
    shuffled again (seed 10) with samples and labels kept aligned.

    Args:
        pos_x: Positive samples; must support indexing with an index array
            (presumably a NumPy array -- confirm against callers).
        neg_x: Negative samples, same requirements as `pos_x`.
        dev_pct: Fraction of each class to hold out for the test set.

    Returns:
        Tuple `(x_train, y_train, x_test, y_test)`.

    Note:
        The global NumPy random state is reseeded to 10 as a side effect.
    """
    pos_train, pos_test = _shuffle_and_split(pos_x, dev_pct)
    neg_train, neg_test = _shuffle_and_split(neg_x, dev_pct)

    x_train = np.array(list(pos_train) + list(neg_train))
    y_train = len(pos_train)*[1] + len(neg_train)*[0]
    x_test = np.array(list(pos_test) + list(neg_test))
    y_test = len(pos_test)*[1] + len(neg_test)*[0]

    y_train = to_categorical(y_train, nb_classes=2)
    y_test = to_categorical(y_test, nb_classes=2)

    # One permutation is applied to both samples and labels so they stay paired.
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(x_train)))
    x_train = x_train[shuffle_indices]
    y_train = y_train[shuffle_indices]

    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(x_test)))
    x_test = x_test[shuffle_indices]
    y_test = y_test[shuffle_indices]

    print("Train Mal/Ben split: {}/{}".format(len(pos_train), len(neg_train)))
    print("Test Mal/Ben split: {}/{}".format(len(pos_test), len(neg_test)))
    print("Train/Test split: {}/{}".format(len(y_train), len(y_test)))
    print("Train/Test split: {}/{}".format(len(x_train), len(x_test)))

    return x_train, y_train, x_test, y_test
Example #3
Source File: predict.py    From GarvinBook with MIT License 5 votes vote down vote up
def load_data(dirname="cifar-10-batches-py", one_hot=False):
    """Load the five training batches and the test batch found in `dirname`.

    Files `data_batch_1` .. `data_batch_5` and `test_batch` are read through
    `load_batch`, which is expected to return a `(data, labels)` pair. The
    training batches are concatenated along axis 0. Each 2-D data array is
    split at columns 1024 and 2048, the three slices are stacked depth-wise,
    divided by 255 and reshaped to `[-1, 32, 32, 3]`.

    Args:
        dirname: Directory containing the batch files.
        one_hot: When true, labels are converted with `to_categorical(..., 10)`.

    Returns:
        `((X_train, Y_train), (X_test, Y_test))`.
    """
    def _as_images(flat):
        # Stack the three column ranges depth-wise, scale, then reshape.
        planes = (flat[:, :1024], flat[:, 1024:2048], flat[:, 2048:])
        return np.reshape(np.dstack(planes) / 255., [-1, 32, 32, 3])

    data_parts = []
    label_parts = []
    for batch_no in range(1, 6):
        batch_path = os.path.join(dirname, 'data_batch_' + str(batch_no))
        data, labels = load_batch(batch_path)
        data_parts.append(data)
        label_parts.append(labels)
    X_train = np.concatenate(data_parts, axis=0)
    Y_train = np.concatenate(label_parts, axis=0)

    X_test, Y_test = load_batch(os.path.join(dirname, 'test_batch'))

    X_train = _as_images(X_train)
    X_test = _as_images(X_test)

    if one_hot:
        Y_train = to_categorical(Y_train, 10)
        Y_test = to_categorical(Y_test, 10)

    return (X_train, Y_train), (X_test, Y_test)
Example #4
Source File: train.py    From GarvinBook with MIT License 5 votes vote down vote up
def load_data(dirname="cifar-10-batches-py", one_hot=False):
    """Read the training and test batches from `dirname` as image arrays.

    The training set is the axis-0 concatenation of `data_batch_1` through
    `data_batch_5`; the test set comes from `test_batch`. Every file is read
    with `load_batch`, which must yield a `(data, labels)` pair. Data arrays
    are cut into three column ranges (`:1024`, `1024:2048`, `2048:`), stacked
    along a new last axis, divided by 255 and reshaped to `[-1, 32, 32, 3]`.

    Args:
        dirname: Folder that holds the batch files.
        one_hot: If true, both label sets go through `to_categorical` with
            10 classes.

    Returns:
        A pair of pairs: `(X_train, Y_train), (X_test, Y_test)`.
    """
    train_paths = [os.path.join(dirname, 'data_batch_' + str(i))
                   for i in range(1, 6)]
    train_batches = [load_batch(path) for path in train_paths]
    X_train = np.concatenate([data for data, _ in train_batches], axis=0)
    Y_train = np.concatenate([labels for _, labels in train_batches], axis=0)

    X_test, Y_test = load_batch(os.path.join(dirname, 'test_batch'))

    # Apply the same split / stack / scale / reshape to train, then test.
    images = []
    for flat in (X_train, X_test):
        stacked = np.dstack((flat[:, :1024], flat[:, 1024:2048],
                             flat[:, 2048:])) / 255.
        images.append(np.reshape(stacked, [-1, 32, 32, 3]))
    X_train, X_test = images

    if one_hot:
        Y_train, Y_test = to_categorical(Y_train, 10), to_categorical(Y_test, 10)

    return (X_train, Y_train), (X_test, Y_test)
Example #5
Source File: rnn_utils.py    From sign-language-gesture-recognition with MIT License 4 votes vote down vote up
def get_data(input_data_dump, num_frames_per_video, labels, ifTrain):
    """Group pickled per-frame features into fixed-size sequences with one-hot labels.

    Frames are read in order from the pickle file. Every run of
    `num_frames_per_video` consecutive frames becomes one sample (runs do not
    overlap), labelled with the class of the run's last frame. Frames left
    over at the end that do not fill a run are dropped.

    Args:
        input_data_dump: Path of the pickle file; each entry is indexed as
            `entry[0]` (features) and `entry[1]` (class name string).
        num_frames_per_video: Number of frames per sample.
        labels: Mapping from lower-cased class name to numeric label.
        ifTrain: When true, return an 80/20 train/test split.

    Returns:
        `(X_train, X_test, y_train, y_test)` if `ifTrain` is true, otherwise
        `(X, y)`.
    """
    sequences = []
    targets = []
    window = deque()

    # NOTE(review): pickle.load can execute arbitrary code; only feed trusted dumps.
    with open(input_data_dump, 'rb') as fin:
        for frame in pickle.load(fin):
            features = frame[0]
            # Class name -> numeric label.
            target = labels[frame[1].lower()]

            window.append(features)
            if len(window) != num_frames_per_video:
                continue

            # Window is full: emit it and start a fresh one.
            sequences.append(np.array(list(window)))
            targets.append(target)
            window.clear()

    print("Class Name\tNumeric Label")
    for key in labels:
        print("%s\t\t%d" % (key, labels[key]))

    X = np.array(sequences)
    y = np.array(targets)

    print("Dataset shape: ", X.shape)

    # One-hot encode with one column per known class.
    y = to_categorical(y, len(labels))

    if not ifTrain:
        return X, y

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test
Example #6
Source File: rnn_utils.py    From continuous-online-video-classification-blog with MIT License 4 votes vote down vote up
def get_data(filename, num_frames, num_classes, input_length):
    """Build sliding-window frame sequences and one-hot labels from a pickle file.

    Frames are read in order. Once `num_frames` frames have been seen, every
    further frame yields one sample made of the latest `num_frames` frames
    (windows overlap, advancing by one frame). A sample is labelled 1 when the
    newest frame's class is 'ad' and 0 otherwise.

    Args:
        filename: Path of the pickle file; each entry is indexed as
            `entry[0]` (features) and `entry[1]` (class name).
        num_frames: Window length in frames.
        num_classes: Number of classes passed to `to_categorical`.
        input_length: Size of the last axis after reshaping the samples.

    Returns:
        `(X_train, X_test, y_train, y_test)` from a 90/10 split.
    """
    samples = []
    targets = []
    window = deque()

    # NOTE(review): pickle.load can execute arbitrary code; only feed trusted dumps.
    with open(filename, 'rb') as fin:
        for frame in pickle.load(fin):
            features = frame[0]
            # Binary label: 'ad' versus everything else.
            target = 1 if frame[1] == 'ad' else 0

            window.append(features)
            if len(window) != num_frames:
                continue

            # Window is full: emit it, then drop the oldest frame to slide on.
            samples.append(np.array(list(window)))
            targets.append(target)
            window.popleft()

    print("Total dataset size: %d" % len(samples))

    X = np.array(samples).reshape(-1, num_frames, input_length)
    y = to_categorical(np.array(targets), num_classes)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=42)

    return X_train, X_test, y_train, y_test