Python data.load() Examples
The following are 23 code examples of data.load(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data, or try the search function.
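
Note that data refers to a different module in each project: in the Splunk SDK examples (SplunkForPCAP, splunk-elasticsearch, splunk-ref-pas-code) it is splunklib.data, whose load() parses an Atom/XML response body into a dict-like record, while the kaggle-ndsb and punctuator2 examples use a project-local data module with its own load(). The snippet below is a minimal sketch of the splunklib.data case; the hand-written feed string is an illustrative stand-in for a real splunkd response and is not taken from any of the projects above.

from splunklib import data

# A tiny Atom feed standing in for a splunkd "messages" response (illustrative only).
ATOM_FEED = """<feed xmlns="http://www.w3.org/2005/Atom">
  <title>messages</title>
  <entry><title>restart_required</title></entry>
</feed>"""

feed = data.load(ATOM_FEED)['feed']  # parse the XML into a dict-like record
print(feed['title'])                 # expected: messages
print(feed['entry']['title'])        # expected: restart_required

With a single <entry> element, feed['entry'] is a dict; with several entries it becomes a list, which is why the restart_required() examples below check isinstance(messages['entry'], dict).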
Example #1
Source File: client.py From SplunkForPCAP with MIT License | 6 votes

def restart_required(self):
    """Indicates whether splunkd is in a state that requires a restart.

    :return: A ``boolean`` that indicates whether a restart is required.
    """
    response = self.get("messages").body.read()
    messages = data.load(response)['feed']
    if 'entry' not in messages:
        result = False
    else:
        if isinstance(messages['entry'], dict):
            titles = [messages['entry']['title']]
        else:
            titles = [x['title'] for x in messages['entry']]
        result = 'restart_required' in titles
    return result
Example #2
Source File: load.py From kaggle-ndsb with MIT License | 6 votes

def load_train(self):
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)
    split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
    self.image_shapes_train = image_shapes[indices_train]
    self.image_shapes_valid = image_shapes[indices_valid]

    self.y_train = np.load(self.train_pred_file).astype(np.float32)
    self.y_valid = np.load(self.valid_pred_file).astype(np.float32)

    self.labels_train = labels[indices_train]
    self.labels_valid = labels[indices_valid]
Example #3
Source File: load.py From kaggle-ndsb with MIT License | 6 votes

def load_train(self):
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)
    split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
    moments = np.load("data/image_moment_stats_v1_train.pkl")
    centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
    info = np.concatenate((centroid_distance, image_shapes, moments["angles"][:, None],
                           moments["minor_axes"][:, None], moments["major_axes"][:, None]), 1).astype(np.float32)
    self.info_train = info[indices_train]
    self.info_valid = info[indices_valid]

    self.y_train = np.load(self.train_pred_file).astype(np.float32)
    self.y_valid = np.load(self.valid_pred_file).astype(np.float32)

    self.labels_train = labels[indices_train]
    self.labels_valid = labels[indices_valid]
Example #4
Source File: load.py From kaggle-ndsb with MIT License | 6 votes

def load_test(self):
    self.y_test = np.load(self.test_pred_file).astype(np.float32)
    self.images_test = data.load('test')

    features = np.load("data/features_test.pkl").item()

    if "aaronmoments" in self.features:
        print "aaronmoments"

        def normalize(x):
            return x
            # return (x - x.mean(axis=0, keepdims=True)) / x.std(axis=0, keepdims=True)

        image_shapes = np.asarray([img.shape for img in self.images_test]).astype(np.float32)
        moments = np.load("data/image_moment_stats_v1_test.pkl")
        centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
        angles = moments["angles"][:, None]
        minor_axes = moments["minor_axes"][:, None]
        major_axes = moments["major_axes"][:, None]

        centroid_distance = normalize(centroid_distance)
        angles = normalize(angles)
        minor_axes = normalize(minor_axes)
        major_axes = normalize(major_axes)

        features["aaronmoments"] = np.concatenate([centroid_distance, angles, minor_axes, major_axes], 1).astype(np.float32)

    self.info_test = np.concatenate([features[feat] for feat in self.features], 1).astype(np.float32)
Example #5
Source File: client.py From splunk-elasticsearch with Apache License 2.0 | 6 votes

def restart_required(self):
    """Indicates whether splunkd is in a state that requires a restart.

    :return: A ``boolean`` that indicates whether a restart is required.
    """
    response = self.get("messages").body.read()
    messages = data.load(response)['feed']
    if 'entry' not in messages:
        result = False
    else:
        if isinstance(messages['entry'], dict):
            titles = [messages['entry']['title']]
        else:
            titles = [x['title'] for x in messages['entry']]
        result = 'restart_required' in titles
    return result
Example #6
Source File: client.py From splunk-ref-pas-code with Apache License 2.0 | 6 votes

def restart_required(self):
    """Indicates whether splunkd is in a state that requires a restart.

    :return: A ``boolean`` that indicates whether a restart is required.
    """
    response = self.get("messages").body.read()
    messages = data.load(response)['feed']
    if 'entry' not in messages:
        result = False
    else:
        if isinstance(messages['entry'], dict):
            titles = [messages['entry']['title']]
        else:
            titles = [x['title'] for x in messages['entry']]
        result = 'restart_required' in titles
    return result
Example #7
Source File: client.py From splunk-ref-pas-code with Apache License 2.0 | 5 votes

def refresh(self, state=None):
    """Refreshes the state of this entity.

    If *state* is provided, load it as the new state for this entity.
    Otherwise, make a roundtrip to the server (by calling the :meth:`read`
    method of ``self``) to fetch an updated state, plus at most two
    additional round trips if the ``autologin`` field of :func:`connect`
    is set to ``True``.

    :param state: Entity-specific arguments (optional).
    :type state: ``dict``
    :raises EntityDeletedException: Raised if the entity no longer exists
        on the server.

    **Example**::

        import splunklib.client as client
        s = client.connect(...)
        search = s.apps['search']
        search.refresh()
    """
    if state is not None:
        self._state = state
    else:
        self._state = self.read(self.get())
    return self
Example #8
Source File: load.py From kaggle-ndsb with MIT License | 5 votes

def load_test(self):
    self.images_test = data.load('test')
Example #9
Source File: load.py From kaggle-ndsb with MIT License | 5 votes

def load_train(self):
    images = data.load('train')
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

    split = np.load(self.validation_split_path)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    self.images_train = images[indices_train]
    self.labels_train = labels[indices_train]
    self.images_valid = images[indices_valid]
    self.labels_valid = labels[indices_valid]
Example #10
Source File: client.py From splunk-elasticsearch with Apache License 2.0 | 5 votes

def _load_atom(response, match=None):
    return data.load(response.body.read(), match)

# Load an array of atom entries from the body of the given response
Example #11
Source File: client.py From splunk-ref-pas-code with Apache License 2.0 | 5 votes

def _load_atom(response, match=None):
    return data.load(response.body.read(), match)

# Load an array of atom entries from the body of the given response
Example #12
Source File: client.py From splunk-elasticsearch with Apache License 2.0 | 5 votes

def refresh(self, state=None):
    """Refreshes the state of this entity.

    If *state* is provided, load it as the new state for this entity.
    Otherwise, make a roundtrip to the server (by calling the :meth:`read`
    method of ``self``) to fetch an updated state, plus at most two
    additional round trips if the ``autologin`` field of :func:`connect`
    is set to ``True``.

    :param state: Entity-specific arguments (optional).
    :type state: ``dict``
    :raises EntityDeletedException: Raised if the entity no longer exists
        on the server.

    **Example**::

        import splunklib.client as client
        s = client.connect(...)
        search = s.apps['search']
        search.refresh()
    """
    if state is not None:
        self._state = state
    else:
        self._state = self.read(self.get())
    return self
Example #13
Source File: client.py From SplunkForPCAP with MIT License | 5 votes

def _load_atom(response, match=None):
    return data.load(response.body.read(), match)

# Load an array of atom entries from the body of the given response
Example #14
Source File: load.py From kaggle-ndsb with MIT License | 5 votes

def load_test(self):
    self.y_test = np.load(self.test_pred_file).astype(np.float32)
    self.images_test = data.load('test')

    image_shapes_test = np.asarray([img.shape for img in self.images_test]).astype(np.float32)
    moments_test = np.load("data/image_moment_stats_v1_test.pkl")
    centroid_distance = np.abs(moments_test["centroids"][:, [1, 0]] - image_shapes_test / 2)
    self.info_test = np.concatenate((centroid_distance, image_shapes_test, moments_test["angles"][:, None],
                                     moments_test["minor_axes"][:, None], moments_test["major_axes"][:, None]), 1).astype(np.float32)
    # self.info_test = np.concatenate((image_shapes_test, moments_test["centroids"], moments_test["minor_axes"][:, None], moments_test["major_axes"][:, None]), 1).astype(np.float32)
Example #15
Source File: load.py From kaggle-ndsb with MIT License | 5 votes

def load_train(self):
    labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)
    split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    features = np.load("data/features_train.pkl").item()

    if "aaronmoments" in self.features:
        print "aaronmoments"

        def normalize(x):
            return x
            # return (x - x.mean(axis=0, keepdims=True)) / x.std(axis=0, keepdims=True)

        image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
        moments = np.load("data/image_moment_stats_v1_train.pkl")
        centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
        angles = moments["angles"][:, None]
        minor_axes = moments["minor_axes"][:, None]
        major_axes = moments["major_axes"][:, None]

        centroid_distance = normalize(centroid_distance)
        angles = normalize(angles)
        minor_axes = normalize(minor_axes)
        major_axes = normalize(major_axes)

        features["aaronmoments"] = np.concatenate([centroid_distance, angles, minor_axes, major_axes], 1).astype(np.float32)

    info = np.concatenate([features[feat] for feat in self.features], 1).astype(np.float32)
    print info.shape

    self.info_train = info[indices_train]
    self.info_valid = info[indices_valid]

    self.y_train = np.load(self.train_pred_file).astype(np.float32)
    self.y_valid = np.load(self.valid_pred_file).astype(np.float32)

    self.labels_train = labels[indices_train]
    self.labels_valid = labels[indices_valid]
Example #16
Source File: client.py From SplunkForPCAP with MIT License | 5 votes

def refresh(self, state=None):
    """Refreshes the state of this entity.

    If *state* is provided, load it as the new state for this entity.
    Otherwise, make a roundtrip to the server (by calling the :meth:`read`
    method of ``self``) to fetch an updated state, plus at most two
    additional round trips if the ``autologin`` field of :func:`connect`
    is set to ``True``.

    :param state: Entity-specific arguments (optional).
    :type state: ``dict``
    :raises EntityDeletedException: Raised if the entity no longer exists
        on the server.

    **Example**::

        import splunklib.client as client
        s = client.connect(...)
        search = s.apps['search']
        search.refresh()
    """
    if state is not None:
        self._state = state
    else:
        self._state = self.read(self.get())
    return self
Example #17
Source File: load.py From kaggle-ndsb with MIT License | 4 votes

def load_train(self):
    train_images = data.load('train')
    train_labels = utils.one_hot(data.labels_train).astype(np.float32)

    if ("valid_pred_file" in self.__dict__):
        valid_pseudo_labels = np.load(self.valid_pred_file).astype(np.float32)
    else:
        print "No valid_pred_file set. Only using test-set for pseudolabeling!!"

    shuffle = np.load("test_shuffle_seed0.npy")
    if not ("shard" in self.__dict__):
        raise ValueError("Missing argument: shard: (should be value in {0, 1, 2})")
    if not self.shard in [0, 1, 2]:
        raise ValueError("Wrong argument: shard: (should be value in {0, 1, 2})")
    N = len(shuffle)
    if self.shard == 0:
        train_shard = shuffle[N/3:]
    if self.shard == 1:
        train_shard = np.concatenate((shuffle[:N/3], shuffle[2*N/3:]))
    if self.shard == 2:
        train_shard = shuffle[:2*N/3]

    test_images = data.load('test')[train_shard]
    test_pseudo_labels = np.load(self.test_pred_file)[train_shard].astype(np.float32)
    print test_pseudo_labels.shape

    if not hasattr(self, 'validation_split_path'):
        self.validation_split_path = DEFAULT_VALIDATION_SPLIT_PATH
    split = np.load(self.validation_split_path)
    indices_train = split['indices_train']
    indices_valid = split['indices_valid']

    self.images_train = train_images[indices_train]
    self.labels_train = train_labels[indices_train]

    if ("valid_pred_file" in self.__dict__):
        self.images_pseudo = np.concatenate((train_images[indices_valid], test_images), 0)
        self.labels_pseudo = np.concatenate((valid_pseudo_labels, test_pseudo_labels), 0)
    else:
        self.images_pseudo = test_images
        self.labels_pseudo = test_pseudo_labels

    self.images_valid = train_images[indices_valid]
    self.labels_valid = train_labels[indices_valid]
Example #18
Source File: client.py From SplunkForPCAP with MIT License | 4 votes

def iter(self, offset=0, count=None, pagesize=None, **kwargs):
    """Iterates over the collection.

    This method is equivalent to the :meth:`list` method, but it returns an
    iterator and can load a certain number of entities at a time from the
    server.

    :param offset: The index of the first entity to return (optional).
    :type offset: ``integer``
    :param count: The maximum number of entities to return (optional).
    :type count: ``integer``
    :param pagesize: The number of entities to load (optional).
    :type pagesize: ``integer``
    :param kwargs: Additional arguments (optional):

        - "search" (``string``): The search query to filter responses.
        - "sort_dir" (``string``): The direction to sort returned items:
          "asc" or "desc".
        - "sort_key" (``string``): The field to use for sorting (optional).
        - "sort_mode" (``string``): The collating sequence for sorting
          returned items: "auto", "alpha", "alpha_case", or "num".

    :type kwargs: ``dict``

    **Example**::

        import splunklib.client as client
        s = client.connect(...)
        for saved_search in s.saved_searches.iter(pagesize=10):
            # Loads 10 saved searches at a time from the
            # server.
            ...
    """
    assert pagesize is None or pagesize > 0
    if count is None:
        count = self.null_count
    fetched = 0
    while count == self.null_count or fetched < count:
        response = self.get(count=pagesize or count, offset=offset, **kwargs)
        items = self._load_list(response)
        N = len(items)
        fetched += N
        for item in items:
            yield item
        if pagesize is None or N < pagesize:
            break
        offset += N
        logging.debug("pagesize=%d, fetched=%d, offset=%d, N=%d, kwargs=%s",
                      pagesize, fetched, offset, N, kwargs)

# kwargs: count, offset, search, sort_dir, sort_key, sort_mode
Example #19
Source File: models_iic.py From IIC with MIT License | 4 votes

def __classification_accuracy(self, sess, iter_init, idx, y_ph=None):
    """
    :param sess: TensorFlow session
    :param iter_init: TensorFlow data iterator initializer associated
    :param idx: insertion index (i.e. epoch - 1)
    :param y_ph: TensorFlow placeholder for unseen labels
    :return: None
    """
    if self.perf is None or y_ph is None:
        return

    # initialize results
    y = np.zeros([0, 1])
    y_hats = [np.zeros([0, 1])] * self.num_B_sub_heads

    # initialize unsupervised data iterator
    sess.run(iter_init)

    # loop over the batches within the unsupervised data iterator
    print('Evaluating classification accuracy... ')
    while True:
        try:
            # grab the results
            results = sess.run([self.y_hats, y_ph], feed_dict={self.is_training: False})

            # load metrics
            for i in range(self.num_B_sub_heads):
                y_hats[i] = np.concatenate((y_hats[i], np.expand_dims(results[0][i], axis=1)))
            if y_ph is not None:
                y = np.concatenate((y, np.expand_dims(results[1], axis=1)))

            # _, ax = plt.subplots(2, 10)
            # i_rand = np.random.choice(results[3].shape[0], 10)
            # for i in range(10):
            #     ax[0, i].imshow(results[3][i_rand[i]][:, :, 0], origin='upper', vmin=0, vmax=1)
            #     ax[0, i].set_xticks([])
            #     ax[0, i].set_yticks([])
            #     ax[1, i].imshow(results[4][i_rand[i]][:, :, 0], origin='upper', vmin=0, vmax=1)
            #     ax[1, i].set_xticks([])
            #     ax[1, i].set_yticks([])
            # plt.show()

        # iterator will throw this error when it's out of data
        except tf.errors.OutOfRangeError:
            break

    # compute classification accuracy
    if y_ph is not None:
        class_errors = [unsupervised_labels(y, y_hats[i], self.k_B, self.k_B)
                        for i in range(self.num_B_sub_heads)]
        self.perf['class_err_min'][idx] = np.min(class_errors)
        self.perf['class_err_avg'][idx] = np.mean(class_errors)
        self.perf['class_err_max'][idx] = np.max(class_errors)

    # metrics are done
    print('Done')
Example #20
Source File: client.py From splunk-elasticsearch with Apache License 2.0 | 4 votes

def iter(self, offset=0, count=None, pagesize=None, **kwargs):
    """Iterates over the collection.

    This method is equivalent to the :meth:`list` method, but it returns an
    iterator and can load a certain number of entities at a time from the
    server.

    :param offset: The index of the first entity to return (optional).
    :type offset: ``integer``
    :param count: The maximum number of entities to return (optional).
    :type count: ``integer``
    :param pagesize: The number of entities to load (optional).
    :type pagesize: ``integer``
    :param kwargs: Additional arguments (optional):

        - "search" (``string``): The search query to filter responses.
        - "sort_dir" (``string``): The direction to sort returned items:
          "asc" or "desc".
        - "sort_key" (``string``): The field to use for sorting (optional).
        - "sort_mode" (``string``): The collating sequence for sorting
          returned items: "auto", "alpha", "alpha_case", or "num".

    :type kwargs: ``dict``

    **Example**::

        import splunklib.client as client
        s = client.connect(...)
        for saved_search in s.saved_searches.iter(pagesize=10):
            # Loads 10 saved searches at a time from the
            # server.
            ...
    """
    assert pagesize is None or pagesize > 0
    if count is None:
        count = self.null_count
    fetched = 0
    while count == self.null_count or fetched < count:
        response = self.get(count=pagesize or count, offset=offset, **kwargs)
        items = self._load_list(response)
        N = len(items)
        fetched += N
        for item in items:
            yield item
        if pagesize is None or N < pagesize:
            break
        offset += N
        logging.debug("pagesize=%d, fetched=%d, offset=%d, N=%d, kwargs=%s",
                      pagesize, fetched, offset, N, kwargs)

# kwargs: count, offset, search, sort_dir, sort_key, sort_mode
Example #21
Source File: main.py From punctuator2 with MIT License | 4 votes

def get_minibatch(file_name, batch_size, shuffle, with_pauses=False):
    dataset = data.load(file_name)

    if shuffle:
        np.random.shuffle(dataset)

    X_batch = []
    Y_batch = []
    if with_pauses:
        P_batch = []

    if len(dataset) < batch_size:
        print("WARNING: Not enough samples in '%s'. Reduce mini-batch size to %d or use a dataset with at least %d words." % (
            file_name,
            len(dataset),
            MINIBATCH_SIZE * data.MAX_SEQUENCE_LEN))

    for subsequence in dataset:
        X_batch.append(subsequence[0])
        Y_batch.append(subsequence[1])
        if with_pauses:
            P_batch.append(subsequence[2])

        if len(X_batch) == batch_size:
            # Transpose, because the model assumes the first axis is time
            X = np.array(X_batch, dtype=np.int32).T
            Y = np.array(Y_batch, dtype=np.int32).T
            if with_pauses:
                P = np.array(P_batch, dtype=theano.config.floatX).T

            if with_pauses:
                yield X, Y, P
            else:
                yield X, Y

            X_batch = []
            Y_batch = []
            if with_pauses:
                P_batch = []
Example #22
Source File: data_update.py From Deep-Reinforcement-Learning-in-Large-Discrete-Action-Spaces with MIT License | 4 votes

def update_pickle_file(file_name, eps=0, k=0, v=0):
    d_old = data_old.Data(file_name)
    d_old.load()
    print(file_name, 'loaded')
    # d_old.print_fields()

    d_new = data.Data()
    d_new.set_agent('Wolp', int(d_old.get_data('max_actions')[0]), k, v)
    d_new.set_experiment(d_old.get_data('experiment')[0], [-3], [3], eps)

    space = action_space.Space([-3], [3], int(d_old.get_data('max_actions')[0]))
    # print(space.get_space())
    # d_new.print_data()

    done = d_old.get_data('done')
    actors_result = d_old.get_data('actors_result')
    actions = d_old.get_data('actions')
    state_0 = d_old.get_data('state_0').tolist()
    state_1 = d_old.get_data('state_1').tolist()
    state_2 = d_old.get_data('state_2').tolist()
    state_3 = d_old.get_data('state_3').tolist()
    rewards = d_old.get_data('rewards').tolist()

    ep = 0
    temp = 0
    l = len(done)
    for i in range(l):
        d_new.set_action(space.import_point(actions[i]).tolist())
        d_new.set_actors_action(space.import_point(actors_result[i]).tolist())
        d_new.set_ndn_action(space.import_point(
            space.search_point(actors_result[i], 1)[0]).tolist())
        state = [state_0[i], state_1[i], state_2[i], state_3[i]]
        d_new.set_state(state)
        d_new.set_reward(1)

        if done[i] > 0:
            # print(ep, i - temp, 'progress', i / l)
            temp = i
            ep += 1
            # if ep % 200 == 199:
            #     d_new.finish_and_store_episode()
            # else:
            d_new.end_of_episode()

    d_new.save()
Example #23
Source File: client.py From splunk-ref-pas-code with Apache License 2.0 | 4 votes

def iter(self, offset=0, count=None, pagesize=None, **kwargs):
    """Iterates over the collection.

    This method is equivalent to the :meth:`list` method, but it returns an
    iterator and can load a certain number of entities at a time from the
    server.

    :param offset: The index of the first entity to return (optional).
    :type offset: ``integer``
    :param count: The maximum number of entities to return (optional).
    :type count: ``integer``
    :param pagesize: The number of entities to load (optional).
    :type pagesize: ``integer``
    :param kwargs: Additional arguments (optional):

        - "search" (``string``): The search query to filter responses.
        - "sort_dir" (``string``): The direction to sort returned items:
          "asc" or "desc".
        - "sort_key" (``string``): The field to use for sorting (optional).
        - "sort_mode" (``string``): The collating sequence for sorting
          returned items: "auto", "alpha", "alpha_case", or "num".

    :type kwargs: ``dict``

    **Example**::

        import splunklib.client as client
        s = client.connect(...)
        for saved_search in s.saved_searches.iter(pagesize=10):
            # Loads 10 saved searches at a time from the
            # server.
            ...
    """
    assert pagesize is None or pagesize > 0
    if count is None:
        count = self.null_count
    fetched = 0
    while count == self.null_count or fetched < count:
        response = self.get(count=pagesize or count, offset=offset, **kwargs)
        items = self._load_list(response)
        N = len(items)
        fetched += N
        for item in items:
            yield item
        if pagesize is None or N < pagesize:
            break
        offset += N
        logging.debug("pagesize=%d, fetched=%d, offset=%d, N=%d, kwargs=%s",
                      pagesize, fetched, offset, N, kwargs)

# kwargs: count, offset, search, sort_dir, sort_key, sort_mode