Python sklearn.datasets.load_files() Examples
The following are 16 code examples of sklearn.datasets.load_files(), ordered by community votes. The original project and source file for each example are noted above it. You may also want to check out the other available functions and classes of the sklearn.datasets module.
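For orientation before the examples, here is a minimal sketch of the directory layout load_files() expects and the Bunch it returns. The 'container' path and category names are illustrative stand-ins, not taken from any example on this page.

# A minimal, hypothetical directory layout that load_files() expects:
#
#   container/
#       category_a/
#           doc1.txt
#       category_b/
#           doc2.txt
#
from sklearn.datasets import load_files

dataset = load_files('container', encoding='utf-8')  # 'container' is a stand-in path
print(dataset.target_names)   # ['category_a', 'category_b'] (sorted folder names)
print(dataset.data[0])        # decoded text of one document (bytes if encoding=None)
print(dataset.target[0])      # integer label indexing into target_names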
Example #1
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_default_empty_load_files(load_files_root):
    res = load_files(load_files_root)
    assert_equal(len(res.filenames), 0)
    assert_equal(len(res.target_names), 0)
    assert_equal(res.DESCR, None)
Example #2
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_default_load_files(test_category_dir_1, test_category_dir_2,
                            load_files_root):
    res = load_files(load_files_root)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.data, [b"Hello World!\n"])
Example #3
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_load_files_w_categories_desc_and_encoding(
        test_category_dir_1, test_category_dir_2, load_files_root):
    category = os.path.abspath(test_category_dir_1).split('/').pop()
    res = load_files(load_files_root, description="test",
                     categories=category, encoding="utf-8")
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 1)
    assert_equal(res.DESCR, "test")
    assert_equal(res.data, ["Hello World!\n"])
Example #4
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_load_files_wo_load_content(
        test_category_dir_1, test_category_dir_2, load_files_root):
    res = load_files(load_files_root, load_content=False)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.get('data'), None)
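As a hedged usage sketch of the load_content=False path exercised above: the returned Bunch then carries only filenames, target and target_names, and you read the file contents yourself, e.g. lazily or in batches. The 'container' path below is a stand-in.

from sklearn.datasets import load_files

res = load_files('container', load_content=False)  # 'container' is a stand-in path
texts = []
for path in res.filenames:
    with open(path, 'rb') as f:
        texts.append(f.read())  # raw bytes; decode later with a known encoding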
Example #5
Source File: base.py From cherry with MIT License | 5 votes |
def _load_data_from_local(model, categories=None, encoding=None):
    '''
    1. Find local cache files.
    2. If we can't find the cache files:
       2.1 Try to create cache files using the data files inside `datasets`.
       2.2 Raise an error if creating the cache files fails.
    '''
    model_path = os.path.join(DATA_DIR, model)
    cache_path = os.path.join(model_path, model + '.pkz')
    if os.path.exists(cache_path):
        try:
            with open(cache_path, 'rb') as f:
                compressed_content = f.read()
            uncompressed_content = codecs.decode(
                compressed_content, 'zlib_codec')
            return pickle.loads(uncompressed_content)['all']
        except Exception:
            # Can't load cache files
            error = ('Can\'t load cached data from {0}. '
                     'Please try again after deleting the cache files.'.format(model))
            raise NotSupportError(error)
    cache = dict(all=load_files(
        model_path, categories=categories, encoding=encoding))
    compressed_content = codecs.encode(pickle.dumps(cache), 'zlib_codec')
    with open(cache_path, 'wb') as f:
        f.write(compressed_content)
    return cache['all']
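For reference, here is a stand-alone sketch of the compress/decompress round-trip this cache uses, with illustrative data rather than cherry's real payload.

import codecs
import pickle

cache = {'all': ['some', 'dataset', 'payload']}
blob = codecs.encode(pickle.dumps(cache), 'zlib_codec')     # pickle, then compress
restored = pickle.loads(codecs.decode(blob, 'zlib_codec'))  # decompress, then unpickle
assert restored == cache

As with any pickle-based cache, loading is only safe when the cache files are trusted.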
Example #6
Source File: model.py From skorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __call__(self):
    download()
    dataset = load_files(self.path, categories=['pos', 'neg'])
    X, y = dataset['data'], dataset['target']
    X = np.asarray([x.decode() for x in X])  # decode from bytes
    return X, y
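The (X, y) pair returned above plugs straight into a standard scikit-learn text pipeline. The following sketch uses tiny stand-in strings in place of the decoded reviews and their labels.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

X = ["good movie", "terrible film"]  # stand-ins for the decoded reviews
y = [1, 0]                           # stand-ins for the integer labels
clf = make_pipeline(TfidfVectorizer(), LogisticRegression())
clf.fit(X, y)
print(clf.predict(["a really good one"]))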
Example #7
Source File: lease_train.py From lexpredict-contraxsuite with GNU Affero General Public License v3.0 | 5 votes |
def load_lease_dataset(root):
    return load_files(root)
Example #8
Source File: data_helpers.py From text-classification with Apache License 2.0 | 5 votes |
def get_datasets_localdata(container_path=None, categories=None, load_content=True,
                           encoding='utf-8', shuffle=True, random_state=42):
    """
    Load text files with categories as subfolder names.
    Individual samples are assumed to be files stored in a two-level folder structure.
    :param container_path: The path of the container
    :param categories: List of classes to choose; all classes are chosen by default (if empty or omitted)
    :param load_content: Whether to load the file contents into memory
    :param encoding: Encoding used to decode the file contents
    :param shuffle: Whether to shuffle the list or not
    :param random_state: Seed integer used to shuffle the dataset
    :return: Data and labels of the dataset
    """
    datasets = load_files(container_path=container_path, categories=categories,
                          load_content=load_content, shuffle=shuffle,
                          encoding=encoding, random_state=random_state)
    return datasets
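A hypothetical follow-up showing how the returned Bunch is typically split for training; 'my_corpus' is a stand-in path, and the helper name is the one defined just above.

from sklearn.model_selection import train_test_split

datasets = get_datasets_localdata(container_path='my_corpus')
X_train, X_test, y_train, y_test = train_test_split(
    datasets.data, datasets.target, test_size=0.2, random_state=42)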
Example #9
Source File: generic.py From opentc with MIT License | 5 votes |
def __init__(self, cfg=None):
    """
    Load text files with categories as subfolder names.
    Individual samples are assumed to be files stored in a two-level folder structure.
    :param cfg: Configuration dict providing container_path, categories,
                load_content, shuffle, encoding and random_state
                (see sklearn.datasets.load_files for their meaning)
    """
    super().__init__()
    self.__dataset__ = load_files(container_path=cfg['container_path'],
                                  categories=cfg['categories'],
                                  load_content=cfg['load_content'],
                                  shuffle=cfg['shuffle'],
                                  encoding=cfg['encoding'],
                                  random_state=cfg['random_state'])
Example #10
Source File: cnn_text_util.py From opentc with MIT License | 5 votes |
def get_datasets_localdata(container_path=None, categories=None, load_content=True,
                           encoding='utf-8', shuffle=True, random_state=42):
    """
    Load text files with categories as subfolder names.
    Individual samples are assumed to be files stored in a two-level folder structure.
    :param container_path: The path of the container
    :param categories: List of classes to choose; all classes are chosen by default (if empty or omitted)
    :param load_content: Whether to load the file contents into memory
    :param encoding: Encoding used to decode the file contents
    :param shuffle: Whether to shuffle the list or not
    :param random_state: Seed integer used to shuffle the dataset
    :return: Data and labels of the dataset
    """
    datasets = load_files(container_path=container_path, categories=categories,
                          load_content=load_content, shuffle=shuffle,
                          encoding=encoding, random_state=random_state)
    return datasets
Example #11
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_default_empty_load_files():
    res = load_files(LOAD_FILES_ROOT)
    assert_equal(len(res.filenames), 0)
    assert_equal(len(res.target_names), 0)
    assert_equal(res.DESCR, None)
Example #12
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_default_load_files():
    res = load_files(LOAD_FILES_ROOT)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.data, [b("Hello World!\n")])
Example #13
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_load_files_w_categories_desc_and_encoding():
    category = os.path.abspath(TEST_CATEGORY_DIR1).split('/').pop()
    res = load_files(LOAD_FILES_ROOT, description="test",
                     categories=category, encoding="utf-8")
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 1)
    assert_equal(res.DESCR, "test")
    assert_equal(res.data, [u("Hello World!\n")])
Example #14
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_load_files_wo_load_content():
    res = load_files(LOAD_FILES_ROOT, load_content=False)
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 2)
    assert_equal(res.DESCR, None)
    assert_equal(res.get('data'), None)
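The last three tests above assume a populated LOAD_FILES_ROOT tree: one category directory holding a single "Hello World!\n" file plus a second, empty category directory. The actual fixture code is not shown on this page; the following is only an assumed sketch of how such a tree could be built with tempfile.

import os
import tempfile

LOAD_FILES_ROOT = tempfile.mkdtemp()
category_dir = tempfile.mkdtemp(dir=LOAD_FILES_ROOT)        # category with one file
empty_category_dir = tempfile.mkdtemp(dir=LOAD_FILES_ROOT)  # second, empty category
fd, sample_file = tempfile.mkstemp(dir=category_dir, suffix='.txt')
with os.fdopen(fd, 'wb') as f:
    f.write(b'Hello World!\n')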
Example #15
Source File: batch_represent.py From facenet-demo with MIT License | 4 votes |
def main(args):
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # create output directory if it doesn't exist
            output_dir = os.path.expanduser(args.output_dir)
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            # load the model
            print("Loading trained model...\n")
            meta_file, ckpt_file = facenet.get_model_filenames(
                os.path.expanduser(args.trained_model_dir))
            facenet.load_model(args.trained_model_dir, meta_file, ckpt_file)

            # grab all image paths and labels
            print("Finding image paths and targets...\n")
            data = load_files(args.data_dir, load_content=False, shuffle=False)
            labels_array = data['target']
            paths = data['filenames']

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

            image_size = images_placeholder.get_shape()[1]
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Generating embeddings from images...\n')
            start_time = time.time()
            batch_size = args.batch_size
            nrof_images = len(paths)
            nrof_batches = int(np.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in xrange(nrof_batches):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, do_random_crop=False,
                                           do_random_flip=False,
                                           image_size=image_size, do_prewhiten=True)
                feed_dict = {images_placeholder: images,
                             phase_train_placeholder: False}
                emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)

            time_avg_forward_pass = (time.time() - start_time) / float(nrof_images)
            print("Forward pass took avg of %.3f[seconds/image] for %d images\n" %
                  (time_avg_forward_pass, nrof_images))

            print("Finally saving embeddings and gallery to: %s" % (output_dir))
            # save the gallery and embeddings (signatures) as numpy arrays to disk
            np.save(os.path.join(output_dir, "gallery.npy"), labels_array)
            np.save(os.path.join(output_dir, "signatures.npy"), emb_array)
Example #16
Source File: batch_represent.py From facenet with MIT License | 4 votes |
def main(args):
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # create output directory if it doesn't exist
            output_dir = os.path.expanduser(args.output_dir)
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            # load the model
            print("Loading trained model...\n")
            meta_file, ckpt_file = facenet.get_model_filenames(
                os.path.expanduser(args.trained_model_dir))
            facenet.load_model(args.trained_model_dir, meta_file, ckpt_file)

            # grab all image paths and labels
            print("Finding image paths and targets...\n")
            data = load_files(args.data_dir, load_content=False, shuffle=False)
            labels_array = data['target']
            paths = data['filenames']

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

            image_size = images_placeholder.get_shape()[1]
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Generating embeddings from images...\n')
            start_time = time.time()
            batch_size = args.batch_size
            nrof_images = len(paths)
            nrof_batches = int(np.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in xrange(nrof_batches):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, do_random_crop=False,
                                           do_random_flip=False,
                                           image_size=image_size, do_prewhiten=True)
                feed_dict = {images_placeholder: images,
                             phase_train_placeholder: False}
                emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)

            time_avg_forward_pass = (time.time() - start_time) / float(nrof_images)
            print("Forward pass took avg of %.3f[seconds/image] for %d images\n" %
                  (time_avg_forward_pass, nrof_images))

            print("Finally saving embeddings and gallery to: %s" % (output_dir))
            # save the gallery and embeddings (signatures) as numpy arrays to disk
            np.save(os.path.join(output_dir, "gallery.npy"), labels_array)
            np.save(os.path.join(output_dir, "signatures.npy"), emb_array)