Python extract labels
60 Python code examples are found related to "extract labels".
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: dataset.py From classification-of-encrypted-traffic with MIT License | 6 votes |
def extract_labels(dataframe, one_hot=False, num_classes=10):
    """Return the encoded contents of the dataframe's ``label`` column.

    Args:
      dataframe: Object indexable by ``'label'`` whose result exposes
        ``.values`` (documented upstream as a pandas dataframe).
      one_hot: When true, the encoded labels are passed through
        ``dense_to_one_hot`` before being returned.
      num_classes: Class count forwarded to ``dense_to_one_hot``.

    Returns:
      The result of ``_label_encoder.fit_transform`` on the label values,
      or its ``dense_to_one_hot`` conversion when ``one_hot`` is set.
    """
    print('Extracting labels')
    # fit_transform is called on the module-level encoder on every call.
    encoded = _label_encoder.fit_transform(dataframe['label'].values)
    if not one_hot:
        return encoded
    return dense_to_one_hot(encoded, num_classes)
Example 2
Source File: ingest.py From ngraph-python with Apache License 2.0 | 6 votes |
def extract_labels(self, setn):
    """Build the label lookup for one ImageNet split from the devkit archive.

    Args:
        setn: ``'train'`` or ``'val'``.

    Returns:
        For ``'train'``: a dict mapping every ``n<digits>`` synset id found
        in ``meta.mat`` to its position in ``self.synsets`` (which is
        assigned as a side effect).
        For ``'val'``: a dict mapping the 1-based, zero-padded 8-digit line
        number of the ground-truth file to the label on that line minus one.

    Raises:
        IOError: if ``self.devkit`` does not exist.
        ValueError: if ``setn`` is neither ``'train'`` nor ``'val'``.
    """
    if not os.path.exists(self.devkit):
        raise IOError(("Metadata file {} not found. Ensure you have ImageNet downloaded"
                       ).format(self.devkit))

    with tarfile.open(self.devkit, "r:gz") as tf:
        synsetfile = 'ILSVRC2012_devkit_t12/data/meta.mat'
        valfile = 'ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'

        if setn == 'train':
            # get the synset mapping by hacking around matlab's terrible
            # compressed format
            meta_buff = tf.extractfile(synsetfile).read()
            decomp = zlib.decompressobj()
            meta_text = decomp.decompress(meta_buff[136:])
            # On Python 3 the decompressed payload is bytes and cannot be
            # searched with a str pattern. latin-1 maps every byte to one
            # character, so decoding never fails; on Python 2 the payload is
            # already ``str`` and is left untouched.
            if not isinstance(meta_text, str):
                meta_text = meta_text.decode('latin-1')
            self.synsets = re.findall(r'n\d+', meta_text)
            return {s: i for i, s in enumerate(self.synsets)}
        elif setn == 'val':
            # get the ground truth validation labels and offset to zero
            return {"%08d" % (i + 1): int(x) - 1
                    for i, x in enumerate(tf.extractfile(valfile))}
        else:
            raise ValueError("Unknown set name: {}".format(setn))
Example 3
Source File: preprocessing.py From deepwriting with MIT License | 6 votes |
def extract_eoc_labels(dataset):
    """
    Adds an ``'eoc_labels'`` entry with one label array per character-label
    sequence in ``dataset['char_labels']``.

    Args:
        dataset: mapping that provides ``'char_labels'``; it is modified in
            place.

    Returns:
        The same ``dataset`` object, now carrying ``'eoc_labels'``.
    """
    dataset['eoc_labels'] = []
    collected = dataset['eoc_labels']
    for char_labels in dataset['char_labels']:
        marks = utils_hw.label_end_of_sub_sequences(char_labels)
        # Assuming the last stroke is always end-of-char
        collected.append(np.expand_dims(np.float32(marks), axis=1))

    return dataset
Example 4
Source File: elastic_items.py From grimoirelab-elk with GNU General Public License v3.0 | 6 votes |
def extract_repo_labels(repo):
    """Split a projects.json repository entry into its URL and its labels.

    When the entry matches PROJECTS_JSON_LABELS_PATTERN, the text captured by
    group 1 is removed from the entry and group 2 is split on commas.

    :param repo: repo url in projects.json
    :returns: tuple of (repo entry without the label text, list of labels);
        the entry is returned untouched with an empty list when there is no
        match
    """
    found = re.compile(PROJECTS_JSON_LABELS_PATTERN).match(repo)
    if not found:
        return repo, []

    labels_info = found.group(1)
    labels_lst = [item.strip() for item in found.group(2).split(',')]
    return repo.replace(labels_info, '').strip(), labels_lst
Example 5
Source File: mnist_data.py From active_learning_coreset with MIT License | 6 votes |
def extract_labels(filename, one_hot=False):
    """Read the label bytes of a gzipped MNIST label file.

    Args:
        filename: Path to the gzip-compressed label file.
        one_hot: When true, return ``dense_to_one_hot(labels)`` instead of
            the raw label array.

    Returns:
        A 1D uint8 numpy array of labels, or its one-hot conversion.

    Raises:
        ValueError: If the first header field is not 2049.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        header_magic = _read32(stream)
        if header_magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (header_magic, filename))
        count = _read32(stream)
        print(count)
        # The count is indexed below, so this module's _read32 presumably
        # returns an array-like rather than a plain int.
        raw = stream.read(count[0])
        labels = numpy.frombuffer(raw, dtype=numpy.uint8)
    if not one_hot:
        return labels
    return dense_to_one_hot(labels)
Example 6
Source File: format.py From tensorlang with Apache License 2.0 | 6 votes |
def extract_labels(f, one_hot=False, num_classes=10):
    """Decode MNIST labels from an open, gzip-compressed file object.

    Args:
      f: A file object that can be passed into a gzip reader; its ``name``
        attribute is used in the error message.
      one_hot: Does one hot encoding for the result.
      num_classes: Number of classes for the one hot encoding.

    Returns:
      labels: a 1D uint8 numpy array, or the result of ``dense_to_one_hot``
        when ``one_hot`` is set.

    Raises:
      ValueError: If the bytestream doesn't start with 2049.
    """
    with gzip.GzipFile(fileobj=f) as stream:
        header_magic = _read32(stream)
        if header_magic != 2049:
            raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                             (header_magic, f.name))
        item_count = _read32(stream)
        payload = stream.read(item_count)
    labels = numpy.frombuffer(payload, dtype=numpy.uint8)
    return dense_to_one_hot(labels, num_classes) if one_hot else labels
Example 7
Source File: extractor.py From articlequality with MIT License | 6 votes |
def extract_labels(self, text):
    """
    Yields the labels found in the templates of a piece of wikitext.

    :Parameters:
        text : `str`
            Wikitext markup to extract labels from

    :Returns:
        An iterator over everything `self.from_template` yields for each
        parsed template (documented upstream as (project, label) pairs)
    """
    # Optional cheap pre-check: when the extractor defines filter_text and it
    # rejects the text, skip the parse entirely and yield nothing.
    if hasattr(self, 'filter_text') and not self.filter_text(text):
        return

    for template in mwp.parse(text).filter_templates():
        yield from self.from_template(template)
Example 8
Source File: input.py From DOTA_models with Apache License 2.0 | 5 votes |
def extract_mnist_labels(filename, num_images):
    """
    Extract the labels into a vector of int32 label IDs, with an on-disk cache.

    If ``filename + ".npy"`` does not exist, the gzip file is decoded (8
    header bytes are skipped, then one byte per label is read) and the result
    is stored with ``np.save(filename, ...)``; otherwise the cached array is
    loaded.

    :param filename: path to the gzip-compressed label file
    :param num_images: number of labels to read
    :return: numpy array of labels (int32 when freshly decoded)
    """
    cache_path = filename + ".npy"
    if not tf.gfile.Exists(cache_path):
        with gzip.open(filename) as bytestream:
            bytestream.read(8)
            buf = bytestream.read(1 * num_images)
        labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32)
        # np.save appends ".npy" itself, producing cache_path.
        np.save(filename, labels)
        return labels
    else:
        # The .npy format is binary; a text-mode handle breaks np.load on
        # Python 3, so the cache is opened with 'rb'.
        with tf.gfile.Open(cache_path, mode='rb') as file_obj:
            return np.load(file_obj)
Example 9
Source File: convolutional.py From DOTA_models with Apache License 2.0 | 5 votes |
def extract_labels(filename, num_images):
    """Read ``num_images`` label bytes from a gzip file as int64 label IDs.

    The first 8 bytes of the decompressed stream are skipped; each following
    byte is one label.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        stream.read(8)
        raw = stream.read(1 * num_images)
    return numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.int64)
Example 10
Source File: input_data.py From IntroToDeepLearning with MIT License | 5 votes |
def extract_labels(filename, one_hot=False):
    """Load the labels of a gzipped MNIST label file as a 1D uint8 array.

    The first header field must equal 2049, otherwise ValueError is raised.
    The second header field is used as the number of label bytes to read.
    With ``one_hot`` set, the labels are returned through
    ``dense_to_one_hot``.
    """
    print('Extracting', filename)
    with gzip.open(filename) as gz:
        magic = _read32(gz)
        if magic != 2049:
            message = 'Invalid magic number %d in MNIST label file: %s' % (
                magic, filename)
            raise ValueError(message)
        label_count = _read32(gz)
        labels = numpy.frombuffer(gz.read(label_count), dtype=numpy.uint8)
        if not one_hot:
            return labels
        return dense_to_one_hot(labels)
Example 11
Source File: mnist_input_data.py From python-esppy with Apache License 2.0 | 5 votes |
def extract_labels(filename, one_hot=False):
    """Return the labels stored in a gzipped MNIST label file.

    Produces a 1D uint8 numpy array, or ``dense_to_one_hot`` of that array
    when ``one_hot`` is true. Raises ValueError when the first header field
    is not 2049.
    """
    print('Extracting %s' % filename)
    with gzip.open(filename) as source:
        first_field = _read32(source)
        if first_field != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (first_field, filename))
        n_labels = _read32(source)
        data = source.read(n_labels)
    labels = numpy.frombuffer(data, dtype=numpy.uint8)
    return dense_to_one_hot(labels) if one_hot else labels
Example 12
Source File: mnist.py From dataflow with Apache License 2.0 | 5 votes |
def extract_labels(filename):
    """Decode a gzipped MNIST label file into a 1D uint8 numpy array.

    Raises ValueError when the first header field differs from 2049.
    """
    with gzip.open(filename) as stream:
        magic = _read32(stream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        # The second header field gives the number of one-byte labels.
        payload = stream.read(_read32(stream))
    return numpy.frombuffer(payload, dtype=numpy.uint8)
Example 13
Source File: train_data.py From subsync with Apache License 2.0 | 5 votes |
def extract_labels(srt, samples):
    """Build a 0/1 vector marking the positions covered by subtitles.

    Each subtitle read from ``srt`` sets entries from ``timeToPos(start)``
    through ``timeToPos(end)`` inclusive to 1; positions at or beyond
    ``samples`` are ignored.
    """
    subs = pysrt.open(srt)
    labels = np.zeros(samples)
    limit = len(labels)
    for sub in subs:
        first = timeToPos(sub.start)
        stop = timeToPos(sub.end) + 1
        # Clamp the upper bound instead of testing every index.
        for pos in range(first, min(stop, limit)):
            labels[pos] = 1
    return labels
Example 14
Source File: construct_pdbbind_df.py From deepchem with MIT License | 5 votes |
def extract_labels(pdbbind_label_file):
    """Extract labels from pdbbind label file.

    Lines whose first character is ``#`` and blank lines are skipped. Every
    other line is split on whitespace and contributes
    ``{first field: fourth field}``.

    Returns:
      dict mapping the first field of each data line to its fourth field,
      both as strings.

    Raises:
      AssertionError: if ``pdbbind_label_file`` is not an existing file.
      IndexError: if a non-blank data line has fewer than four fields.
    """
    assert os.path.isfile(pdbbind_label_file)
    labels = {}
    with open(pdbbind_label_file) as f:
        for line in f:
            if line[0] == "#":
                continue
            fields = line.split()
            if not fields:
                # Blank or whitespace-only lines carry no record.
                continue
            # lines in the label file have format
            # PDB-code Resolution Release-Year -logKd Kd reference ligand-name
            labels[fields[0]] = fields[3]
    return labels
Example 15
Source File: mnist_dataset.py From AIX360 with Apache License 2.0 | 5 votes |
def extract_labels(filename, num_images):
    """Read ``num_images`` label bytes from a gzip file as one-hot rows.

    The first 8 bytes of the decompressed stream are skipped. The result is
    a float32 array with one row per label and 10 columns, holding 1.0 in the
    column equal to the label value.
    """
    with gzip.open(filename) as stream:
        stream.read(8)
        raw = stream.read(1 * num_images)
    labels = np.frombuffer(raw, dtype=np.uint8)
    class_ids = np.arange(10)
    one_hot = class_ids == labels[:, None]
    return one_hot.astype(np.float32)
Example 16
Source File: input_data.py From cloudml-samples with Apache License 2.0 | 5 votes |
def extract_labels(filename, one_hot=False, num_classes=10):
    """Read a gzipped MNIST label file into a 1D uint8 numpy array.

    With ``one_hot`` set, the array is converted by
    ``dense_to_one_hot(labels, num_classes)``. Raises ValueError when the
    first header field is not 2049.
    """
    print('Extracting', filename)
    with open(filename, 'rb') as raw_file:
        with gzip.GzipFile(fileobj=raw_file) as stream:
            magic = _read32(stream)
            if magic != 2049:
                raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                                 (magic, filename))
            count = _read32(stream)
            labels = numpy.frombuffer(stream.read(count), dtype=numpy.uint8)
            if not one_hot:
                return labels
            return dense_to_one_hot(labels, num_classes)
Example 17
Source File: pandas_io.py From lambda-packs with MIT License | 5 votes |
def extract_pandas_labels(labels):
    """Return the label values held in a single-column pandas.DataFrame.

    Args:
      labels: `pandas.DataFrame` with one column of labels. Any object that
        is not a `pandas.DataFrame` is returned unchanged.

    Returns:
      ``labels.values`` for a DataFrame, otherwise ``labels`` itself.

    Raises:
      ValueError: if more than one column is found, or a column's dtype name
        is not listed in PANDAS_DTYPES.
    """
    if not isinstance(labels, pd.DataFrame):
        return labels

    if len(labels.columns) > 1:
        raise ValueError('Only one column for labels is allowed.')

    rejected = [name for name in labels
                if labels[name].dtype.name not in PANDAS_DTYPES]
    if rejected:
        details = ["'" + str(name) + "' type=" + str(labels[name].dtype.name)
                   for name in rejected]
        raise ValueError('Data types for extracting labels must be int, '
                         'float, or bool. Found: ' + ', '.join(details))
    return labels.values
Example 18
Source File: mnist.py From lambda-packs with MIT License | 5 votes |
def extract_labels(f, one_hot=False, num_classes=10):
    """Decode MNIST labels from an open, gzip-compressed file object.

    Args:
      f: A file object that can be passed into a gzip reader; ``f.name`` is
        printed and used in the error message.
      one_hot: Does one hot encoding for the result.
      num_classes: Number of classes for the one hot encoding.

    Returns:
      labels: a 1D uint8 numpy array, or the output of ``dense_to_one_hot``
        when ``one_hot`` is set.

    Raises:
      ValueError: If the bytestream doesn't start with 2049.
    """
    print('Extracting', f.name)
    with gzip.GzipFile(fileobj=f) as gz:
        magic = _read32(gz)
        if magic != 2049:
            raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                             (magic, f.name))
        n_items = _read32(gz)
        raw = gz.read(n_items)
        labels = numpy.frombuffer(raw, dtype=numpy.uint8)
        return dense_to_one_hot(labels, num_classes) if one_hot else labels
Example 19
Source File: dask_io.py From lambda-packs with MIT License | 5 votes |
def extract_dask_labels(labels):
    """Prepare dask labels for iteration.

    Args:
      labels: A dask.DataFrame or dask.Series holding the labels.

    Returns:
      ``_construct_dask_df_with_divisions(labels)`` when ``labels`` is an
      instance of ``allowed_classes``; otherwise ``labels`` unchanged.

    Raises:
      ValueError: If ``len()`` of the DataFrame's columns (or of the Series'
        name) is greater than one.
    """
    if isinstance(labels, dd.DataFrame):
        columns_or_name = labels.columns
    elif isinstance(labels, dd.Series):
        # NOTE(review): for a Series this is its name, so the length check
        # below measures the name itself - confirm this is intended.
        columns_or_name = labels.name

    if not isinstance(labels, allowed_classes):
        return labels

    if len(columns_or_name) > 1:
        raise ValueError('Only one column for labels is allowed.')
    return _construct_dask_df_with_divisions(labels)
Example 20
Source File: api.py From sregistry-cli with Mozilla Public License 2.0 | 5 votes |
def extract_labels(self):
    """Return the "Labels" entry of the config, normalising empties to None.

    Returns
    =======
    None when ``self._get_config("Labels")`` equals ``[]``, ``""`` or
    ``None``; otherwise the value as returned.
    """
    found = self._get_config("Labels")
    return None if found in ([], "", None) else found
Example 21
Source File: mnist.py From DDRL with Apache License 2.0 | 5 votes |
def extract_labels(filename):
    """Return the labels of a gzipped MNIST label file as a 1D uint8 array.

    The first header field is validated against 2049 (ValueError otherwise);
    the second header field is the number of label bytes read.
    """
    with gzip.open(filename) as gz:
        header_magic = _read32(gz)
        if header_magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (header_magic, filename))
        label_count = _read32(gz)
        raw = gz.read(label_count)
        return numpy.frombuffer(raw, dtype=numpy.uint8)
Example 22
Source File: mnist.py From dvae with Apache License 2.0 | 5 votes |
def extract_labels(self, filename):
    """Read a gzipped MNIST label file into a vector of int64 label IDs.

    Two header integers are read through ``self.read_header_int``: the first
    must be 2049 (ValueError otherwise), the second is the number of one-byte
    labels that follow.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        if self.read_header_int(stream) != 2049:
            raise ValueError('Invalid magic for MNIST labels')
        count = self.read_header_int(stream)
        raw = stream.read(1 * count)
    as_bytes = np.frombuffer(raw, dtype=np.uint8)
    return as_bytes.astype(np.int64)
Example 23
Source File: input_data.py From Net2Net with MIT License | 5 votes |
def extract_labels(filename, one_hot=False):
    """Read the labels of a gzipped MNIST label file.

    Returns a 1D uint8 numpy array, passed through ``dense_to_one_hot`` when
    ``one_hot`` is true. A first header field other than 2049 raises
    ValueError.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        magic = _read32(stream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        size = _read32(stream)
        labels = numpy.frombuffer(stream.read(size), dtype=numpy.uint8)
    if one_hot:
        labels = dense_to_one_hot(labels)
    return labels
Example 24
Source File: preprocessing.py From WaterNet with MIT License | 5 votes |
def extract_features_and_labels(dataset, tile_size, only_cache=False):
    """Collect the tiled features and labels of every entry in the dataset.

    Each dataset entry is a (geotiff path, shapefile paths) pair that is
    handed to ``create_tiled_features_and_labels``; the per-entry results are
    concatenated in dataset order.

    Returns a ``(features, labels)`` tuple of lists.
    """
    features, labels = [], []
    for geotiff_path, shapefile_paths in dataset:
        tiles = create_tiled_features_and_labels(
            geotiff_path, shapefile_paths, tile_size, only_cache)
        tiled_features, tiled_labels = tiles
        features.extend(tiled_features)
        labels.extend(tiled_labels)
    return features, labels
Example 25
Source File: input_data.py From Digit-Recognizer with MIT License | 5 votes |
def extract_labels(filename, one_hot=False):
    """Read a gzipped MNIST label file, opened through tf.gfile, into labels.

    Returns a 1D uint8 numpy array, or ``dense_to_one_hot`` of it when
    ``one_hot`` is true. Raises ValueError if the first header field is not
    2049.
    """
    print('Extracting', filename)
    with tf.gfile.Open(filename, 'rb') as raw_file:
        with gzip.GzipFile(fileobj=raw_file) as stream:
            header_magic = _read32(stream)
            if header_magic != 2049:
                raise ValueError(
                    'Invalid magic number %d in MNIST label file: %s' %
                    (header_magic, filename))
            count = _read32(stream)
            payload = stream.read(count)
            labels = numpy.frombuffer(payload, dtype=numpy.uint8)
            if not one_hot:
                return labels
            return dense_to_one_hot(labels)
Example 26
Source File: utils.py From deep-pwning with MIT License | 5 votes |
def extract_labels(filename, num_images):
    """Return ``num_images`` labels from a gzip file as int64 label IDs.

    Eight bytes are read and discarded first; every following byte is
    interpreted as one unsigned label.
    """
    print('Extracting', filename)
    header_size = 8
    with gzip.open(filename) as gz:
        gz.read(header_size)
        payload = gz.read(1 * num_images)
        as_uint8 = np.frombuffer(payload, dtype=np.uint8)
        labels = as_uint8.astype(np.int64)
    return labels
Example 27
Source File: input_data.py From variational-autoencoder with Apache License 2.0 | 5 votes |
def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index].

    Args:
        filename: Path to the gzip-compressed MNIST label file.
        one_hot: When true, return ``dense_to_one_hot(labels)`` instead of
            the raw label array.

    Returns:
        A 1D uint8 numpy array, or its one-hot conversion.

    Raises:
        ValueError: If the first header field is not 2049.
    """
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Extracting %s' % filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = numpy.frombuffer(buf, dtype=numpy.uint8)
        if one_hot:
            return dense_to_one_hot(labels)
        return labels
Example 28
Source File: image.py From brainiak with Apache License 2.0 | 5 votes |
def extract_labels(self) -> np.ndarray:
    """Extract condition labels.

    The non-zero entries of ``self`` are located with ``np.where``, which
    must yield exactly three index arrays. For each distinct value of the
    second index (the epoch, going by the variable names), the first index of
    its first non-zero entry is returned.

    Returns
    -------
    np.ndarray
        The condition label of each epoch.
    """
    condition_idxs, epoch_idxs, _unused = np.where(self)
    first_hit_per_epoch = np.unique(epoch_idxs, return_index=True)[1]
    return condition_idxs[first_hit_per_epoch]
Example 29
Source File: convnet.py From CNN-from-Scratch with GNU General Public License v3.0 | 5 votes |
def extract_labels(filename, num_images):
    """Decode ``num_images`` one-byte labels from a gzip file as int64 IDs.

    The stream's first 8 bytes are consumed and ignored before the labels
    are read.
    """
    print('Extracting', filename)
    with gzip.open(filename) as source:
        source.read(8)
        label_bytes = source.read(1 * num_images)
        return np.frombuffer(label_bytes, dtype=np.uint8).astype(np.int64)
Example 30
Source File: setup_mnist.py From Contrastive-Explanation-Method with Apache License 2.0 | 5 votes |
def extract_labels(filename, num_images):
    """Return ``num_images`` labels from a gzip file as float32 one-hot rows.

    Eight bytes are read and discarded, then one byte per label is read.
    Row ``i`` of the result has 10 entries and holds 1.0 where the column
    index equals label ``i``.
    """
    with gzip.open(filename) as gz:
        gz.read(8)
        payload = gz.read(1 * num_images)
        labels = np.frombuffer(payload, dtype=np.uint8)
    return np.equal(np.arange(10), labels[:, np.newaxis]).astype(np.float32)
Example 31
Source File: local_mnist.py From magenta with Apache License 2.0 | 5 votes |
def extract_labels(f, one_hot=False, num_classes=10):
    """Extract the labels into a 1D uint8 np array [index].

    Args:
      f: A file object that can be passed into a gzip reader; ``f.name`` is
        logged and used in the error message.
      one_hot: Does one hot encoding for the result.
      num_classes: Number of classes for the one hot encoding.

    Returns:
      labels: a 1D uint8 np array, or the output of ``dense_to_one_hot`` when
        ``one_hot`` is set.

    Raises:
      ValueError: If the bytestream doesn't start with 2049.
    """
    # The file name needs a %s placeholder; without one the logging call
    # cannot format its extra argument and reports a formatting error.
    tf.logging.info('Extracting %s', f.name)
    with gzip.GzipFile(fileobj=f) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, f.name))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = np.frombuffer(buf, dtype=np.uint8)
        if one_hot:
            return dense_to_one_hot(labels, num_classes)
        return labels
Example 32
Source File: setup.py From breaking_defensive_distillation with GNU General Public License v3.0 | 5 votes |
def extract_labels(filename, num_images):
    """Read ``num_images`` labels into a 1-hot matrix [image index, label index].

    The first 8 bytes of the gzip stream are skipped; the result is float32
    with NUM_LABELS columns.
    """
    with gzip.open(filename) as stream:
        stream.read(8)
        raw = stream.read(1 * num_images)
    labels = np.frombuffer(raw, dtype=np.uint8)
    # Convert to dense 1-hot representation.
    class_ids = np.arange(NUM_LABELS)
    return (class_ids == labels[:, None]).astype(np.float32)


# Get the data.
Example 33
Source File: reuters.py From KATE with BSD 3-Clause "New" or "Revised" License | 5 votes |
def extract_labels(docs, path, output):
    """Collect the labels of the known documents and dump them as JSON.

    Every line of ``path`` is split on whitespace into exactly three fields,
    ``label``, document id and a third ignored field. Labels are gathered per
    document id for ids contained in ``docs``.

    Args:
        docs: container of document ids; membership is tested once per line.
        path: path of the label file to read.
        output: destination handed to ``dump_json``.

    Returns:
        dict mapping each matched document id to a list of its labels.
    """
    # it will be fast if docs is a dict instead of a list
    doc_labels = defaultdict(set)
    with open(path, 'r') as f:
        for line in f:
            label, did, _ = line.strip('\n').split()
            if did in docs:
                doc_labels[did].add(label)
    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    doc_labels = {did: list(found) for did, found in doc_labels.items()}
    dump_json(doc_labels, output)
    return doc_labels
Example 34
Source File: ucb.py From plastering with MIT License | 5 votes |
def extract_raw_ucb_labels():
    """Collect the raw labels of the SODA, SDH and IBM ground-truth files.

    Each ground-truth file is read as pairs of lines: a sentence followed by
    a comma-separated list of ``label:word:type`` elements. Labels whose type
    is ``c`` are gathered together with the sentence they last appeared in.

    Two files are written: ``groundtruth/ucb_raw_labels.txt`` with one entry
    per label, and ``groundtruth/ucb_label_sentence_map.json`` with the
    label-to-sentence mapping.
    """
    labels = set()
    example_dict = {}
    for building in ('SODA', 'SDH', 'IBM'):
        filename = './groundtruth/{0}-GROUND-TRUTH'.format(building)
        with open(filename, 'r') as fp:
            # Drop the last character (the newline) of every line.
            rawlines = [line[:-1] for line in fp.readlines()]
        # Even indices hold sentences, the following odd index its encoding.
        for i in range(0, len(rawlines), 2):
            sentence = rawlines[i]
            print('{0}th line'.format(i))
            for elem in rawlines[i + 1].split(','):
                label, word, t = elem.split(':')
                if t == 'c':
                    labels.add(label)
                    example_dict[label] = sentence

    with open('groundtruth/ucb_raw_labels.txt', 'w') as fp:
        fp.write('{\n')
        for label in labels:
            fp.write(' "{0}": \n'.format(label))
        fp.write('}')

    with open('groundtruth/ucb_label_sentence_map.json', 'w') as fp:
        json.dump(example_dict, fp, indent=2)
Example 35
Source File: mnist_data.py From ladder with GNU General Public License v3.0 | 5 votes |
def extract_labels(filename, verbose=True):
    """Read a gzipped MNIST label file into a 1D uint8 numpy array.

    Prints the file name first unless ``verbose`` is false. Raises
    ValueError when the first header field is not 2049.
    """
    if verbose:
        print('Extracting', filename)
    with gzip.open(filename) as stream:
        magic = _read32(stream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        n_labels = _read32(stream)
        raw = stream.read(n_labels)
    return np.frombuffer(raw, dtype=np.uint8)
Example 36
Source File: preprocessing.py From mimic3-benchmarks with MIT License | 5 votes |
def extract_diagnosis_labels(diagnoses):
    """Pivot diagnoses into one 0/1 column per code in ``diagnosis_labels``.

    A ``VALUE`` column set to 1 is written into ``diagnoses`` (the argument
    is modified). The de-duplicated (ICUSTAY_ID, ICD9_CODE, VALUE) rows are
    pivoted to one row per ICUSTAY_ID; codes from ``diagnosis_labels`` that
    are absent get an all-zero column. Only the ``diagnosis_labels`` columns
    are kept, in that order, renamed with a ``'Diagnosis '`` prefix.
    """
    global diagnosis_labels
    diagnoses['VALUE'] = 1
    deduped = diagnoses[['ICUSTAY_ID', 'ICD9_CODE', 'VALUE']].drop_duplicates()
    pivoted = deduped.pivot(index='ICUSTAY_ID', columns='ICD9_CODE', values='VALUE')
    labels = pivoted.fillna(0).astype(int)
    for code in diagnosis_labels:
        if code not in labels:
            labels[code] = 0
    labels = labels[diagnosis_labels]
    renamed = {code: 'Diagnosis ' + code for code in diagnosis_labels}
    return labels.rename(renamed, axis=1)