Python extract labels

60 Python code examples were found for "extract labels". You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: dataset.py    From classification-of-encrypted-traffic with MIT License 6 votes vote down vote up
def extract_labels(dataframe, one_hot=False, num_classes=10):
    """Pull the 'label' column out of a dataframe and encode it.

    Args:
    dataframe: A pandas dataframe object with a 'label' column.
    one_hot: When true, return the result of `dense_to_one_hot` instead of
        the encoded labels.
    num_classes: Number of classes forwarded to `dense_to_one_hot`.

    Returns:
    labels: the output of `_label_encoder.fit_transform` on the label
        values, or its one hot conversion when `one_hot` is set.
    """
    print('Extracting labels')
    encoded = _label_encoder.fit_transform(dataframe['label'].values)
    return dense_to_one_hot(encoded, num_classes) if one_hot else encoded
Example 2
Source File: ingest.py    From ngraph-python with Apache License 2.0 6 votes vote down vote up
def extract_labels(self, setn):
        """Build the label mapping for one ImageNet split from the devkit archive.

        Args:
            setn: 'train' or 'val'.

        Returns:
            For 'train': a dict mapping each synset id found in the devkit
            metadata to its position in `self.synsets` (which is also set as
            a side effect). For 'val': a dict mapping the 1-based,
            zero-padded line number of the ground-truth file to the label on
            that line minus one.

        Raises:
            IOError: if `self.devkit` does not exist.
            ValueError: if `setn` is neither 'train' nor 'val'.
        """
        if not os.path.exists(self.devkit):
            raise IOError(("Metadata file {} not found. Ensure you have ImageNet downloaded"
                           ).format(self.devkit))

        with tarfile.open(self.devkit, "r:gz") as tf:
            synsetfile = 'ILSVRC2012_devkit_t12/data/meta.mat'
            valfile = 'ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'

            if setn == 'train':
                # get the synset mapping by hacking around matlab's terrible compressed format:
                # skip the first 136 bytes and inflate the remainder
                meta_buff = tf.extractfile(synsetfile).read()
                decomp = zlib.decompressobj()
                inflated = decomp.decompress(meta_buff[136:])
                # The inflated payload is bytes, so the pattern must be a bytes
                # pattern too (a str pattern raises TypeError on Python 3).
                # Ids are decoded so the keys are text, like the 'val' keys.
                self.synsets = [match.decode('ascii')
                                for match in re.findall(br'n\d+', inflated)]
                return {s: i for i, s in enumerate(self.synsets)}
            elif setn == 'val':
                # get the ground truth validation labels and offset to zero
                return {"%08d" % (i + 1): int(x) - 1 for i, x in
                        enumerate(tf.extractfile(valfile))}
            else:
                raise ValueError("Unknown set name: {}".format(setn))
Example 3
Source File: preprocessing.py    From deepwriting with MIT License 6 votes vote down vote up
def extract_eoc_labels(dataset):
    """
    Adds an 'eoc_labels' entry marking the end of each character in a sequence.

    Args:
        dataset: mapping with a 'char_labels' entry; it is modified in place.

    Returns:
        The same `dataset` object, now carrying an 'eoc_labels' list with
        one float32 array per 'char_labels' item.
    """
    eoc_labels = dataset['eoc_labels'] = []
    for char_labels in dataset['char_labels']:
        # Assuming the last stroke is always end-of-char
        boundaries = utils_hw.label_end_of_sub_sequences(char_labels)
        eoc_labels.append(np.expand_dims(np.float32(boundaries), axis=1))

    return dataset
Example 4
Source File: elastic_items.py    From grimoirelab-elk with GNU General Public License v3.0 6 votes vote down vote up
def extract_repo_labels(repo):
        """Split a projects.json repository entry into its URL and its labels.

        The labels declared in the entry are removed from the returned repo
        string to avoid breaking already existing functionalities.

        :param repo: repo url in projects.json
        :returns: a tuple (repo without the labels declaration, list of labels);
            the list is empty when the entry declares no labels
        """
        found = re.compile(PROJECTS_JSON_LABELS_PATTERN).match(repo)
        if not found:
            return repo, []

        # group(1) is the whole labels declaration, group(2) the
        # comma-separated label names inside it.
        declaration = found.group(1)
        names = found.group(2)
        labels_lst = [name.strip() for name in names.split(',')]

        return repo.replace(declaration, '').strip(), labels_lst
Example 5
Source File: mnist_data.py    From active_learning_coreset with MIT License 6 votes vote down vote up
def extract_labels(filename, one_hot=False):
  """Read the labels of a gzipped MNIST label file into a 1D uint8 numpy array.

  With `one_hot` set, the labels are converted by `dense_to_one_hot` first.
  Raises ValueError when the file does not start with the magic number 2049.
  """
  print('Extracting', filename)
  with gzip.open(filename) as stream:
    header_magic = _read32(stream)
    if header_magic != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' %
          (header_magic, filename))
    count = _read32(stream)
    print(count)
    # Only the first element of the count is used as the byte length.
    payload = stream.read(count[0])
  labels = numpy.frombuffer(payload, dtype=numpy.uint8)
  return dense_to_one_hot(labels) if one_hot else labels
Example 6
Source File: format.py    From tensorlang with Apache License 2.0 6 votes vote down vote up
def extract_labels(f, one_hot=False, num_classes=10):
  """Decode the label bytes of a gzipped MNIST label stream.

  Args:
    f: A file object that can be passed into a gzip reader.
    one_hot: Whether to convert the labels with `dense_to_one_hot`.
    num_classes: Number of classes passed to the one hot encoding.
  Returns:
    labels: a 1D uint8 numpy array, or its one hot conversion when
      `one_hot` is set.
  Raises:
    ValueError: If the bytestream doesn't start with 2049.
  """
  with gzip.GzipFile(fileobj=f) as gz:
    magic = _read32(gz)
    if magic != 2049:
      raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                       (magic, f.name))
    item_count = _read32(gz)
    raw = gz.read(item_count)
  labels = numpy.frombuffer(raw, dtype=numpy.uint8)
  if not one_hot:
    return labels
  return dense_to_one_hot(labels, num_classes)
Example 7
Source File: extractor.py    From articlequality with MIT License 6 votes vote down vote up
def extract_labels(self, text):
        """
        Yields the labels found in the templates of a version of text.

        :Parameters:
            text : `str`
                Wikitext markup to extract labels from

        :Returns:
            An iterator over (project, label) pairs
        """
        # Cheap pre-check: when a `filter_text` hook exists and rejects the
        # text (eg. because the template name never appears), skip the parse
        # entirely and yield nothing.
        if hasattr(self, 'filter_text') and not self.filter_text(text):
            return

        for template in mwp.parse(text).filter_templates():
            yield from self.from_template(template)
Example 8
Source File: input.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def extract_mnist_labels(filename, num_images):
  """
  Extract the labels of a gzipped label file, caching them as a .npy file.

  When `filename + ".npy"` does not exist yet, the labels are read from
  `filename`, saved to that cache path and returned. Otherwise the cached
  array is loaded and returned.

  Args:
    filename: path of the gzipped label file.
    num_images: number of label bytes to read after the 8-byte header.

  Returns:
    A numpy array of label IDs (int32 when freshly extracted).
  """
  if not tf.gfile.Exists(filename+".npy"):
    with gzip.open(filename) as bytestream:
      bytestream.read(8)  # skip the 8-byte header
      buf = bytestream.read(1 * num_images)
      labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32)
      # np.save appends ".npy", which matches the cache path tested above.
      np.save(filename, labels)
    return labels
  else:
    # .npy is a binary format, so the cache has to be opened in binary mode;
    # text mode hands np.load decoded str data on Python 3.
    with tf.gfile.Open(filename+".npy", mode='rb') as file_obj:
      return np.load(file_obj)
Example 9
Source File: convolutional.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename, num_images):
  """Read `num_images` label bytes from a gzipped file as int64 label IDs."""
  print('Extracting', filename)
  with gzip.open(filename) as stream:
    stream.read(8)  # the 8-byte header is skipped, not parsed
    raw = stream.read(num_images)
  return numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.int64)
Example 10
Source File: input_data.py    From IntroToDeepLearning with MIT License 5 votes vote down vote up
def extract_labels(filename, one_hot=False):
  """Load MNIST labels from a gzip file as a 1D uint8 numpy array [index].

  When `one_hot` is true the array is passed through `dense_to_one_hot`
  before being returned. A ValueError is raised if the file's magic number
  is not 2049.
  """
  print('Extracting', filename)
  with gzip.open(filename) as gz_stream:
    magic = _read32(gz_stream)
    if magic != 2049:
      message = 'Invalid magic number %d in MNIST label file: %s' % (
          magic, filename)
      raise ValueError(message)
    n_labels = _read32(gz_stream)
    raw_bytes = gz_stream.read(n_labels)
    labels = numpy.frombuffer(raw_bytes, dtype=numpy.uint8)
    return dense_to_one_hot(labels) if one_hot else labels
Example 11
Source File: mnist_input_data.py    From python-esppy with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename, one_hot=False):
  """Return the labels stored in a gzipped MNIST label file.

  The result is a 1D uint8 numpy array [index], or the output of
  `dense_to_one_hot` on that array when `one_hot` is set. Raises ValueError
  if the magic number read from the file is not 2049.
  """
  print('Extracting %s' % filename)
  with gzip.open(filename) as source:
    magic_number = _read32(source)
    if magic_number != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' %
          (magic_number, filename))
    total = _read32(source)
    data = source.read(total)
  dense = numpy.frombuffer(data, dtype=numpy.uint8)
  if one_hot:
    return dense_to_one_hot(dense)
  return dense
Example 12
Source File: mnist.py    From dataflow with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename):
    """Read a gzipped MNIST label file into a 1D uint8 numpy array [index].

    Raises ValueError if the file does not start with the magic number 2049.
    """
    with gzip.open(filename) as stream:
        magic = _read32(stream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        # The second header field is the number of label bytes that follow.
        label_bytes = stream.read(_read32(stream))
    return numpy.frombuffer(label_bytes, dtype=numpy.uint8)
Example 13
Source File: train_data.py    From subsync with Apache License 2.0 5 votes vote down vote up
def extract_labels(srt, samples):
    """Build a 0/1 array of length `samples` marking positions covered by subtitles.

    Every subtitle in the `srt` file sets the positions from
    `timeToPos(sub.start)` through `timeToPos(sub.end)` (inclusive) to 1.
    """
    subtitles = pysrt.open(srt)
    labels = np.zeros(samples)
    for subtitle in subtitles:
        first = timeToPos(subtitle.start)
        stop = timeToPos(subtitle.end) + 1
        # Positions at or past the end of the array are ignored.
        for pos in range(first, min(stop, len(labels))):
            labels[pos] = 1

    return labels
Example 14
Source File: construct_pdbbind_df.py    From deepchem with MIT License 5 votes vote down vote up
def extract_labels(pdbbind_label_file):
  """Extract labels from pdbbind label file.

  Lines in the label file have the format
  PDB-code Resolution Release-Year -logKd Kd reference ligand-name
  Comment lines (starting with "#") and blank lines are skipped.

  Args:
    pdbbind_label_file: path of the label file; must be an existing file.

  Returns:
    A dict mapping each PDB code (first field) to its -logKd value
    (fourth field), kept as the string read from the file.
  """
  assert os.path.isfile(pdbbind_label_file)
  labels = {}
  with open(pdbbind_label_file) as f:
    for line in f:
      fields = line.split()
      # An empty or whitespace-only line has no fields; indexing it would
      # raise IndexError, so skip it together with comment lines.
      if not fields or fields[0].startswith("#"):
        continue
      labels[fields[0]] = fields[3]
  return labels
Example 15
Source File: mnist_dataset.py    From AIX360 with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename, num_images):
    """Read `num_images` labels from a gzipped file as a float32 one-hot matrix.

    The first 8 bytes of the file are skipped. Row i of the result has a 1.0
    in the column equal to label i (columns 0-9) and 0.0 elsewhere.
    """
    with gzip.open(filename) as stream:
        stream.read(8)
        raw = stream.read(num_images)
    label_column = np.frombuffer(raw, dtype=np.uint8)[:, None]
    return np.equal(np.arange(10), label_column).astype(np.float32)
Example 16
Source File: input_data.py    From cloudml-samples with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename, one_hot=False, num_classes=10):
  """Read a gzipped MNIST label file into a 1D uint8 numpy array [index].

  With `one_hot` set, the labels are converted by `dense_to_one_hot` using
  `num_classes`. Raises ValueError if the magic number is not 2049.
  """
  print('Extracting', filename)
  with open(filename, 'rb') as raw_file:
    with gzip.GzipFile(fileobj=raw_file) as stream:
      magic = _read32(stream)
      if magic != 2049:
        raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                         (magic, filename))
      size = _read32(stream)
      payload = stream.read(size)
      labels = numpy.frombuffer(payload, dtype=numpy.uint8)
      if not one_hot:
        return labels
      return dense_to_one_hot(labels, num_classes)
Example 17
Source File: pandas_io.py    From lambda-packs with MIT License 5 votes vote down vote up
def extract_pandas_labels(labels):
  """Return the values of a single-column pandas.DataFrame of labels.

  Args:
    labels: `pandas.DataFrame` containing one column of labels to be
      extracted. Any other object is returned as is.

  Returns:
    A numpy `ndarray` of labels from the DataFrame, or `labels` itself when
    it is not a `pandas.DataFrame`.

  Raises:
    ValueError: if more than one column is found or a column's dtype name is
      not in `PANDAS_DTYPES`.
  """
  if not isinstance(labels, pd.DataFrame):
    return labels

  if len(labels.columns) > 1:
    raise ValueError('Only one column for labels is allowed.')

  unsupported = [column for column in labels
                 if labels[column].dtype.name not in PANDAS_DTYPES]
  if unsupported:
    details = ', '.join(
        "'" + str(column) + "' type=" + str(labels[column].dtype.name)
        for column in unsupported)
    raise ValueError('Data types for extracting labels must be int, '
                     'float, or bool. Found: ' + details)
  return labels.values
Example 18
Source File: mnist.py    From lambda-packs with MIT License 5 votes vote down vote up
def extract_labels(f, one_hot=False, num_classes=10):
  """Read the labels of a gzipped MNIST label stream.

  Args:
    f: A file object that can be passed into a gzip reader.
    one_hot: Whether to one hot encode the result.
    num_classes: Number of classes for the one hot encoding.

  Returns:
    labels: a 1D uint8 numpy array, or the output of `dense_to_one_hot` on
      it when `one_hot` is set.

  Raises:
    ValueError: If the bytestream doesn't start with 2049.
  """
  print('Extracting', f.name)
  with gzip.GzipFile(fileobj=f) as unzipped:
    file_magic = _read32(unzipped)
    if file_magic != 2049:
      raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                       (file_magic, f.name))
    label_count = _read32(unzipped)
    content = unzipped.read(label_count)
  labels = numpy.frombuffer(content, dtype=numpy.uint8)
  return dense_to_one_hot(labels, num_classes) if one_hot else labels
Example 19
Source File: dask_io.py    From lambda-packs with MIT License 5 votes vote down vote up
def extract_dask_labels(labels):
  """Extract data from dask.Series or dask.DataFrame for labels.

  Given a distributed dask.DataFrame or dask.Series containing exactly one
  column or name, this operation returns a single dask.DataFrame or dask.Series
  that can be iterated over.

  Args:
    labels: A distributed dask.DataFrame or dask.Series with exactly one
            column or name.

  Returns:
    The result of `_construct_dask_df_with_divisions(labels)` when `labels`
    is an instance of `allowed_classes`; otherwise `labels` is returned
    without modification.

  Raises:
    ValueError: If the supplied dask.DataFrame contains more than one
                column or the supplied dask.Series contains more than
                one name.
  """
  if isinstance(labels, dd.DataFrame):
    column_names = labels.columns
  elif isinstance(labels, dd.Series):
    # NOTE(review): for a Series this is the name itself, so the len() check
    # below measures the name's length — confirm this is intended.
    column_names = labels.name

  if not isinstance(labels, allowed_classes):
    return labels

  if len(column_names) > 1:
    raise ValueError('Only one column for labels is allowed.')
  return _construct_dask_df_with_divisions(labels)
Example 20
Source File: api.py    From sregistry-cli with Mozilla Public License 2.0 5 votes vote down vote up
def extract_labels(self):
    """extract_labels returns the "Labels" entry of the config, normalizing
       empty values to None.

    Returns
    =======
    The value of self._get_config("Labels"), or None when that value is an
    empty list, an empty string or None.

    """
    found = self._get_config("Labels")
    return None if found in ([], "", None) else found
Example 21
Source File: mnist.py    From DDRL with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename):
    """Return the labels of a gzipped MNIST label file as a 1D uint8 numpy array.

    Raises ValueError if the magic number read from the file is not 2049.
    """
    with gzip.open(filename) as gz:
        header = _read32(gz)
        if header != 2049:
            raise ValueError(
              'Invalid magic number %d in MNIST label file: %s' %
              (header, filename))
        n_items = _read32(gz)
        return numpy.frombuffer(gz.read(n_items), dtype=numpy.uint8)
Example 22
Source File: mnist.py    From dvae with Apache License 2.0 5 votes vote down vote up
def extract_labels(self, filename):
        """Read a gzipped MNIST label file and return its labels as int64 IDs.

        Raises ValueError if the first header integer is not 2049.
        """
        print('Extracting', filename)
        with gzip.open(filename) as stream:
            if self.read_header_int(stream) != 2049:
                raise ValueError('Invalid magic for MNIST labels')

            # The second header integer is the number of one-byte labels.
            label_count = self.read_header_int(stream)
            raw = stream.read(label_count)
        return np.frombuffer(raw, dtype=np.uint8).astype(np.int64)
Example 23
Source File: input_data.py    From Net2Net with MIT License 5 votes vote down vote up
def extract_labels(filename, one_hot=False):
    """Load the labels of a gzipped MNIST label file.

    Returns a 1D uint8 numpy array [index], or its `dense_to_one_hot`
    conversion when `one_hot` is set. Raises ValueError if the file's magic
    number is not 2049.
    """
    print('Extracting', filename)
    with gzip.open(filename) as label_stream:
        magic = _read32(label_stream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        label_bytes = label_stream.read(_read32(label_stream))
    labels = numpy.frombuffer(label_bytes, dtype=numpy.uint8)
    if not one_hot:
        return labels
    return dense_to_one_hot(labels)
Example 24
Source File: preprocessing.py    From WaterNet with MIT License 5 votes vote down vote up
def extract_features_and_labels(dataset, tile_size, only_cache=False):
    """Create tiled features and labels for every (satellite image, shapefiles)
    pair in the dataset and concatenate them.

    Returns a tuple (features, labels) of two lists."""
    all_features, all_labels = [], []

    for geotiff_path, shapefile_paths in dataset:
        tiles = create_tiled_features_and_labels(
            geotiff_path, shapefile_paths, tile_size, only_cache)
        image_features, image_labels = tiles

        all_features.extend(image_features)
        all_labels.extend(image_labels)

    return all_features, all_labels
Example 25
Source File: input_data.py    From Digit-Recognizer with MIT License 5 votes vote down vote up
def extract_labels(filename, one_hot=False):
  """Read a gzipped MNIST label file into a 1D uint8 numpy array [index].

  The file is opened through `tf.gfile.Open`. With `one_hot` set, the labels
  are converted by `dense_to_one_hot`. Raises ValueError if the magic number
  is not 2049.
  """
  print('Extracting', filename)
  with tf.gfile.Open(filename, 'rb') as raw_file:
    with gzip.GzipFile(fileobj=raw_file) as stream:
      magic = _read32(stream)
      if magic != 2049:
        raise ValueError(
            'Invalid magic number %d in MNIST label file: %s' %
            (magic, filename))
      amount = _read32(stream)
      chunk = stream.read(amount)
      labels = numpy.frombuffer(chunk, dtype=numpy.uint8)
      return dense_to_one_hot(labels) if one_hot else labels
Example 26
Source File: utils.py    From deep-pwning with MIT License 5 votes vote down vote up
def extract_labels(filename, num_images):
    """Return the first `num_images` labels of a gzipped file as int64 IDs.

    The 8 bytes at the start of the file are skipped before reading.
    """
    print('Extracting', filename)
    with gzip.open(filename) as gz:
        gz.read(8)
        label_bytes = gz.read(num_images)
    as_uint8 = np.frombuffer(label_bytes, dtype=np.uint8)
    return as_uint8.astype(np.int64)
Example 27
Source File: input_data.py    From variational-autoencoder with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index].

    Args:
        filename: path of the gzipped MNIST label file.
        one_hot: when true, return `dense_to_one_hot(labels)` instead of the
            raw label array.

    Returns:
        A 1D uint8 numpy array, or its one hot conversion.

    Raises:
        ValueError: if the magic number read from the file is not 2049.
    """
    # A single %-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('Extracting %s' % filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = numpy.frombuffer(buf, dtype=numpy.uint8)
        if one_hot:
            return dense_to_one_hot(labels)
        return labels
Example 28
Source File: image.py    From brainiak with Apache License 2.0 5 votes vote down vote up
def extract_labels(self) -> np.ndarray:
        """Extract condition labels.

        Returns
        -------
        np.ndarray
            The condition label of each epoch.
        """
        nonzero_conditions, nonzero_epochs, _ = np.where(self)
        # return_index gives, per distinct epoch, the position of its first
        # nonzero entry; the condition index at that position is its label.
        first_positions = np.unique(nonzero_epochs, return_index=True)[1]
        return nonzero_conditions[first_positions]
Example 29
Source File: convnet.py    From CNN-from-Scratch with GNU General Public License v3.0 5 votes vote down vote up
def extract_labels(filename, num_images):
	"""Load `num_images` one-byte labels from a gzipped file as int64 label IDs."""
	print('Extracting', filename)
	with gzip.open(filename) as source:
		# The first 8 bytes are read and discarded.
		source.read(8)
		data = source.read(num_images)
	return np.frombuffer(data, dtype=np.uint8).astype(np.int64)
Example 30
Source File: setup_mnist.py    From Contrastive-Explanation-Method with Apache License 2.0 5 votes vote down vote up
def extract_labels(filename, num_images):
    """Return `num_images` labels from a gzipped file as float32 one-hot rows.

    The first 8 bytes are skipped; each row has 10 columns and holds 1.0 in
    the column equal to the label value.
    """
    with gzip.open(filename) as gz:
        gz.read(8)
        label_bytes = gz.read(num_images)
    labels = np.frombuffer(label_bytes, dtype=np.uint8)
    classes = np.arange(10)
    one_hot = classes == labels[:, None]
    return one_hot.astype(np.float32)
Example 31
Source File: local_mnist.py    From magenta with Apache License 2.0 5 votes vote down vote up
def extract_labels(f, one_hot=False, num_classes=10):
  """Extract the labels into a 1D uint8 np array [index].

  Args:
    f: A file object that can be passed into a gzip reader.
    one_hot: Does one hot encoding for the result.
    num_classes: Number of classes for the one hot encoding.

  Returns:
    labels: a 1D uint8 np array, or its one hot conversion when `one_hot`
      is set.

  Raises:
    ValueError: If the bytestream doesn't start with 2049.
  """
  # The logging call needs a %s placeholder for its argument; without one
  # the record cannot be formatted.
  tf.logging.info('Extracting %s', f.name)
  with gzip.GzipFile(fileobj=f) as bytestream:
    magic = _read32(bytestream)
    if magic != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' % (magic, f.name))
    num_items = _read32(bytestream)
    buf = bytestream.read(num_items)
    labels = np.frombuffer(buf, dtype=np.uint8)
    if one_hot:
      return dense_to_one_hot(labels, num_classes)
    return labels
Example 32
Source File: setup.py    From breaking_defensive_distillation with GNU General Public License v3.0 5 votes vote down vote up
def extract_labels(filename, num_images):
  """Read labels from a gzipped file into a 1-hot matrix [image index, label index].

  The first 8 bytes are skipped, then `num_images` label bytes are read. The
  result is float32 with `NUM_LABELS` columns.
  """
  with gzip.open(filename) as stream:
    stream.read(8)
    raw = stream.read(num_images)
  # Compare every label against each class index to get the dense 1-hot rows.
  label_column = np.frombuffer(raw, dtype=np.uint8)[:, None]
  return np.equal(np.arange(NUM_LABELS), label_column).astype(np.float32)


# Get the data. 
Example 33
Source File: reuters.py    From KATE with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def extract_labels(docs, path, output):
    """Collect the labels of the known documents listed in a label file.

    Each line of `path` must split on whitespace into exactly three fields:
    a label, a document id and a third field that is ignored. Labels are
    gathered per document id, for ids contained in `docs` only, and the
    resulting mapping is passed to `dump_json` with `output`.

    Args:
        docs: container of known document ids (membership is tested).
        path: path of the label file.
        output: destination handed to `dump_json`.

    Returns:
        A dict mapping document id to the list of its distinct labels.
    """
    # it will be fast if docs is a dict instead of a list
    label_sets = defaultdict(set)
    with open(path, 'r') as f:
        for line in f:
            label, did, _ = line.strip('\n').split()
            if did in docs:
                label_sets[did].add(label)
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    doc_labels = {did: list(found) for did, found in label_sets.items()}
    dump_json(doc_labels, output)

    return doc_labels
Example 34
Source File: ucb.py    From plastering with MIT License 5 votes vote down vote up
def extract_raw_ucb_labels():
    """Collect the labels tagged 'c' in the UCB ground-truth files.

    For each building, the ground-truth file is read as pairs of lines: a
    sentence followed by a comma-separated list of `label:word:type`
    elements. Labels whose type is 'c' are written to
    'groundtruth/ucb_raw_labels.txt', and a label -> sentence map (the last
    sentence seen per label) to 'groundtruth/ucb_label_sentence_map.json'.
    """
    labels = set()
    example_dict = {}
    for building in ('SODA', 'SDH', 'IBM'):
        filename = './groundtruth/{0}-GROUND-TRUTH'.format(building)
        with open(filename, 'r') as fp:
            # Drop the last character (the line terminator) of every line.
            rawlines = [line[:-1] for line in fp]

        # Even lines hold the sentence, the following odd line its encoding.
        for i in range(0, len(rawlines), 2):
            sentence = rawlines[i]
            print('{0}th line'.format(i))
            for elem in rawlines[i + 1].split(','):
                label, word, t = elem.split(':')
                if t != 'c':
                    continue
                labels.add(label)
                example_dict[label] = sentence

    with open('groundtruth/ucb_raw_labels.txt', 'w') as fp:
        fp.write('{\n')
        fp.writelines('  "{0}": \n'.format(label) for label in labels)
        fp.write('}')

    with open('groundtruth/ucb_label_sentence_map.json', 'w') as fp:
        json.dump(example_dict, fp, indent=2)
Example 35
Source File: mnist_data.py    From ladder with GNU General Public License v3.0 5 votes vote down vote up
def extract_labels(filename, verbose=True):
    """Read a gzipped MNIST label file into a 1D uint8 numpy array [index].

    Prints the file name first unless `verbose` is false. Raises ValueError
    if the magic number read from the file is not 2049.
    """
    if verbose:
        print('Extracting', filename)
    with gzip.open(filename) as stream:
        magic = _read32(stream)
        if magic != 2049:
            raise ValueError(
              'Invalid magic number %d in MNIST label file: %s' %
              (magic, filename))
        item_total = _read32(stream)
        raw = stream.read(item_total)
    return np.frombuffer(raw, dtype=np.uint8)
Example 36
Source File: preprocessing.py    From mimic3-benchmarks with MIT License 5 votes vote down vote up
def extract_diagnosis_labels(diagnoses):
    """Pivot a diagnoses table into one 0/1 indicator column per diagnosis label.

    A 'VALUE' column set to 1 is added to `diagnoses` in place. The result is
    indexed by 'ICUSTAY_ID', has exactly the codes in `diagnosis_labels` as
    columns (codes missing from the data are filled with 0), and each column
    is renamed to 'Diagnosis <code>'.
    """
    diagnoses['VALUE'] = 1
    unique_rows = diagnoses[['ICUSTAY_ID', 'ICD9_CODE', 'VALUE']].drop_duplicates()
    pivoted = unique_rows.pivot(index='ICUSTAY_ID', columns='ICD9_CODE', values='VALUE')
    labels = pivoted.fillna(0).astype(int)
    for code in diagnosis_labels:
        if code not in labels:
            labels[code] = 0
    labels = labels[diagnosis_labels]
    new_names = {code: 'Diagnosis ' + code for code in diagnosis_labels}
    return labels.rename(new_names, axis=1)