Python extract labels

Source File: dataset.py From classification-of-encrypted-traffic with MIT License

6 votes

def extract_labels(dataframe, one_hot=False, num_classes=10):
    """Pull the 'label' column out of a dataframe and encode it.

    Args:
    dataframe: A pandas dataframe object with a 'label' column.
    one_hot: When true, the encoded labels are passed through
        dense_to_one_hot before being returned.
    num_classes: Number of classes handed to dense_to_one_hot.

    Returns:
    labels: the output of _label_encoder.fit_transform on the raw column
        values, or its one hot form when one_hot is set.
    """
    print('Extracting labels', )
    # The encoder is re-fitted on every call, so the mapping depends only on
    # the labels present in this dataframe.
    encoded = _label_encoder.fit_transform(dataframe['label'].values)
    return dense_to_one_hot(encoded, num_classes) if one_hot else encoded

Source File: ingest.py From ngraph-python with Apache License 2.0

6 votes

def extract_labels(self, setn):
    """Build the label lookup for one ImageNet split from the devkit archive.

    Args:
        setn: 'train' or 'val'.

    Returns:
        For 'train': a dict mapping each synset id (e.g. 'n01440764') to its
        position in the order the ids are found in meta.mat; the id list is
        also stored on self.synsets.
        For 'val': a dict mapping the zero-padded 1-based line number of the
        ground-truth file to the zero-based label on that line.

    Raises:
        IOError: if self.devkit does not exist.
        ValueError: if setn is neither 'train' nor 'val'.
    """
    if not os.path.exists(self.devkit):
        raise IOError(("Metadata file {} not found. Ensure you have ImageNet downloaded"
                       ).format(self.devkit))

    with tarfile.open(self.devkit, "r:gz") as tf:
        synsetfile = 'ILSVRC2012_devkit_t12/data/meta.mat'
        valfile = 'ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'

        if setn == 'train':
            # get the synset mapping by hacking around matlab's terrible compressed format:
            # skip the first 136 bytes and inflate the rest as a zlib stream
            meta_buff = tf.extractfile(synsetfile).read()
            decomp = zlib.decompressobj()
            # Decode before matching: a str pattern cannot be applied to bytes
            # on Python 3. latin-1 maps every byte, so decoding never fails.
            meta_text = decomp.decompress(meta_buff[136:]).decode('latin-1')
            self.synsets = re.findall(r'n\d+', meta_text)
            return {s: i for i, s in enumerate(self.synsets)}
        elif setn == 'val':
            # get the ground truth validation labels and offset to zero
            return {"%08d" % (i + 1): int(x) - 1 for i, x in
                    enumerate(tf.extractfile(valfile))}
        else:
            raise ValueError("Unknown set name: {}".format(setn))

Source File: preprocessing.py From deepwriting with MIT License

6 votes

def extract_eoc_labels(dataset):
    """
    Adds an 'eoc_labels' entry holding one end-of-character label array per
    sequence found in dataset['char_labels'].
    Args:
        dataset: mapping with a 'char_labels' entry; it is modified in place.

    Returns:
        The same dataset object, now carrying the 'eoc_labels' list.
    """
    dataset['eoc_labels'] = []
    for sequence_labels in dataset['char_labels']:
        # Assuming the last stroke is always end-of-char
        end_marks = utils_hw.label_end_of_sub_sequences(sequence_labels)
        # Cast to float32 and append a trailing axis of size one.
        as_column = np.expand_dims(np.float32(end_marks), axis=1)
        dataset['eoc_labels'].append(as_column)

    return dataset

Source File: elastic_items.py From grimoirelab-elk with GNU General Public License v3.0

6 votes

def extract_repo_labels(repo):
    """Split a projects.json repository entry into its URL and its labels.

    The label declaration is removed from the returned entry to avoid
    breaking already existing functionalities.

    :param repo: repo url in projects.json
    :returns: a (processed_repo, labels) tuple; the labels list is empty and
        the repo is returned untouched when the entry does not match
        PROJECTS_JSON_LABELS_PATTERN
    """
    match = re.compile(PROJECTS_JSON_LABELS_PATTERN).match(repo)
    if not match:
        return repo, []

    # Group 1 is the text cut out of the entry, group 2 the comma-separated
    # label names.
    labels_info = match.group(1)
    labels_lst = [name.strip() for name in match.group(2).split(',')]
    return repo.replace(labels_info, '').strip(), labels_lst

Source File: mnist_data.py From active_learning_coreset with MIT License

6 votes

def extract_labels(filename, one_hot=False):
  """Read a gzipped MNIST label file into a 1D uint8 numpy array.

  Args:
    filename: Path of the gzip-compressed label file.
    one_hot: When true, return dense_to_one_hot(labels) instead.

  Raises:
    ValueError: If the first header word is not 2049.
  """
  print('Extracting', filename)
  with gzip.open(filename) as stream:
    header_magic = _read32(stream)
    if header_magic != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' %
          (header_magic, filename))
    label_count = _read32(stream)
    print(label_count)
    # The count is indexed, so _read32 presumably returns an array-like
    # here -- confirm against its definition.
    raw = stream.read(label_count[0])
  labels = numpy.frombuffer(raw, dtype=numpy.uint8)
  return dense_to_one_hot(labels) if one_hot else labels

Source File: format.py From tensorlang with Apache License 2.0

6 votes

def extract_labels(f, one_hot=False, num_classes=10):
  """Decode a gzipped MNIST label stream into a 1D uint8 numpy array.
  Args:
    f: A file object that can be passed into a gzip reader; its `name`
      attribute is used in the error message.
    one_hot: When true, return the one hot encoding of the labels.
    num_classes: Number of classes handed to dense_to_one_hot.
  Returns:
    labels: a 1D uint8 numpy array, or the result of dense_to_one_hot when
      one_hot is set.
  Raises:
    ValueError: If the bytestream doesn't start with 2049.
  """
  with gzip.GzipFile(fileobj=f) as stream:
    file_magic = _read32(stream)
    if file_magic != 2049:
      raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                       (file_magic, f.name))
    # The second header word gives the number of label bytes that follow.
    raw_labels = stream.read(_read32(stream))
  labels = numpy.frombuffer(raw_labels, dtype=numpy.uint8)
  if not one_hot:
    return labels
  return dense_to_one_hot(labels, num_classes)

Source File: extractor.py From articlequality with MIT License

6 votes

def extract_labels(self, text):
    """
    Yields the labels found in the templates of one version of text.

    :Parameters:
        text : `str`
            Wikitext markup to extract labels from

    :Returns:
        An iterator over whatever `self.from_template` yields for each
        template in the text
    """
    # Optional cheap pre-check: when the extractor defines filter_text and
    # it rejects the text, stop before paying for a full parse.
    if hasattr(self, 'filter_text') and not self.filter_text(text):
        return

    for template in mwp.parse(text).filter_templates():
        yield from self.from_template(template)

Source File: input.py From DOTA_models with Apache License 2.0

5 votes

def extract_mnist_labels(filename, num_images):
  """
  Extract the labels into a vector of int32 label IDs.

  The decoded labels are cached next to the source file as
  `filename + ".npy"`; when that cache exists it is loaded instead of
  decompressing the archive again.

  Args:
    filename: Path of the gzip-compressed label file.
    num_images: Number of one-byte labels to read.

  Returns:
    A numpy array of labels (int32 when freshly decoded).
  """
  if not tf.gfile.Exists(filename+".npy"):
    with gzip.open(filename) as bytestream:
      # Skip the 8 header bytes that precede the label data.
      bytestream.read(8)
      buf = bytestream.read(1 * num_images)
      labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32)
      # np.save appends ".npy", producing the cache path checked above.
      np.save(filename, labels)
    return labels
  else:
    # .npy files are binary; text mode would try to decode the bytes.
    with tf.gfile.Open(filename+".npy", mode='rb') as file_obj:
      return np.load(file_obj)

Source File: convolutional.py From DOTA_models with Apache License 2.0

5 votes

def extract_labels(filename, num_images):
  """Read `num_images` one-byte labels from a gzipped file as int64 IDs.

  The first 8 bytes of the decompressed stream are skipped as header.
  """
  print('Extracting', filename)
  with gzip.open(filename) as stream:
    stream.read(8)
    raw = stream.read(1 * num_images)
  return numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.int64)

Source File: input_data.py From IntroToDeepLearning with MIT License

5 votes

def extract_labels(filename, one_hot=False):
  """Load the labels of a gzipped MNIST label file as a 1D uint8 array.

  With one_hot set, the labels are returned through dense_to_one_hot.
  A ValueError is raised when the file does not start with 2049.
  """
  print('Extracting', filename)
  with gzip.open(filename) as gz:
    magic_word = _read32(gz)
    if magic_word != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' %
          (magic_word, filename))
    count = _read32(gz)
    payload = gz.read(count)
    labels = numpy.frombuffer(payload, dtype=numpy.uint8)
    return dense_to_one_hot(labels) if one_hot else labels

Source File: mnist_input_data.py From python-esppy with Apache License 2.0

5 votes

def extract_labels(filename, one_hot=False):
  """Return the labels stored in a gzipped MNIST label file.

  Args:
    filename: Path of the gzip-compressed label file.
    one_hot: When true, the result of dense_to_one_hot is returned instead
      of the raw 1D uint8 array.

  Raises:
    ValueError: If the header magic number is not 2049.
  """
  print('Extracting %s' % filename)
  with gzip.open(filename) as source:
    magic = _read32(source)
    if magic != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' %
          (magic, filename))
    raw_bytes = source.read(_read32(source))
  dense = numpy.frombuffer(raw_bytes, dtype=numpy.uint8)
  if one_hot:
    dense = dense_to_one_hot(dense)
  return dense

Source File: mnist.py From dataflow with Apache License 2.0

5 votes

def extract_labels(filename):
    """Read a gzipped MNIST label file into a 1D uint8 numpy array.

    Raises ValueError when the first header word is not 2049.
    """
    with gzip.open(filename) as stream:
        header = _read32(stream)
        if header != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (header, filename))
        # Second header word: how many label bytes follow.
        n_labels = _read32(stream)
        raw = stream.read(n_labels)
    return numpy.frombuffer(raw, dtype=numpy.uint8)

Source File: train_data.py From subsync with Apache License 2.0

5 votes

def extract_labels(srt, samples):
    """Mark which sample positions are covered by a subtitle.

    Opens the subtitle file `srt`, converts each subtitle's start and end
    with timeToPos, and sets every position from start through end
    (inclusive) to 1 in a zero array created with np.zeros(samples).
    Positions at or beyond the end of the array are ignored.
    """
    subs = pysrt.open(srt)
    labels = np.zeros(samples)
    for sub in subs:
        first = timeToPos(sub.start)
        stop = timeToPos(sub.end) + 1
        for pos in range(first, stop):
            # Positions only grow, so nothing after the first overflow
            # can land inside the array.
            if pos >= len(labels):
                break
            labels[pos] = 1

    return labels

Source File: construct_pdbbind_df.py From deepchem with MIT License

5 votes

def extract_labels(pdbbind_label_file):
  """Extract labels from pdbbind label file.

  Lines in the label file have the format
  `PDB-code Resolution Release-Year -logKd Kd reference ligand-name`.
  Lines starting with "#" and blank lines are skipped.

  Args:
    pdbbind_label_file: Path to the label file; must exist.

  Returns:
    A dict mapping the first field of each data line (the PDB code) to its
    fourth field (the -logKd column), both kept as strings.
  """
  assert os.path.isfile(pdbbind_label_file)
  labels = {}
  with open(pdbbind_label_file) as f:
    for line in f:
      if line[0] == "#":
        continue
      fields = line.split()
      if not fields:
        # Blank or whitespace-only line: nothing to index.
        continue
      labels[fields[0]] = fields[3]
  return labels

Source File: mnist_dataset.py From AIX360 with Apache License 2.0

5 votes

def extract_labels(filename, num_images):
    """Read `num_images` labels from a gzipped file as a one-hot matrix.

    The first 8 bytes are skipped; each following byte is one label. The
    result is a float32 array with one row per label and 10 columns.
    """
    with gzip.open(filename) as stream:
        stream.read(8)
        raw = stream.read(1 * num_images)
    labels = np.frombuffer(raw, dtype=np.uint8)
    # Broadcasting a column of labels against 0..9 gives one True per row.
    matches = np.equal(np.arange(10), labels[:, np.newaxis])
    return matches.astype(np.float32)

Source File: input_data.py From cloudml-samples with Apache License 2.0

5 votes

def extract_labels(filename, one_hot=False, num_classes=10):
  """Read a gzipped MNIST label file into a 1D uint8 numpy array.

  Args:
    filename: Path of the gzip-compressed label file.
    one_hot: When true, return dense_to_one_hot(labels, num_classes).
    num_classes: Number of classes for the one hot encoding.

  Raises:
    ValueError: If the header magic number is not 2049.
  """
  print('Extracting', filename)
  with open(filename, 'rb') as handle, gzip.GzipFile(fileobj=handle) as stream:
    magic_number = _read32(stream)
    if magic_number != 2049:
      raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                       (magic_number, filename))
    item_count = _read32(stream)
    raw = stream.read(item_count)
  labels = numpy.frombuffer(raw, dtype=numpy.uint8)
  return dense_to_one_hot(labels, num_classes) if one_hot else labels

Source File: pandas_io.py From lambda-packs with MIT License

5 votes

def extract_pandas_labels(labels):
  """Return the label values held in a single-column pandas.DataFrame.

  Args:
    labels: `pandas.DataFrame` containing one column of labels. Any other
      object is returned unchanged.

  Returns:
    A numpy `ndarray` of labels from the DataFrame, or `labels` itself when
    it is not a DataFrame.

  Raises:
    ValueError: if more than one column is found or a column's dtype name
      is not listed in PANDAS_DTYPES.
  """
  if not isinstance(labels, pd.DataFrame):
    return labels

  if len(labels.columns) > 1:
    raise ValueError('Only one column for labels is allowed.')

  rejected = [name for name in labels
              if labels[name].dtype.name not in PANDAS_DTYPES]
  if rejected:
    details = ["'{}' type={}".format(str(name), str(labels[name].dtype.name))
               for name in rejected]
    raise ValueError('Data types for extracting labels must be int, '
                     'float, or bool. Found: ' + ', '.join(details))
  return labels.values

Source File: mnist.py From lambda-packs with MIT License

5 votes

def extract_labels(f, one_hot=False, num_classes=10):
  """Decode a gzipped MNIST label stream into a 1D uint8 numpy array.

  Args:
    f: A file object that can be passed into a gzip reader; its `name`
      attribute is printed and used in the error message.
    one_hot: When true, return the one hot encoding of the labels.
    num_classes: Number of classes for the one hot encoding.

  Returns:
    labels: a 1D uint8 numpy array, or the result of dense_to_one_hot when
      one_hot is set.

  Raises:
    ValueError: If the bytestream doesn't start with 2049.
  """
  print('Extracting', f.name)
  with gzip.GzipFile(fileobj=f) as gz_stream:
    first_word = _read32(gz_stream)
    if first_word != 2049:
      raise ValueError('Invalid magic number %d in MNIST label file: %s' %
                       (first_word, f.name))
    n_items = _read32(gz_stream)
    data = gz_stream.read(n_items)
  labels = numpy.frombuffer(data, dtype=numpy.uint8)
  if not one_hot:
    return labels
  return dense_to_one_hot(labels, num_classes)

Source File: dask_io.py From lambda-packs with MIT License

5 votes

def extract_dask_labels(labels):
  """Prepare dask labels for iteration.

  A dask.DataFrame or dask.Series that passes the single-column check is
  handed to _construct_dask_df_with_divisions and the result is returned.

  Args:
    labels: A distributed dask.DataFrame or dask.Series with exactly one
            column or name.

  Returns:
    The result of _construct_dask_df_with_divisions(labels) when `labels`
    is an instance of `allowed_classes`; otherwise `labels` unmodified.

  Raises:
    ValueError: If `len()` of the DataFrame's columns, or of the Series'
                name, is greater than one.
  """
  if isinstance(labels, dd.DataFrame):
    width_source = labels.columns
  elif isinstance(labels, dd.Series):
    # NOTE(review): len() of a string name counts characters, so a Series
    # named e.g. "label" is rejected below -- confirm this is intended.
    width_source = labels.name

  if not isinstance(labels, allowed_classes):
    return labels

  if len(width_source) > 1:
    raise ValueError('Only one column for labels is allowed.')
  return _construct_dask_df_with_divisions(labels)

Source File: api.py From sregistry-cli with Mozilla Public License 2.0

5 votes

def extract_labels(self):
    """Return the "Labels" entry of the config, normalised to None when empty.

    Returns
    =======
    The value of self._get_config("Labels"), or None when that value is an
    empty list, an empty string, or None.

    """
    found = self._get_config("Labels")
    return None if found in ([], "", None) else found

Source File: mnist.py From DDRL with Apache License 2.0

5 votes

def extract_labels(filename):
    """Return the labels of a gzipped MNIST label file as a 1D uint8 array.

    A ValueError is raised if the file's first header word is not 2049.
    """
    with gzip.open(filename) as label_stream:
        magic_value = _read32(label_stream)
        if magic_value != 2049:
            raise ValueError(
              'Invalid magic number %d in MNIST label file: %s' %
              (magic_value, filename))
        label_bytes = label_stream.read(_read32(label_stream))
        return numpy.frombuffer(label_bytes, dtype=numpy.uint8)

Source File: mnist.py From dvae with Apache License 2.0

5 votes

def extract_labels(self, filename):
    """Read a gzipped MNIST label file into a vector of int64 label IDs.

    Two header integers are read through self.read_header_int: the magic
    number, which must be 2049, and the number of one-byte labels.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        if self.read_header_int(stream) != 2049:
            raise ValueError('Invalid magic for MNIST labels')

        label_total = self.read_header_int(stream)
        raw = stream.read(1 * label_total)
    return np.frombuffer(raw, dtype=np.uint8).astype(np.int64)

Source File: input_data.py From Net2Net with MIT License

5 votes

def extract_labels(filename, one_hot=False):
    """Load a gzipped MNIST label file as a 1D uint8 numpy array.

    When one_hot is true the labels are returned through dense_to_one_hot.
    Raises ValueError if the header magic number is not 2049.
    """
    print('Extracting', filename)
    with gzip.open(filename) as fh:
        magic = _read32(fh)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        total = _read32(fh)
        labels = numpy.frombuffer(fh.read(total), dtype=numpy.uint8)
    if not one_hot:
        return labels
    return dense_to_one_hot(labels)

Source File: preprocessing.py From WaterNet with MIT License

5 votes

def extract_features_and_labels(dataset, tile_size, only_cache=False):
    """Collect the tiled features and labels of every dataset entry.

    Each entry pairs a GeoTIFF path with its shapefile paths; the tiles
    produced by create_tiled_features_and_labels for every entry are
    concatenated, in dataset order, into two flat lists."""
    all_features, all_labels = [], []

    for geotiff_path, shapefile_paths in dataset:
        tiles = create_tiled_features_and_labels(
            geotiff_path, shapefile_paths, tile_size, only_cache)
        entry_features, entry_labels = tiles
        all_features.extend(entry_features)
        all_labels.extend(entry_labels)

    return all_features, all_labels

Source File: input_data.py From Digit-Recognizer with MIT License

5 votes

def extract_labels(filename, one_hot=False):
  """Read a gzipped MNIST label file into a 1D uint8 numpy array.

  The file is opened through tf.gfile. With one_hot set the labels are
  returned through dense_to_one_hot. Raises ValueError if the header magic
  number is not 2049.
  """
  print('Extracting', filename)
  with tf.gfile.Open(filename, 'rb') as raw_file, gzip.GzipFile(fileobj=raw_file) as stream:
    magic_number = _read32(stream)
    if magic_number != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' %
          (magic_number, filename))
    label_count = _read32(stream)
    encoded = stream.read(label_count)
  labels = numpy.frombuffer(encoded, dtype=numpy.uint8)
  return dense_to_one_hot(labels) if one_hot else labels

Source File: utils.py From deep-pwning with MIT License

5 votes

def extract_labels(filename, num_images):
    """Return `num_images` labels from a gzipped file as int64 label IDs.

    The 8 bytes at the start of the decompressed stream are discarded; each
    byte after that is read as one label.
    """
    print('Extracting', filename)
    with gzip.open(filename) as gz:
        gz.read(8)
        label_bytes = gz.read(1 * num_images)
    as_uint8 = np.frombuffer(label_bytes, dtype=np.uint8)
    return as_uint8.astype(np.int64)

Source File: input_data.py From variational-autoencoder with Apache License 2.0

5 votes

def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index].

    Args:
        filename: Path of the gzip-compressed MNIST label file.
        one_hot: When true, return dense_to_one_hot(labels) instead.

    Raises:
        ValueError: If the header magic number is not 2049.
    """
    # Call form of print: the statement form is a syntax error on Python 3.
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = numpy.frombuffer(buf, dtype=numpy.uint8)
        if one_hot:
            return dense_to_one_hot(labels)
        return labels

Source File: image.py From brainiak with Apache License 2.0

5 votes

def extract_labels(self) -> np.ndarray:
    """Extract condition labels.

    Returns
    -------
    np.ndarray
        For each distinct index along the second axis (the epoch), the
        first-axis index (the condition) of its first nonzero entry.
    """
    # Unpacking into three index arrays requires a 3-dimensional input.
    conditions, epochs, _ = np.where(self)
    # Position of the first nonzero entry belonging to each epoch.
    first_entry = np.unique(epochs, return_index=True)[1]
    return conditions[first_entry]

Source File: convnet.py From CNN-from-Scratch with GNU General Public License v3.0

5 votes

def extract_labels(filename, num_images):
	"""Load `num_images` labels from a gzipped file as int64 label IDs.

	The first 8 bytes of the decompressed data are skipped.
	"""
	print('Extracting', filename)
	with gzip.open(filename) as source:
		source.read(8)
		chunk = source.read(1 * num_images)
	return np.frombuffer(chunk, dtype=np.uint8).astype(np.int64)

Source File: setup_mnist.py From Contrastive-Explanation-Method with Apache License 2.0

5 votes

def extract_labels(filename, num_images):
    """Return a float32 one-hot matrix for the labels in a gzipped file.

    After skipping 8 header bytes, `num_images` one-byte labels are read;
    the output has one row per label and 10 columns.
    """
    with gzip.open(filename) as gz:
        gz.read(8)
        label_bytes = gz.read(1 * num_images)
    labels = np.frombuffer(label_bytes, dtype=np.uint8)
    class_ids = np.arange(10)
    one_hot = class_ids == labels[:, None]
    return one_hot.astype(np.float32)

Source File: local_mnist.py From magenta with Apache License 2.0

5 votes

def extract_labels(f, one_hot=False, num_classes=10):
  """Extract the labels into a 1D uint8 np array [index].

  Args:
    f: A file object that can be passed into a gzip reader.
    one_hot: Does one hot encoding for the result.
    num_classes: Number of classes for the one hot encoding.

  Returns:
    labels: a 1D uint8 np array.

  Raises:
    ValueError: If the bytestream doesn't start with 2049.
  """
  # The message needs a placeholder for the file name; extra logging
  # arguments without one make the record fail to format.
  tf.logging.info('Extracting %s', f.name)
  with gzip.GzipFile(fileobj=f) as bytestream:
    magic = _read32(bytestream)
    if magic != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' % (magic, f.name))
    num_items = _read32(bytestream)
    buf = bytestream.read(num_items)
    labels = np.frombuffer(buf, dtype=np.uint8)
    if one_hot:
      return dense_to_one_hot(labels, num_classes)
    return labels

Source File: setup.py From breaking_defensive_distillation with GNU General Public License v3.0

5 votes

def extract_labels(filename, num_images):
  """Read labels from a gzipped file as a 1-hot matrix [image index, label index].

  The first 8 bytes are skipped, then `num_images` one-byte labels are read.
  The float32 result has NUM_LABELS columns.
  """
  with gzip.open(filename) as stream:
    stream.read(8)
    raw = stream.read(1 * num_images)
  labels = np.frombuffer(raw, dtype=np.uint8)
  # Compare a column of labels against every class id to get the 1-hot rows.
  hits = np.equal(np.arange(NUM_LABELS), labels[:, None])
  return hits.astype(np.float32)


# Get the data.

Source File: reuters.py From KATE with BSD 3-Clause "New" or "Revised" License

5 votes

def extract_labels(docs, path, output):
    """Collect the labels of the wanted documents and dump them as JSON.

    Every line of `path` must split into exactly three whitespace-separated
    fields: label, document id, and a third field that is ignored.

    Args:
        docs: container of wanted document ids; membership tests are fast
            if this is a dict (or set) instead of a list.
        path: label file to read.
        output: destination handed to dump_json.

    Returns:
        A dict mapping each wanted document id to a list of its distinct
        labels (order within a list is not defined).
    """
    doc_labels = defaultdict(set)
    with open(path, 'r') as f:
        for line in f:
            label, did, _ = line.strip('\n').split()
            if did in docs:
                doc_labels[did].add(label)
    # .items() works on both Python 2 and 3; iteritems() is Python 2 only.
    doc_labels = {did: list(labels) for did, labels in doc_labels.items()}
    dump_json(doc_labels, output)

    return doc_labels

Source File: ucb.py From plastering with MIT License

5 votes

def extract_raw_ucb_labels():
    """Gather the raw labels used in the UCB ground-truth files.

    Each ground-truth file is read as pairs of lines: a sentence followed
    by a comma-separated list of `label:word:type` triples. Labels whose
    type is 'c' are collected, each remembered with the last sentence it
    appeared under. Two files are written: a text skeleton listing every
    label, and a JSON map from label to example sentence.
    """
    labels = set()
    example_dict = {}
    for building in ('SODA', 'SDH', 'IBM'):
        filename = './groundtruth/{0}-GROUND-TRUTH'.format(building)
        with open(filename, 'r') as fp:
            # Drop the last character (the newline) of every line.
            rawlines = [line[:-1] for line in fp]

        for i in range(0, len(rawlines), 2):
            sentence = rawlines[i]
            print('{0}th line'.format(i))
            for elem in rawlines[i + 1].split(','):
                label, _word, kind = elem.split(':')
                if kind != 'c':
                    continue
                labels.add(label)
                example_dict[label] = sentence

    with open('groundtruth/ucb_raw_labels.txt', 'w') as fp:
        fp.write('{\n')
        fp.writelines('  "{0}": \n'.format(label) for label in labels)
        fp.write('}')

    with open('groundtruth/ucb_label_sentence_map.json', 'w') as fp:
        json.dump(example_dict, fp, indent=2)

Source File: mnist_data.py From ladder with GNU General Public License v3.0

5 votes

def extract_labels(filename, verbose=True):
    """Read a gzipped MNIST label file into a 1D uint8 numpy array.

    Prints the file name first unless verbose is false. Raises ValueError
    if the header magic number is not 2049.
    """
    if verbose:
        print('Extracting', filename)
    with gzip.open(filename) as stream:
        magic_number = _read32(stream)
        if magic_number != 2049:
            raise ValueError(
              'Invalid magic number %d in MNIST label file: %s' %
              (magic_number, filename))
        count = _read32(stream)
        raw = stream.read(count)
    return np.frombuffer(raw, dtype=np.uint8)

Source File: preprocessing.py From mimic3-benchmarks with MIT License

5 votes

def extract_diagnosis_labels(diagnoses):
    """Build a 0/1 indicator table of diagnosis codes per ICU stay.

    Adds a constant 'VALUE' column to `diagnoses` (in place), pivots it to
    one row per ICUSTAY_ID and one column per ICD9_CODE, and keeps only the
    codes listed in the module-level `diagnosis_labels`, in that order.
    Listed codes absent from the data become all-zero columns. Every column
    is renamed to 'Diagnosis <code>'.
    """
    diagnoses['VALUE'] = 1
    unique_rows = diagnoses[['ICUSTAY_ID', 'ICD9_CODE', 'VALUE']].drop_duplicates()
    pivoted = unique_rows.pivot(index='ICUSTAY_ID', columns='ICD9_CODE', values='VALUE')
    labels = pivoted.fillna(0).astype(int)
    for code in diagnosis_labels:
        if code not in labels:
            labels[code] = 0
    labels = labels[diagnosis_labels]
    new_names = {code: 'Diagnosis ' + code for code in diagnosis_labels}
    return labels.rename(new_names, axis=1)