Python utils.preprocess() Examples

The following are 12 code examples of utils.preprocess(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
Example #1
Source File: tagger.py    From nagisa with MIT License 6 votes
def decode(self, words, lower=False):
        """ Return the words with tags of the given words.

        args:
            - words (list): Input words.
            - lower (bool, optional): If lower is True, all uppercase characters in the \
                            input words are converted into lowercase characters.
        return:
            - object : The object of the words with tags.
        """
        if not type(words) == list:
            raise AssertionError("Please input a list of words.")
        words = [utils.preprocess_without_rstrip(w) if w == " " or w == "　"  # half- or full-width space
                 else utils.preprocess(w) for w in words]
        postags = self._postagging(words, lower)
        return postags 
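A hedged usage sketch for decode(): it assumes tagger is an already-constructed instance of the Tagger class from tagger.py, and the example words are illustrative only.

# Hypothetical call site; `tagger` is assumed to be a constructed Tagger instance.
words = ['Python', 'で', '簡単', 'に', '使える', 'ツール', 'です']
tagged = tagger.decode(words)  # each word is preprocessed, then POS-tagged
print(tagged)                  # the object holding the words with their tags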
Example #2
Source File: deep_dream.py    From PyTorch-Deep-Dream with MIT License 6 votes
def deep_dream(image, model, iterations, lr, octave_scale, num_octaves):
    """ Main deep dream method """
    image = preprocess(image).unsqueeze(0).cpu().data.numpy()

    # Extract image representations for each octave
    octaves = [image]
    for _ in range(num_octaves - 1):
        octaves.append(nd.zoom(octaves[-1], (1, 1, 1 / octave_scale, 1 / octave_scale), order=1))

    detail = np.zeros_like(octaves[-1])
    for octave, octave_base in enumerate(tqdm.tqdm(octaves[::-1], desc="Dreaming")):
        if octave > 0:
            # Upsample detail to new octave dimension
            detail = nd.zoom(detail, np.array(octave_base.shape) / np.array(detail.shape), order=1)
        # Add deep dream detail from previous octave to new base
        input_image = octave_base + detail
        # Get new deep dream image
        dreamed_image = dream(input_image, model, iterations, lr)
        # Extract deep dream details
        detail = dreamed_image - octave_base

    return deprocess(dreamed_image) 
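A minimal sketch of calling deep_dream(), assuming the preprocess/deprocess helpers defined elsewhere in the repository are in scope; the model choice and hyper-parameters below are illustrative, not the project's defaults.

from PIL import Image
import torchvision.models as models

# Truncated convolutional feature extractor (an assumption, not the repo's exact model)
model = models.vgg19(pretrained=True).features[:28]
image = Image.open('input.jpg')
dreamed = deep_dream(image, model, iterations=20, lr=0.01,
                     octave_scale=1.4, num_octaves=10)  # returns the deprocessed numpy image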
Example #3
Source File: tagger.py    From nagisa with MIT License 5 votes
def __init__(self, vocabs=None, params=None, hp=None, single_word_list=None):
        if vocabs is None:
            vocabs = base + '/data/nagisa_v001.dict'
        if params is None:
            params = base + '/data/nagisa_v001.model'
        if hp is None:
            hp = base + '/data/nagisa_v001.hp'

        # Load vocabulary files
        vocabs = utils.load_data(vocabs)
        self._uni2id, self._bi2id, self._word2id, self._pos2id, self._word2postags = vocabs
        self._id2pos = {v:k for k, v in self._pos2id.items()}
        self.id2pos  = self._id2pos
        self.postags = [postag for postag in self._pos2id.keys()]
        # Load a hyper-parameter file
        self._hp = utils.load_data(hp)
        # Construct a word segmentation model and a pos tagging model
        self._model = model.Model(self._hp, params)

        # If a word is included in single_word_list,
        # it is forcibly recognized as a single word.
        self.pattern = None
        if single_word_list:
            single_word_list = [utils.preprocess(w) for w in single_word_list if len(w) > 1]
            single_word_list = [w.replace('(', '\\(').replace(')', '\\)')
                                for w in single_word_list]
            single_word_list = sorted(single_word_list, key=lambda x:-len(x))
            if len(single_word_list) > 0:
                self.pattern = re.compile('|'.join(single_word_list))

        # If use_noun_heuristic is True, nouns are more likely to appear.
        if u'名詞' in self._pos2id:
            self.use_noun_heuristic = True
        else:
            self.use_noun_heuristic = False 
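For context, nagisa's README shows single_word_list being passed when constructing a tagger; a hedged sketch, assuming Tagger is exposed at the package level as documented there and using an illustrative word list.

import nagisa

tagger = nagisa.Tagger(single_word_list=['ニューラルネットワーク'])
print(tagger.wakati('ニューラルネットワークを使う'))  # the listed word is kept as one token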
Example #4
Source File: drive.py    From car-behavioral-cloning with MIT License 5 votes
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        # The current image from the center camera of the car
        image = Image.open(BytesIO(base64.b64decode(data["image"])))
        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
            
        try:
            image = np.asarray(image)       # from PIL image to numpy array
            image = utils.preprocess(image) # apply the preprocessing
            image = np.array([image])       # the model expects 4D array

            # predict the steering angle for the image
            steering_angle = float(model.predict(image, batch_size=1))
            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2

            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)
        
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True) 
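The throttle expression above trades steering effort against speed; a quick worked example with illustrative numbers (MIN_SPEED, MAX_SPEED and speed_limit are globals defined elsewhere in drive.py).

# Worked example of the throttle formula with illustrative values.
steering_angle = 0.1
speed, speed_limit = 20.0, 25.0
throttle = 1.0 - steering_angle**2 - (speed / speed_limit)**2
# = 1.0 - 0.01 - 0.64 = 0.35  -> ease off the throttle as speed approaches the limit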
Example #5
Source File: detect.py    From yolo-tf with GNU Lesser General Public License v3.0 5 votes
def std(image):
    return utils.preprocess.per_image_standardization(image) 
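utils.preprocess.per_image_standardization is a project-local helper; assuming it follows the usual per-image standardization convention (as in TensorFlow's tf.image.per_image_standardization), a NumPy equivalent would look roughly like this sketch.

import numpy as np

def per_image_standardization_sketch(image):
    # Zero-mean, unit-variance scaling per image; the stddev is floored at
    # 1/sqrt(N) so uniform images do not divide by zero (common convention).
    image = image.astype(np.float32)
    adjusted_std = max(image.std(), 1.0 / np.sqrt(image.size))
    return (image - image.mean()) / adjusted_std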
Example #6
Source File: detect.py    From yolo-tf with GNU Lesser General Public License v3.0 5 votes
def detect(sess, model, names, image, path):
    preprocess = eval(args.preprocess)
    _, height, width, _ = image.get_shape().as_list()
    _image = read_image(path)
    image_original = np.array(np.uint8(_image))
    if len(image_original.shape) == 2:
        image_original = np.repeat(np.expand_dims(image_original, -1), 3, 2)
    image_height, image_width, _ = image_original.shape
    image_std = preprocess(np.array(np.uint8(_image.resize((width, height)))).astype(np.float32))
    feed_dict = {image: np.expand_dims(image_std, 0)}
    tensors = [model.conf, model.xy_min, model.xy_max]
    conf, xy_min, xy_max = sess.run([tf.check_numerics(t, t.op.name) for t in tensors], feed_dict=feed_dict)
    boxes = utils.postprocess.non_max_suppress(conf[0], xy_min[0], xy_max[0], args.threshold, args.threshold_iou)
    scale = [image_width / model.cell_width, image_height / model.cell_height]
    fig = plt.figure()
    ax = fig.gca()
    ax.imshow(image_original)
    colors = [prop['color'] for _, prop in zip(names, itertools.cycle(plt.rcParams['axes.prop_cycle']))]
    cnt = 0
    for _conf, _xy_min, _xy_max in boxes:
        index = np.argmax(_conf)
        if _conf[index] > args.threshold:
            wh = _xy_max - _xy_min
            _xy_min = _xy_min * scale
            _wh = wh * scale
            linewidth = min(_conf[index] * 10, 3)
            ax.add_patch(patches.Rectangle(_xy_min, _wh[0], _wh[1], linewidth=linewidth, edgecolor=colors[index], facecolor='none'))
            ax.annotate(names[index] + ' (%.1f%%)' % (_conf[index] * 100), _xy_min, color=colors[index])
            cnt += 1
    fig.canvas.set_window_title('%d objects detected' % cnt)
    ax.set_xticks([])
    ax.set_yticks([])
    return fig 
Example #7
Source File: detect.py    From yolo-tf with GNU Lesser General Public License v3.0 5 votes
def make_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='input image path')
    parser.add_argument('-c', '--config', nargs='+', default=['config.ini'], help='config file')
    parser.add_argument('-p', '--preprocess', default='std', help='the preprocess function')
    parser.add_argument('-t', '--threshold', type=float, default=0.3)
    parser.add_argument('--threshold_iou', type=float, default=0.4, help='IoU threshold')
    parser.add_argument('-e', '--exts', nargs='+', default=['.jpg', '.png'])
    parser.add_argument('--level', default='info', help='logging level')
    return parser.parse_args() 
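The -p/--preprocess string is resolved to a function by name via eval() inside detect(); a small self-contained sketch of that dispatch, with a stub standing in for the real std() above.

import argparse

def std(image):  # stand-in for the std() shown in Example #5
    return image

args = argparse.Namespace(preprocess='std')  # what make_args() returns for `-p std`
preprocess = eval(args.preprocess)           # the string 'std' resolves to the function above
assert preprocess is std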
Example #8
Source File: extract_features.py    From DocFace with MIT License 5 votes
def main(args):
    # Get the configuration file
    config = utils.import_file(os.path.join(args.model_dir, 'config.py'), 'config')
    
    # Get the paths of the aligned images
    with open(args.image_list) as f:
        paths = [line.strip() for line in f]
    print('%d images to load.' % len(paths))
    assert(len(paths)>0)
    

    # Pre-process the images
    images = utils.preprocess(paths, config, False)
    switch = np.array([utils.is_typeB(p) for p in paths])
    print('%d type A images and %d type B images.' % (np.sum(switch), np.sum(~switch)))


    # Load model files and config file
    if config.use_sibling:
        network = SiblingNetwork()
    else:
        network = BaseNetwork()
    network.load_model(args.model_dir)


    # Run forward pass to calculate embeddings
    if config.use_sibling:
        embeddings = network.extract_feature(images, switch, args.batch_size, verbose=True)
    else:
        embeddings = network.extract_feature(images, args.batch_size, verbose=True)


    # Output the extracted features
    np.save(args.output, embeddings) 
Example #9
Source File: code_draft.py    From NLP with MIT License 5 votes
def read_data_from_file(data_path):
    maybe_download()
    with open(data_path) as f:
        text = f.read()

    ###########################################################
    # ------------------- Preprocessing -----------------------
    # 1. Tokenize punctuations e.g. period -> <PERIOD>
    # 2. Remove words that show up five times or fewer
    words = utils.preprocess(text)

    # Hmm, let's take a look at the processed data
    print('First 30 words:', words[:30])
    print('Total words:', len(words))
    print('Total unique words:', len(set(words)))

    # Create two dictionaries to convert words to integers
    vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
    n_vocab = len(int_to_vocab)

    # Convert words into integers
    int_words = [vocab_to_int[w] for w in words]

    ###########################################################
    # ------------------- Subsampling -------------------------
    # Some words like "the", "a", "of" etc. don't provide much
    # information, so we might want to remove some of them.
    # This results in faster training and better results.
    # The probability that a word is discarded is
    # P(w) = 1 - sqrt(threshold / frequency(w))
    each_word_count = Counter(int_words)
    total_count = len(int_words)
    threshold = 1e-5  # FLAGS.drop_word_threshold

    freqs = {word: count/total_count for word,
             count in each_word_count.items()}
    probs = {word: 1 - np.sqrt(threshold/freqs[word])
             for word in each_word_count}

    train_words = [word for word in int_words if random.random() <
                   (1 - probs[word])]

    print('After subsampling, first 30 words:', train_words[:30])
    print('After subsampling, total words:', len(train_words))

    # Subsampling hurt results here by removing contextual info, so return int_words instead.
    # return train_words, int_to_vocab, vocab_to_int, n_vocab
    return int_words, int_to_vocab, vocab_to_int, n_vocab 
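A quick worked example of the subsampling probability used above, with illustrative word frequencies rather than values from the corpus.

import numpy as np

threshold = 1e-5
freq_common = 0.05                                    # e.g. "the"
freq_rare = 2e-5                                      # an uncommon word
p_drop_common = 1 - np.sqrt(threshold / freq_common)  # ~0.986: dropped almost always
p_drop_rare = 1 - np.sqrt(threshold / freq_rare)      # ~0.293: usually kept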
Example #10
Source File: tagger.py    From nagisa with MIT License 4 votes
def wakati(self, text, lower=False):
        """Word segmentation function. Return the segmented words.

        args:
            - text (str): An input sentence.
            - lower (bool): If lower is True, all uppercase characters in the \
                            input text are converted into lowercase characters.

        return:
            - words (list): A list of the words.
        """
        text = utils.preprocess(text)
        lower_text = text.lower()
        feats = utils.feature_extraction(text=lower_text,
                                         uni2id=self._uni2id,
                                         bi2id=self._bi2id,
                                         dictionary=self._word2id,
                                         window_size=self._hp['WINDOW_SIZE'])
        obs  = self._model.encode_ws(feats)
        obs  = [ob.npvalue() for ob in obs]
        tags = utils.np_viterbi(self._model.trans_array, obs)

        # Words matching the single-word pattern are forcibly recognized as single words.
        if self.pattern:
            for match in self.pattern.finditer(text):
                span = match.span()
                span_s = span[0]
                span_e = span[1]

                if (span_e - span_s) == 1:
                    tags[span_s:span_e] = [3]
                else:
                    tags[span_s:span_e] = [0]+[1]*((span_e-span_s)-2)+[2]

                if span_s != 0:
                    previous_tag = tags[span_s-1]
                    if previous_tag == 0:   # 0 is BEGIN tag
                        tags[span_s-1] = 3  # 3 is SINGLE tag
                    elif previous_tag == 1: # 1 is MIDDLE tag
                        tags[span_s-1] = 2  # 2 is END tag

                if span_e != len(text):
                    next_tag = tags[span_e]
                    if next_tag == 1:    # 1 is MIDDLE tag
                        tags[span_e] = 0 # 0 is BEGIN tag
                    elif next_tag == 2:  # 2 is END tag
                        tags[span_e] = 3 # 3 is SINGLE tag

        if lower is True:
            words = utils.segmenter_for_bmes(lower_text, tags)
        else:
            words = utils.segmenter_for_bmes(text, tags)
        return words 
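A hedged usage sketch of the segmentation above through nagisa's module-level shortcut, assuming the wakati helper documented in the project README; the sentence is illustrative.

import nagisa

words = nagisa.wakati('Pythonで簡単に使えるツールです')
print(words)  # e.g. ['Python', 'で', '簡単', 'に', '使える', 'ツール', 'です']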
Example #11
Source File: classify.py    From a-PyTorch-Tutorial-to-Text-Classification with MIT License 4 votes
def classify(document):
    """
    Classify a document with the Hierarchical Attention Network (HAN).

    :param document: a document in text form
    :return: pre-processed tokenized document, class scores, attention weights for words, attention weights for sentences, sentence lengths
    """
    # A list to store the document tokenized into words
    doc = list()

    # Tokenize document into sentences
    sentences = list()
    for paragraph in preprocess(document).splitlines():
        sentences.extend([s for s in sent_tokenizer.tokenize(paragraph)])

    # Tokenize sentences into words
    for s in sentences[:sentence_limit]:
        w = word_tokenizer.tokenize(s)[:word_limit]
        if len(w) == 0:
            continue
        doc.append(w)

    # Number of sentences in the document
    sentences_in_doc = len(doc)
    sentences_in_doc = torch.LongTensor([sentences_in_doc]).to(device)  # (1)

    # Number of words in each sentence
    words_in_each_sentence = list(map(lambda s: len(s), doc))
    words_in_each_sentence = torch.LongTensor(words_in_each_sentence).unsqueeze(0).to(device)  # (1, n_sentences)

    # Encode document with indices from the word map
    encoded_doc = list(
        map(lambda s: list(map(lambda w: word_map.get(w, word_map['<unk>']), s)) + [0] * (word_limit - len(s)),
            doc)) + [[0] * word_limit] * (sentence_limit - len(doc))
    encoded_doc = torch.LongTensor(encoded_doc).unsqueeze(0).to(device)

    # Apply the HAN model
    scores, word_alphas, sentence_alphas = model(encoded_doc, sentences_in_doc,
                                                 words_in_each_sentence)  # (1, n_classes), (1, n_sentences, max_sent_len_in_document), (1, n_sentences)
    scores = scores.squeeze(0)  # (n_classes)
    scores = nn.functional.softmax(scores, dim=0)  # (n_classes)
    word_alphas = word_alphas.squeeze(0)  # (n_sentences, max_sent_len_in_document)
    sentence_alphas = sentence_alphas.squeeze(0)  # (n_sentences)
    words_in_each_sentence = words_in_each_sentence.squeeze(0)  # (n_sentences)

    return doc, scores, word_alphas, sentence_alphas, words_in_each_sentence 
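A hypothetical call site for classify(); the input string is illustrative, and the reduction of scores to a label index is just one way to consume the returned tensors.

doc, scores, word_alphas, sentence_alphas, words_per_sentence = classify(
    'The food was fantastic and the service was quick.')
predicted_class = scores.argmax().item()     # index of the highest softmax score
confidence = scores[predicted_class].item()  # probability assigned to that class
print(predicted_class, confidence)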
Example #12
Source File: prepare_data.py    From NLP with MIT License 4 votes
def read_data_from_file(data_path):
    maybe_download()
    with open(data_path) as f:
        text = f.read()

    ###########################################################
    # ------------------- Preprocessing -----------------------
    # 1. Tokenize punctuations e.g. period -> <PERIOD>
    # 2. Remove words that show up five times or fewer
    words = utils.preprocess(text)

    # Hmm, let's take a look at the processed data
    print('First 30 words:', words[:30])
    print('Total words:', len(words))
    print('Total unique words:', len(set(words)))

    # Create two dictionaries to convert words to integers
    vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
    n_vocab = len(int_to_vocab)

    # Convert words into integers
    int_words = [vocab_to_int[w] for w in words]

    ###########################################################
    # ------------------- Subsampling -------------------------
    # Some words like "the", "a", "of" etc. don't provide much
    # information, so we might want to remove some of them.
    # This results in faster training and better results.
    # The probability that a word is discarded is
    # P(w) = 1 - sqrt(threshold / frequency(w))
    each_word_count = Counter(int_words)
    total_count = len(int_words)
    threshold = FLAGS.drop_word_threshold

    freqs = {word: count/total_count for word,
             count in each_word_count.items()}
    probs = {word: 1 - np.sqrt(threshold/freqs[word])
             for word in each_word_count}

    train_words = [word for word in int_words if random.random() <
                   (1 - probs[word])]

    print('After subsampling, first 30 words:', train_words[:30])
    print('After subsampling, total words:', len(train_words))

    return train_words, int_to_vocab, vocab_to_int, n_vocab