Python utils.preprocess() Examples
The following are 12 code examples of utils.preprocess().
Example #1
Source File: tagger.py From nagisa with MIT License
def decode(self, words, lower=False):
    """Return the words with tags of the given words.

    args:
        - words (list): Input words.
        - lower (bool, optional): If lower is True, all uppercase characters in a list
          of the words are converted into lowercase characters.

    return:
        - object: The object of the words with tags.
    """
    if not type(words) == list:
        raise AssertionError("Please input a list of words.")
    words = [utils.preprocess_without_rstrip(w) if w == " " or w == "　"
             else utils.preprocess(w) for w in words]
    postags = self._postagging(words, lower)
    return postags
Example #2
Source File: deep_dream.py From PyTorch-Deep-Dream with MIT License
def deep_dream(image, model, iterations, lr, octave_scale, num_octaves):
    """ Main deep dream method """
    image = preprocess(image).unsqueeze(0).cpu().data.numpy()

    # Extract image representations for each octave
    octaves = [image]
    for _ in range(num_octaves - 1):
        octaves.append(nd.zoom(octaves[-1], (1, 1, 1 / octave_scale, 1 / octave_scale), order=1))

    detail = np.zeros_like(octaves[-1])
    for octave, octave_base in enumerate(tqdm.tqdm(octaves[::-1], desc="Dreaming")):
        if octave > 0:
            # Upsample detail to new octave dimension
            detail = nd.zoom(detail, np.array(octave_base.shape) / np.array(detail.shape), order=1)
        # Add deep dream detail from previous octave to new base
        input_image = octave_base + detail
        # Get new deep dream image
        dreamed_image = dream(input_image, model, iterations, lr)
        # Extract deep dream details
        detail = dreamed_image - octave_base
    return deprocess(dreamed_image)
Example #3
Source File: tagger.py From nagisa with MIT License
def __init__(self, vocabs=None, params=None, hp=None, single_word_list=None):
    if vocabs is None:
        vocabs = base + '/data/nagisa_v001.dict'
    if params is None:
        params = base + '/data/nagisa_v001.model'
    if hp is None:
        hp = base + '/data/nagisa_v001.hp'

    # Load vocabulary files
    vocabs = utils.load_data(vocabs)
    self._uni2id, self._bi2id, self._word2id, self._pos2id, self._word2postags = vocabs
    self._id2pos = {v: k for k, v in self._pos2id.items()}
    self.id2pos = self._id2pos
    self.postags = [postag for postag in self._pos2id.keys()]
    # Load a hyper-parameter file
    self._hp = utils.load_data(hp)
    # Construct a word segmentation model and a pos tagging model
    self._model = model.Model(self._hp, params)

    # If a word is included in the single_word_list,
    # it is recognized as a single word forcibly.
    self.pattern = None
    if single_word_list:
        single_word_list = [utils.preprocess(w) for w in single_word_list if len(w) > 1]
        single_word_list = [w.replace('(', '\(').replace(')', '\)') for w in single_word_list]
        single_word_list = sorted(single_word_list, key=lambda x: -len(x))
        if len(single_word_list) > 0:
            self.pattern = re.compile('|'.join(single_word_list))

    # If use_noun_heuristic is True, nouns are more likely to appear.
    if u'名詞' in self._pos2id:
        self.use_noun_heuristic = True
    else:
        self.use_noun_heuristic = False
Example #4
Source File: drive.py From car-behavioral-cloning with MIT License
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        # The current image from the center camera of the car
        image = Image.open(BytesIO(base64.b64decode(data["image"])))

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))

        try:
            image = np.asarray(image)        # from PIL image to numpy array
            image = utils.preprocess(image)  # apply the preprocessing
            image = np.array([image])        # the model expects 4D array

            # predict the steering angle for the image
            steering_angle = float(model.predict(image, batch_size=1))

            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed / speed_limit)**2

            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
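Note: the throttle formula in this example trades the steering angle and speed off against a speed limit. As a minimal arithmetic sketch of that formula, with made-up numbers (the MAX_SPEED value and sample inputs below are assumptions for illustration, not taken from the project):

# Illustrative only: hypothetical values plugged into the throttle formula above.
MAX_SPEED = 25.0
steering_angle = 0.1
speed = 20.0

throttle = 1.0 - steering_angle ** 2 - (speed / MAX_SPEED) ** 2
print(throttle)  # 1.0 - 0.01 - 0.64 = 0.35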
Example #5
Source File: detect.py From yolo-tf with GNU Lesser General Public License v3.0
def std(image):
    return utils.preprocess.per_image_standardization(image)
Example #6
Source File: detect.py From yolo-tf with GNU Lesser General Public License v3.0
def detect(sess, model, names, image, path):
    preprocess = eval(args.preprocess)
    _, height, width, _ = image.get_shape().as_list()
    _image = read_image(path)
    image_original = np.array(np.uint8(_image))
    if len(image_original.shape) == 2:
        image_original = np.repeat(np.expand_dims(image_original, -1), 3, 2)
    image_height, image_width, _ = image_original.shape
    image_std = preprocess(np.array(np.uint8(_image.resize((width, height)))).astype(np.float32))
    feed_dict = {image: np.expand_dims(image_std, 0)}
    tensors = [model.conf, model.xy_min, model.xy_max]
    conf, xy_min, xy_max = sess.run([tf.check_numerics(t, t.op.name) for t in tensors], feed_dict=feed_dict)
    boxes = utils.postprocess.non_max_suppress(conf[0], xy_min[0], xy_max[0], args.threshold, args.threshold_iou)
    scale = [image_width / model.cell_width, image_height / model.cell_height]
    fig = plt.figure()
    ax = fig.gca()
    ax.imshow(image_original)
    colors = [prop['color'] for _, prop in zip(names, itertools.cycle(plt.rcParams['axes.prop_cycle']))]
    cnt = 0
    for _conf, _xy_min, _xy_max in boxes:
        index = np.argmax(_conf)
        if _conf[index] > args.threshold:
            wh = _xy_max - _xy_min
            _xy_min = _xy_min * scale
            _wh = wh * scale
            linewidth = min(_conf[index] * 10, 3)
            ax.add_patch(patches.Rectangle(_xy_min, _wh[0], _wh[1], linewidth=linewidth, edgecolor=colors[index], facecolor='none'))
            ax.annotate(names[index] + ' (%.1f%%)' % (_conf[index] * 100), _xy_min, color=colors[index])
            cnt += 1
    fig.canvas.set_window_title('%d objects detected' % cnt)
    ax.set_xticks([])
    ax.set_yticks([])
    return fig
Example #7
Source File: detect.py From yolo-tf with GNU Lesser General Public License v3.0
def make_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='input image path')
    parser.add_argument('-c', '--config', nargs='+', default=['config.ini'], help='config file')
    parser.add_argument('-p', '--preprocess', default='std', help='the preprocess function')
    parser.add_argument('-t', '--threshold', type=float, default=0.3)
    parser.add_argument('--threshold_iou', type=float, default=0.4, help='IoU threshold')
    parser.add_argument('-e', '--exts', nargs='+', default=['.jpg', '.png'])
    parser.add_argument('--level', default='info', help='logging level')
    return parser.parse_args()
Example #8
Source File: extract_features.py From DocFace with MIT License
def main(args):
    # Get the configuration file
    config = utils.import_file(os.path.join(args.model_dir, 'config.py'), 'config')

    # Get the paths of the aligned images
    with open(args.image_list) as f:
        paths = [line.strip() for line in f]
    print('%d images to load.' % len(paths))
    assert len(paths) > 0

    # Pre-process the images
    images = utils.preprocess(paths, config, False)
    switch = np.array([utils.is_typeB(p) for p in paths])
    print('%d type A images and %d type B images.' % (np.sum(switch), np.sum(~switch)))

    # Load model files and config file
    if config.use_sibling:
        network = SiblingNetwork()
    else:
        network = BaseNetwork()
    network.load_model(args.model_dir)

    # Run forward pass to calculate embeddings
    if config.use_sibling:
        embeddings = network.extract_feature(images, switch, args.batch_size, verbose=True)
    else:
        embeddings = network.extract_feature(images, args.batch_size, verbose=True)

    # Output the extracted features
    np.save(args.output, embeddings)
Example #9
Source File: code_draft.py From NLP with MIT License
def read_data_from_file(data_path):
    maybe_download()

    with open(data_path) as f:
        text = f.read()

    ###########################################################
    # ------------------- Preprocessing -----------------------
    # 1. Tokenize punctuations e.g. period -> <PERIOD>
    # 2. Remove words that show up five times or fewer
    words = utils.preprocess(text)

    # Hmm, let's take a look at the processed data
    print('First 30 words:', words[:30])
    print('Total words:', len(words))
    print('Total unique words:', len(set(words)))

    # Create two dictionaries to convert words to integers
    vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
    n_vocab = len(int_to_vocab)

    # Convert words into integers
    int_words = [vocab_to_int[w] for w in words]

    ###########################################################
    # ------------------- Subsampling -------------------------
    # Some words like "the", "a", "of" etc don't provide much
    # information. So we might want to remove some of them.
    # This results in faster training and better results.
    # The probability that a word is discarded is
    # P(w) = 1 - sqrt(threshold / frequency(w))
    each_word_count = Counter(int_words)
    total_count = len(int_words)
    threshold = 1e-5  # FLAGS.drop_word_threshold

    freqs = {word: count / total_count for word, count in each_word_count.items()}
    probs = {word: 1 - np.sqrt(threshold / freqs[word]) for word in each_word_count}

    train_words = [word for word in int_words if random.random() < (1 - probs[word])]

    print('After subsampling, first 30 words:', train_words[:30])
    print('After subsampling, total words:', len(train_words))

    # Subsampling made results worse by eliminating contextual info, so it is skipped here
    # return train_words, int_to_vocab, vocab_to_int, n_vocab
    return int_words, int_to_vocab, vocab_to_int, n_vocab
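The subsampling block above implements the word2vec-style heuristic: a word is dropped with probability 1 - sqrt(threshold / frequency). A minimal sketch with hypothetical frequencies (the numbers below are illustrative, not from the project):

import numpy as np

# Illustrative only: drop probabilities for a very frequent and a rare word.
threshold = 1e-5
freq_common = 0.05   # a frequent word covering 5% of the corpus
freq_rare = 2e-5     # a rare word

p_drop_common = 1 - np.sqrt(threshold / freq_common)  # ~0.986, almost always dropped
p_drop_rare = 1 - np.sqrt(threshold / freq_rare)      # ~0.293, usually kept
print(p_drop_common, p_drop_rare)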
Example #10
Source File: tagger.py From nagisa with MIT License
def wakati(self, text, lower=False):
    """Word segmentation function. Return the segmented words.

    args:
        - text (str): An input sentence.
        - lower (bool): If lower is True, all uppercase characters in a list
          of the words are converted into lowercase characters.

    return:
        - words (list): A list of the words.
    """
    text = utils.preprocess(text)
    lower_text = text.lower()
    feats = utils.feature_extraction(text=lower_text,
                                     uni2id=self._uni2id,
                                     bi2id=self._bi2id,
                                     dictionary=self._word2id,
                                     window_size=self._hp['WINDOW_SIZE'])
    obs = self._model.encode_ws(feats)
    obs = [ob.npvalue() for ob in obs]
    tags = utils.np_viterbi(self._model.trans_array, obs)

    # A word can be recognized as a single word forcibly.
    if self.pattern:
        for match in self.pattern.finditer(text):
            span = match.span()
            span_s = span[0]
            span_e = span[1]

            if (span_e - span_s) == 1:
                tags[span_s:span_e] = [3]
            else:
                tags[span_s:span_e] = [0] + [1] * ((span_e - span_s) - 2) + [2]

            if span_s != 0:
                previous_tag = tags[span_s - 1]
                if previous_tag == 0:      # 0 is BEGIN tag
                    tags[span_s - 1] = 3   # 3 is SINGLE tag
                elif previous_tag == 1:    # 1 is MIDDLE tag
                    tags[span_s - 1] = 2   # 2 is END tag

            if span_e != len(text):
                next_tag = tags[span_e]
                if next_tag == 1:          # 1 is MIDDLE tag
                    tags[span_e] = 0       # 0 is BEGIN tag
                elif next_tag == 2:        # 2 is END tag
                    tags[span_e] = 3       # 3 is SINGLE tag

    if lower is True:
        words = utils.segmenter_for_bmes(lower_text, tags)
    else:
        words = utils.segmenter_for_bmes(text, tags)
    return words
Example #11
Source File: classify.py From a-PyTorch-Tutorial-to-Text-Classification with MIT License
def classify(document):
    """
    Classify a document with the Hierarchical Attention Network (HAN).

    :param document: a document in text form
    :return: pre-processed tokenized document, class scores, attention weights for words,
             attention weights for sentences, sentence lengths
    """
    # A list to store the document tokenized into words
    doc = list()

    # Tokenize document into sentences
    sentences = list()
    for paragraph in preprocess(document).splitlines():
        sentences.extend([s for s in sent_tokenizer.tokenize(paragraph)])

    # Tokenize sentences into words
    for s in sentences[:sentence_limit]:
        w = word_tokenizer.tokenize(s)[:word_limit]
        if len(w) == 0:
            continue
        doc.append(w)

    # Number of sentences in the document
    sentences_in_doc = len(doc)
    sentences_in_doc = torch.LongTensor([sentences_in_doc]).to(device)  # (1)

    # Number of words in each sentence
    words_in_each_sentence = list(map(lambda s: len(s), doc))
    words_in_each_sentence = torch.LongTensor(words_in_each_sentence).unsqueeze(0).to(device)  # (1, n_sentences)

    # Encode document with indices from the word map
    encoded_doc = list(
        map(lambda s: list(map(lambda w: word_map.get(w, word_map['<unk>']), s)) + [0] * (word_limit - len(s)),
            doc)) + [[0] * word_limit] * (sentence_limit - len(doc))
    encoded_doc = torch.LongTensor(encoded_doc).unsqueeze(0).to(device)

    # Apply the HAN model
    scores, word_alphas, sentence_alphas = model(encoded_doc, sentences_in_doc, words_in_each_sentence)
    # (1, n_classes), (1, n_sentences, max_sent_len_in_document), (1, n_sentences)

    scores = scores.squeeze(0)                                   # (n_classes)
    scores = nn.functional.softmax(scores, dim=0)                # (n_classes)
    word_alphas = word_alphas.squeeze(0)                         # (n_sentences, max_sent_len_in_document)
    sentence_alphas = sentence_alphas.squeeze(0)                 # (n_sentences)
    words_in_each_sentence = words_in_each_sentence.squeeze(0)   # (n_sentences)

    return doc, scores, word_alphas, sentence_alphas, words_in_each_sentence
Example #12
Source File: prepare_data.py From NLP with MIT License
def read_data_from_file(data_path):
    maybe_download()

    with open(data_path) as f:
        text = f.read()

    ###########################################################
    # ------------------- Preprocessing -----------------------
    # 1. Tokenize punctuations e.g. period -> <PERIOD>
    # 2. Remove words that show up five times or fewer
    words = utils.preprocess(text)

    # Hmm, let's take a look at the processed data
    print('First 30 words:', words[:30])
    print('Total words:', len(words))
    print('Total unique words:', len(set(words)))

    # Create two dictionaries to convert words to integers
    vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
    n_vocab = len(int_to_vocab)

    # Convert words into integers
    int_words = [vocab_to_int[w] for w in words]

    ###########################################################
    # ------------------- Subsampling -------------------------
    # Some words like "the", "a", "of" etc don't provide much
    # information. So we might want to remove some of them.
    # This results in faster training and better results.
    # The probability that a word is discarded is
    # P(w) = 1 - sqrt(threshold / frequency(w))
    each_word_count = Counter(int_words)
    total_count = len(int_words)
    threshold = FLAGS.drop_word_threshold

    freqs = {word: count / total_count for word, count in each_word_count.items()}
    probs = {word: 1 - np.sqrt(threshold / freqs[word]) for word in each_word_count}

    train_words = [word for word in int_words if random.random() < (1 - probs[word])]

    print('After subsampling, first 30 words:', train_words[:30])
    print('After subsampling, total words:', len(train_words))

    return train_words, int_to_vocab, vocab_to_int, n_vocab