Python tqdm.tqdm() Examples

The following are 30 code examples of tqdm.tqdm(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tqdm, or try the search function.
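Before the project-specific examples, the short sketch below (not taken from any of the projects listed; the helper names square and demo are made up for illustration) shows the recurring tqdm patterns the examples build on: wrapping a plain iterable, passing an explicit total when the iterable has no usable length (for instance a Pool.imap generator), silencing the bar with disable, and driving a bar manually.

from multiprocessing import Pool

from tqdm import tqdm


def square(x):
    return x * x


def demo(verbose=True):
    # Pattern 1: wrap any sized iterable; tqdm infers the total from len().
    squares = [square(i) for i in tqdm(range(100), desc='Squaring', disable=not verbose)]

    # Pattern 2: pass total explicitly when wrapping a generator such as Pool.imap.
    with Pool(processes=2) as pool:
        squares = list(tqdm(pool.imap(square, range(100), chunksize=10),
                            total=100, desc='Pool.imap', disable=not verbose))

    # Pattern 3: update the bar manually, e.g. from a download or batch callback.
    with tqdm(total=1000, unit='B', unit_scale=True, disable=not verbose) as pbar:
        for _ in range(10):
            pbar.update(100)  # advance by the amount of work just completed

    return squares


if __name__ == '__main__':
    demo()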
Example #1
Source File: utils.py    From comet-commonsense with Apache License 2.0
def encode(self, texts, verbose=True):
        texts_tokens = []
        # Wrap the iterable in tqdm only when a progress bar is requested.
        iterator = tqdm(texts, ncols=80, leave=False) if verbose else texts
        for text in iterator:
            text = self.nlp(text_standardize(ftfy.fix_text(text)))
            text_tokens = []
            for token in text:
                text_tokens.extend(
                    [self.encoder.get(t, 0) for t in
                     self.bpe(token.text.lower()).split(' ')])
            texts_tokens.append(text_tokens)
        return texts_tokens
Example #2
Source File: datasets.py    From pruning_yolov3 with GNU General Public License v3.0
def convert_images2bmp():
    # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
    for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
        folder = os.sep + Path(path).name
        output = path.replace(folder, folder + 'bmp')
        if os.path.exists(output):
            shutil.rmtree(output)  # delete output folder
        os.makedirs(output)  # make new output folder

        for f in tqdm(glob.glob('%s*.jpg' % path)):
            save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp')
            cv2.imwrite(save_name, cv2.imread(f))

    for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
        with open(label_path, 'r') as file:
            lines = file.read()
        lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
            '/Users/glennjocher/PycharmProjects/', '../')
        with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
            file.write(lines) 
Example #3
Source File: generate.py    From post--memorization-in-rnns with MIT License
def save_tfrecord(filename, dataset, verbose=False):
    observations = len(dataset['length'])

    serialized = []
    with Pool(processes=4) as pool:
        for serialized_string in tqdm(pool.imap(
            tfrecord_serializer,
            zip(dataset['length'], dataset['source'], dataset['target']),
            chunksize=10
        ), total=observations, disable=not verbose):
            serialized.append(serialized_string)

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )

    for serialized_string in tqdm(serialized, disable=not verbose):
        writer.write(serialized_string)

    writer.close() 
Example #4
Source File: dataset.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def load_embedding(self, f, reset=[]):
        vectors = {}
        for line in tqdm(f.readlines(), desc='Loading embeddings'):
            tokens = line.rstrip('\n').split(' ')
            word = tokens[0].lower() if self.lower else tokens[0]
            if self.include_unseen:
                self.add(word)
            if word in self.tok2idx:
                vectors[word] = [float(x) for x in tokens[1:]]
        dim = len(next(iter(vectors.values())))  # dict.values() is not subscriptable in Python 3
        def to_vector(tok):
            if tok in vectors and tok not in reset:
                return vectors[tok]
            elif tok not in vectors:
                return np.random.normal(-0.05, 0.05, size=dim)
            else:
                return [0.0]*dim
        self.embed = mx.nd.array([vectors[tok] if tok in vectors and tok not in reset
                                  else [0.0]*dim for tok in self.idx2tok]) 
Example #5
Source File: dqn.py    From Pytorch-Project-Template with MIT License
def train(self):
        """
        Training loop based on the number of episodes
        :return:
        """
        for episode in tqdm(range(self.current_episode, self.config.num_episodes)):
            self.current_episode = episode
            # reset environment
            self.env.reset()
            self.train_one_epoch()
            # The target network has its weights kept frozen most of the time
            if self.current_episode % self.config.target_update == 0:
                self.target_model.load_state_dict(self.policy_model.state_dict())

        self.env.render()
        self.env.close() 
Example #6
Source File: Embed.py    From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0
def _read_file(path):
        """
        :param path: embed file path
        :return:
        """
        embed_dict = {}
        with open(path, encoding='utf-8') as f:
            lines = f.readlines()
            lines = tqdm.tqdm(lines)
            for line in lines:
                values = line.strip().split(' ')
                if len(values) == 1 or len(values) == 2 or len(values) == 3:
                    continue
                w, v = values[0], values[1:]
                embed_dict[w] = v
        return embed_dict 
Example #7
Source File: pregenerate_training_data.py    From tpu_pretrain with Apache License 2.0
def input_file_to_training_data(args, input_file, epoch, tokenizer, num_files):
    print(input_file)
    with DocumentDatabase(reduce_memory=args.reduce_memory) as docs:
        with open(input_file) as f:
            doc = []
            for line in tqdm(f, desc="Loading Dataset", unit=" lines"):
                line = line.strip()
                if line == "":
                    docs.add_document(doc)
                    doc = []
                else:
                    tokens = tokenizer.tokenize(line)
                    doc.append(tokens)
            if doc:
                docs.add_document(doc)  # If the last doc didn't end on a newline, make sure it still gets added
        if len(docs) <= 1:
            exit("ERROR: No document breaks were found in the input file! These are necessary to allow the script to "
                    "ensure that random NextSentences are not sampled from the same document. Please add blank lines to "
                    "indicate breaks between documents in your input file. If your dataset does not contain multiple "
                    "documents, blank lines can be inserted at any natural boundary, such as the ends of chapters, "
                    "sections or paragraphs.")

        for i in range(args.epochs_to_generate):
            create_training_file(docs, tokenizer, args, epoch + i * num_files) 
Example #8
Source File: utils.py    From pruning_yolov3 with GNU General Public License v3.0
def coco_single_class_labels(path='../coco/labels/train2014/', label_class=43):
    # Makes single-class coco datasets. from utils.utils import *; coco_single_class_labels()
    if os.path.exists('new/'):
        shutil.rmtree('new/')  # delete output folder
    os.makedirs('new/')  # make new output folder
    os.makedirs('new/labels/')
    os.makedirs('new/images/')
    for file in tqdm(sorted(glob.glob('%s/*.*' % path))):
        with open(file, 'r') as f:
            labels = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
        i = labels[:, 0] == label_class
        if any(i):
            img_file = file.replace('labels', 'images').replace('txt', 'jpg')
            labels[:, 0] = 0  # reset class to 0
            with open('new/images.txt', 'a') as f:  # add image to dataset list
                f.write(img_file + '\n')
            with open('new/labels/' + Path(file).name, 'a') as f:  # write label
                for l in labels[i]:
                    f.write('%g %.6f %.6f %.6f %.6f\n' % tuple(l))
            shutil.copyfile(src=img_file, dst='new/images/' + Path(file).name.replace('txt', 'jpg'))  # copy images 
Example #9
Source File: preprocessing.py    From Image-Caption-Generator with MIT License
def extract_features(path, model_type):
	if model_type == 'inceptionv3':
		from keras.applications.inception_v3 import preprocess_input
		target_size = (299, 299)
	elif model_type == 'vgg16':
		from keras.applications.vgg16 import preprocess_input
		target_size = (224, 224)
	# Get CNN Model from model.py
	model = CNNModel(model_type)
	features = dict()
	# Extract features from each photo
	for name in tqdm(os.listdir(path)):
		# Loading and resizing image
		filename = path + name
		image = load_img(filename, target_size=target_size)
		# Convert the image pixels to a numpy array
		image = img_to_array(image)
		# Reshape data for the model
		image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
		# Prepare the image for the CNN model
		image = preprocess_input(image)
		# Pass image into model to get encoded features
		feature = model.predict(image, verbose=0)
		# Store encoded features for the image
		image_id = name.split('.')[0]
		features[image_id] = feature
	return features 
Example #10
Source File: utils.py    From pruning_yolov3 with GNU General Public License v3.0
def crop_images_random(path='../images/', scale=0.50):  # from utils.utils import *; crop_images_random()
    # crops images into random squares up to scale fraction
    # WARNING: overwrites images!
    for file in tqdm(sorted(glob.glob('%s/*.*' % path))):
        img = cv2.imread(file)  # BGR
        if img is not None:
            h, w = img.shape[:2]

            # create random mask
            a = 30  # minimum size (pixels)
            mask_h = random.randint(a, int(max(a, h * scale)))  # mask height
            mask_w = mask_h  # mask width

            # box
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # crop to the random box and overwrite the original image
            cv2.imwrite(file, img[ymin:ymax, xmin:xmax]) 
Example #11
Source File: gla_gpu.py    From Deep_VoiceChanger with MIT License
def auto_inverse(self, whole_spectrum):
        whole_spectrum = np.copy(whole_spectrum).astype(complex)
        whole_spectrum[whole_spectrum < 1] = 1
        overwrap = self.buffer_size * 2
        height = whole_spectrum.shape[0]
        parallel_dif = (height-overwrap) // self.parallel
        if height < self.parallel*overwrap:
            raise Exception('voice length is too small to use gpu, or parallel number is too big')

        spec = [self.inverse(whole_spectrum[range(i, i+parallel_dif*self.parallel, parallel_dif), :]) for i in tqdm.tqdm(range(parallel_dif+overwrap))]
        spec = spec[overwrap:]
        spec = np.concatenate(spec, axis=1)
        spec = spec.reshape(-1, self.wave_len)

        # The code below doesn't take wave_len and wave_dif into account; to be fixed.
        wave = np.fft.ifft(spec, axis=1).real
        pad = np.zeros((wave.shape[0], 2), dtype=float)
        wave = np.concatenate([wave, pad], axis=1)

        dst = np.zeros((wave.shape[0]+3)*self.wave_dif, dtype=float)
        for i in range(4):
            w = wave[range(i, wave.shape[0], 4),:]
            w = w.reshape(-1)
            dst[i*self.wave_dif:i*self.wave_dif+len(w)] += w
        return dst*0.5 
Example #12
Source File: test_classification_tree.py    From Kaggler with MIT License
def test():
    data = np.random.randint(0, 1000, size=(N_OBS, N_FEATURE))
    y = np.random.randint(2, size=N_OBS)

    train = data[0:N_OBS // 2]
    ytrain = y[0:N_OBS // 2]
    test = data[N_OBS // 2:]
    ytest = y[N_OBS // 2:]

    learner = ClassificationTree(number_of_features=N_FEATURE)

    for t, x in enumerate(tqdm(train)):
        learner.update(x, ytrain[t])

    correct_num = 0
    for t, x in enumerate(tqdm(test)):
        y_pred = learner.predict(x)
        if y_pred == ytest[t]:
            correct_num += 1

    print(correct_num) 
Example #13
Source File: trainer.py    From treelstm.pytorch with MIT License
def test(self, dataset):
        self.model.eval()
        with torch.no_grad():
            total_loss = 0.0
            predictions = torch.zeros(len(dataset), dtype=torch.float, device='cpu')
            indices = torch.arange(1, dataset.num_classes + 1, dtype=torch.float, device='cpu')
            for idx in tqdm(range(len(dataset)), desc='Testing epoch  ' + str(self.epoch) + ''):
                ltree, linput, rtree, rinput, label = dataset[idx]
                target = utils.map_label_to_target(label, dataset.num_classes)
                linput, rinput = linput.to(self.device), rinput.to(self.device)
                target = target.to(self.device)
                output = self.model(ltree, linput, rtree, rinput)
                loss = self.criterion(output, target)
                total_loss += loss.item()
                output = output.squeeze().to('cpu')
                predictions[idx] = torch.dot(indices, torch.exp(output))
        return total_loss / len(dataset), predictions 
Example #14
Source File: autocomplete.py    From post--memorization-in-rnns with MIT License
def save_tfrecord(filename, dataset, verbose=False):
    observations = len(dataset['length'])

    serialized = []
    with Pool(processes=4) as pool:
        for serialized_string in tqdm(pool.imap(
            tfrecord_serializer,
            zip(dataset['length'], dataset['source'], dataset['target']),
            chunksize=10
        ), total=observations, disable=not verbose):
            serialized.append(serialized_string)

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )

    for serialized_string in tqdm(serialized, disable=not verbose):
        writer.write(serialized_string)

    writer.close() 
Example #15
Source File: extraction.py    From git2net with GNU Affero General Public License v3.0
def _process_repo_serial(git_repo_dir, sqlite_db_file, commits, extraction_settings):
    """ Processes all commits in a given git repository in a serial manner.

    Args:
        git_repo_dir: path to the git repository that is mined
        sqlite_db_file: path (including database name) where the sqlite database will be created
        commits: list of commits that have to be processed
        extraction_settings: settings for the extraction

    Returns:
        sqlite database will be written at specified location
    """

    git_repo = pydriller.GitRepository(git_repo_dir)

    con = sqlite3.connect(sqlite_db_file)

    for commit in tqdm(commits, desc='Serial'):
        args = {'git_repo_dir': git_repo_dir, 'commit_hash': commit.hash, 'extraction_settings': extraction_settings}
        result = _process_commit(args)

        if not result['edits'].empty:
            result['edits'].to_sql('edits', con, if_exists='append', index=False)
        if not result['commit'].empty:
            result['commit'].to_sql('commits', con, if_exists='append', index=False) 
Example #16
Source File: pools.py    From python-pool-performance with MIT License
def run_test(work_type: FunctionType, job_sets: Sequence, trials: int,
             pool_class: type, worker_count: int) -> Sequence:  # returns a list of per-job-set summaries
    pool = pool_class(worker_count)
    if work_type == 'compute':
        test_func = pool.run_compute_test
    elif work_type == 'network':
        test_func = pool.run_network_test
    else:
        raise Exception("Invalid work type: {}".format(work_type))
    results = map(
        lambda jobs: test_func(jobs, trials, show_progress=True),
        tqdm(job_sets, desc=pool_class.__name__),
    )
    summarized_results = list(map(summarize_test, results))
    pool.destroy_pool()
    return summarized_results 
Example #17
Source File: trainer.py    From treelstm.pytorch with MIT License
def train(self, dataset):
        self.model.train()
        self.optimizer.zero_grad()
        total_loss = 0.0
        indices = torch.randperm(len(dataset), dtype=torch.long, device='cpu')
        for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
            ltree, linput, rtree, rinput, label = dataset[indices[idx]]
            target = utils.map_label_to_target(label, dataset.num_classes)
            linput, rinput = linput.to(self.device), rinput.to(self.device)
            target = target.to(self.device)
            output = self.model(ltree, linput, rtree, rinput)
            loss = self.criterion(output, target)
            total_loss += loss.item()
            loss.backward()
            if idx % self.args.batchsize == 0 and idx > 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
        self.epoch += 1
        return total_loss / len(dataset)

    # helper function for testing 
Example #18
Source File: pointmass.py    From cs294-112_hws with MIT License
def create_visualization(self, dirname, density=False):
        for s in os.listdir(dirname):
            for i in tqdm(range(100)):
                self.visualize(None, i, os.path.join(dirname, s))
            self.create_gif(os.path.join(dirname, str(s))) 
Example #19
Source File: evaluator.py    From easy-faster-rcnn.pytorch with MIT License
def evaluate(self, model: Model) -> Tuple[float, str]:
        all_image_ids, all_detection_bboxes, all_detection_classes, all_detection_probs = [], [], [], []

        with torch.no_grad():
            for _, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self._dataloader)):
                image_batch = image_batch.cuda()
                assert image_batch.shape[0] == 1, 'do not use batch size more than 1 on evaluation'

                detection_bboxes, detection_classes, detection_probs, detection_batch_indices = \
                    model.eval().forward(image_batch)

                scale_batch = scale_batch[detection_batch_indices].unsqueeze(dim=-1).expand_as(detection_bboxes).to(device=detection_bboxes.device)
                detection_bboxes = detection_bboxes / scale_batch

                kept_indices = (detection_probs > 0.05).nonzero().view(-1)
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]
                detection_batch_indices = detection_batch_indices[kept_indices]

                all_detection_bboxes.extend(detection_bboxes.tolist())
                all_detection_classes.extend(detection_classes.tolist())
                all_detection_probs.extend(detection_probs.tolist())
                all_image_ids.extend([image_id_batch[i] for i in detection_batch_indices])

        mean_ap, detail = self._dataset.evaluate(self._path_to_results_dir, all_image_ids, all_detection_bboxes, all_detection_classes, all_detection_probs)
        return mean_ap, detail 
Example #20
Source File: mnist_eager.py    From dockerfiles with Apache License 2.0
def train(epochs):
  for epoch in range(epochs):
    for (batch, (images, labels)) in tqdm(enumerate(dataset)):
      train_step(images, labels)
    print ('Epoch {} finished'.format(epoch)) 
Example #21
Source File: run_mujoco.py    From lirpg with MIT License
def runner(env, policy_func, load_model_path, timesteps_per_batch, number_trajs,
           stochastic_policy, save=False, reuse=False):

    # Setup network
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space, reuse=reuse)
    U.initialize()
    # Prepare for rollouts
    # ----------------------------------------
    U.load_state(load_model_path)

    obs_list = []
    acs_list = []
    len_list = []
    ret_list = []
    for _ in tqdm(range(number_trajs)):
        traj = traj_1_generator(pi, env, timesteps_per_batch, stochastic=stochastic_policy)
        obs, acs, ep_len, ep_ret = traj['ob'], traj['ac'], traj['ep_len'], traj['ep_ret']
        obs_list.append(obs)
        acs_list.append(acs)
        len_list.append(ep_len)
        ret_list.append(ep_ret)
    if stochastic_policy:
        print('stochastic policy:')
    else:
        print('deterministic policy:')
    if save:
        filename = load_model_path.split('/')[-1] + '.' + env.spec.id
        np.savez(filename, obs=np.array(obs_list), acs=np.array(acs_list),
                 lens=np.array(len_list), rets=np.array(ret_list))
    avg_len = sum(len_list)/len(len_list)
    avg_ret = sum(ret_list)/len(ret_list)
    print("Average length:", avg_len)
    print("Average return:", avg_ret)
    return avg_len, avg_ret


# Sample one trajectory (until trajectory end) 
Example #22
Source File: tqdm_download.py    From post--memorization-in-rnns with MIT License
def download(*args, desc=None, **kwargs):
    last_b = [0]

    def _download_hook(b=1, bsize=1, tsize=None):
        if tsize is not None:
            t.total = tsize
        t.update((b - last_b[0]) * bsize)
        last_b[0] = b

    with tqdm(unit='B', unit_scale=True, miniters=1, desc=desc) as t:
        urllib.request.urlretrieve(*args,
                                   reporthook=_download_hook, data=None,
                                   **kwargs) 
Example #23
Source File: generate.py    From post--memorization-in-rnns with MIT License
def make_source_target_alignment(words, char_map, max_length,
                                 verbose=False):
    space_char_code = char_map[' ']

    source = []
    source_current = []
    target = []
    target_current = []
    length = []
    length_current = 0

    for word in tqdm(words, disable=not verbose):
        if length_current + len(word) + 1 > max_length:
            # concatenate current data and move it to storage
            source.append(np.concatenate(source_current))
            target.append(np.concatenate(target_current))
            length.append(length_current)

            # prepare for new source and target
            source_current = []
            target_current = []
            length_current = 0

        # add source and target, while maintaining the current total length
        source_current.append(
            np.array([space_char_code] + [char_map[char] for char in word],
                     dtype='int32')
        )
        target_current.append(
            np.array([char_map[char] for char in word] + [space_char_code],
                     dtype='int32')
        )
        length_current += 1 + len(word)

    # concatenate remaining data and move it to storage
    if length_current > 0:
        source.append(np.concatenate(source_current))
        target.append(np.concatenate(target_current))
        length.append(length_current)

    return (length, source, target) 
Example #24
Source File: behavior_clone.py    From lirpg with MIT License
def learn(env, policy_func, dataset, optim_batch_size=128, max_iters=1e4,
          adam_epsilon=1e-5, optim_stepsize=3e-4,
          ckpt_dir=None, log_dir=None, task_name=None,
          verbose=False):

    val_per_iter = int(max_iters/10)
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space)  # Construct network for new policy
    # placeholder
    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])
    stochastic = U.get_placeholder_cached(name="stochastic")
    loss = tf.reduce_mean(tf.square(ac-pi.ac))
    var_list = pi.get_trainable_variables()
    adam = MpiAdam(var_list, epsilon=adam_epsilon)
    lossandgrad = U.function([ob, ac, stochastic], [loss]+[U.flatgrad(loss, var_list)])

    U.initialize()
    adam.sync()
    logger.log("Pretraining with Behavior Cloning...")
    for iter_so_far in tqdm(range(int(max_iters))):
        ob_expert, ac_expert = dataset.get_next_batch(optim_batch_size, 'train')
        train_loss, g = lossandgrad(ob_expert, ac_expert, True)
        adam.update(g, optim_stepsize)
        if verbose and iter_so_far % val_per_iter == 0:
            ob_expert, ac_expert = dataset.get_next_batch(-1, 'val')
            val_loss, _ = lossandgrad(ob_expert, ac_expert, True)
            logger.log("Training loss: {}, Validation loss: {}".format(train_loss, val_loss))

    if ckpt_dir is None:
        savedir_fname = tempfile.TemporaryDirectory().name
    else:
        savedir_fname = osp.join(ckpt_dir, task_name)
    U.save_state(savedir_fname, var_list=pi.get_variables())
    return savedir_fname 
Example #25
Source File: convert_lmdb.py    From torch-toolbox with BSD 3-Clause "New" or "Revised" License
def generate_lmdb_dataset(
        data_set: Dataset,
        save_dir: str,
        name: str,
        num_workers=0,
        max_size_rate=1.0,
        write_frequency=5000):
    data_loader = DataLoader(
        data_set,
        num_workers=num_workers,
        collate_fn=lambda x: x)
    num_samples = len(data_set)
    check_dir(save_dir)
    lmdb_path = os.path.join(save_dir, '{}.lmdb'.format(name))
    db = lmdb.open(lmdb_path, subdir=False,
                   map_size=int(1099511627776 * max_size_rate),
                   readonly=False, meminit=True, map_async=True)
    txn = db.begin(write=True)
    for idx, data in enumerate(tqdm(data_loader)):
        txn.put(get_key(idx), dumps_pyarrow(data[0]))
        if idx % write_frequency == 0 and idx > 0:
            txn.commit()
            txn = db.begin(write=True)
    txn.put(b'__len__', dumps_pyarrow(num_samples))
    try:
        classes = data_set.classes
        class_to_idx = data_set.class_to_idx
        txn.put(b'classes', dumps_pyarrow(classes))
        txn.put(b'class_to_idx', dumps_pyarrow(class_to_idx))
    except AttributeError:
        pass

    txn.commit()
    db.sync()
    db.close() 
Example #26
Source File: sync_checkpoint_to_s3.py    From tpu_pretrain with Apache License 2.0
def sync(local_dir, keypath, bucket):
    try:
        my_bucket = s3_resource.Bucket(bucket)
        for path, subdirs, files in os.walk(local_dir):
            path = path.replace("\\","/")
            directory_name = keypath
            for file in tqdm(files):
                my_bucket.upload_file(os.path.join(path, file), f"{keypath}/{file}")

    except Exception as err:
        print(err) 
Example #27
Source File: bag_re.py    From OpenNRE with MIT License
def eval_model(self, eval_loader):
        self.model.eval()
        with torch.no_grad():
            t = tqdm(eval_loader)
            pred_result = []
            for iter, data in enumerate(t):
                if torch.cuda.is_available():
                    for i in range(len(data)):
                        try:
                            data[i] = data[i].cuda()
                        except:
                            pass
                label = data[0]
                bag_name = data[1]
                scope = data[2]
                args = data[3:]
                logits = self.model(None, scope, *args, train=False, bag_size=self.bag_size) # results after softmax
                logits = logits.cpu().numpy()
                for i in range(len(logits)):
                    for relid in range(self.model.module.num_class):
                        if self.model.module.id2rel[relid] != 'NA':
                            pred_result.append({
                                'entpair': bag_name[i][:2], 
                                'relation': self.model.module.id2rel[relid], 
                                'score': logits[i][relid]
                            })
            result = eval_loader.dataset.eval(pred_result)
        return result 
Example #28
Source File: sentence_re.py    From OpenNRE with MIT License
def eval_model(self, eval_loader):
        self.eval()
        avg_acc = AverageMeter()
        pred_result = []
        with torch.no_grad():
            t = tqdm(eval_loader)
            for iter, data in enumerate(t):
                if torch.cuda.is_available():
                    for i in range(len(data)):
                        try:
                            data[i] = data[i].cuda()
                        except:
                            pass
                label = data[0]
                args = data[1:]        
                logits = self.parallel_model(*args)
                score, pred = logits.max(-1) # (B)
                # Save result
                for i in range(pred.size(0)):
                    pred_result.append(pred[i].item())
                # Log
                acc = float((pred == label).long().sum()) / label.size(0)
                avg_acc.update(acc, pred.size(0))
                t.set_postfix(acc=avg_acc.avg)
        result = eval_loader.dataset.eval(pred_result)
        return result 
Example #29
Source File: extraction.py    From git2net with GNU Affero General Public License v3.0
def _process_repo_parallel(git_repo_dir, sqlite_db_file, commits, extraction_settings):
    """ Processes all commits in a given git repository in a parallel manner.

    Args:
        git_repo_dir: path to the git repository that is mined
        sqlite_db_file: path (including database name) where the sqlite database will be created
        commits: list of commits that are already in the database
        extraction_settings: settings for the extraction

    Returns:
        sqlite database will be written at specified location
    """

    args = [{'git_repo_dir': git_repo_dir, 'commit_hash': commit.hash, 'extraction_settings': extraction_settings}
            for commit in commits]

    # suggestion by marco-c (github.com/ishepard/pydriller/issues/110)
    def _init(git_repo_dir, git_init_lock_):
        global git_init_lock
        git_init_lock = git_init_lock_

    con = sqlite3.connect(sqlite_db_file)
    with multiprocessing.Pool(extraction_settings['no_of_processes'],
                              initializer=_init, initargs=(git_repo_dir,git_init_lock)) as p:
        with tqdm(total=len(args), desc='Parallel ({0} processes)' \
                  .format(extraction_settings['no_of_processes'])) as pbar:
            for result in p.imap_unordered(_process_commit, args, chunksize=extraction_settings['chunksize']):
                if not result['edits'].empty:
                    result['edits'].to_sql('edits', con, if_exists='append', index=False)
                if not result['commit'].empty:
                    result['commit'].to_sql('commits', con, if_exists='append', index=False)
                pbar.update(1) 
Example #30
Source File: extraction.py    From git2net with GNU Affero General Public License v3.0
def identify_file_renaming(git_repo_dir):
    """ Identifies all names and locations different files in a repository have had.

    Args:
        git_repo_dir: path to the git repository that is mined

    Returns:
        dag: pathpy DAG object depicting the renaming process
        aliases: dictionary containing all aliases for all files
    """

    # TODO: Consider corner case where file is renamed and new file with old name is created.
    git_repo = pydriller.GitRepository(git_repo_dir)

    dag = pp.DAG()
    for commit in tqdm(list(git_repo.get_list_commits()), desc='Creating DAG'):
        for modification in commit.modifications:

            if (modification.new_path not in dag.nodes) and \
               (modification.old_path == modification.new_path) and \
               (modification.change_type == pydriller.domain.commit.ModificationType.ADD):
                if modification.new_path not in dag.nodes:
                    dag.add_node(modification.new_path)
            elif modification.old_path != modification.new_path:
                if pd.isnull(modification.old_path):
                    if modification.new_path not in dag.nodes:
                        dag.add_node(modification.new_path)
                elif pd.isnull(modification.new_path):
                    pass
                else:
                    dag.add_edge(modification.new_path, modification.old_path)

    dag.make_acyclic()
    nodes = [k for k, v in dag.nodes.items() if v['indegree'] == 0 and not v['outdegree'] == 0]
    aliases = {z: y[-1] for x in nodes for y in dag.routes_from_node(x) for z in y[:-1]}

    return dag, aliases