Python tqdm.tqdm() Examples

The following are 30 code examples of tqdm.tqdm(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tqdm, or try the search function.
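
Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of the tqdm keyword arguments that recur throughout them, such as desc, total, disable, leave, and ncols:

from tqdm import tqdm
import time

def process(items, verbose=True):
    # Wrap any iterable; desc labels the bar, disable switches it off,
    # leave=False removes the bar when finished, ncols fixes its width.
    for item in tqdm(items, desc='Processing', total=len(items),
                     disable=not verbose, leave=False, ncols=80):
        time.sleep(0.01)  # stand-in for real work

process(range(100))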
Example #1
Source File: utils.py From comet-commonsense with Apache License 2.0 | 8 votes |
def encode(self, texts, verbose=True):
    texts_tokens = []
    if verbose:
        for text in tqdm(texts, ncols=80, leave=False):
            text = self.nlp(text_standardize(ftfy.fix_text(text)))
            text_tokens = []
            for token in text:
                text_tokens.extend(
                    [self.encoder.get(t, 0) for t in self.bpe(token.text.lower()).split(' ')])
            texts_tokens.append(text_tokens)
    else:
        for text in texts:
            text = self.nlp(text_standardize(ftfy.fix_text(text)))
            text_tokens = []
            for token in text:
                text_tokens.extend(
                    [self.encoder.get(t, 0) for t in self.bpe(token.text.lower()).split(' ')])
            texts_tokens.append(text_tokens)
    return texts_tokens
Example #2
Source File: datasets.py From pruning_yolov3 with GNU General Public License v3.0 | 8 votes |
def convert_images2bmp():
    # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
    for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
        folder = os.sep + Path(path).name
        output = path.replace(folder, folder + 'bmp')
        if os.path.exists(output):
            shutil.rmtree(output)  # delete output folder
        os.makedirs(output)  # make new output folder

        for f in tqdm(glob.glob('%s*.jpg' % path)):
            save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp')
            cv2.imwrite(save_name, cv2.imread(f))

    for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
        with open(label_path, 'r') as file:
            lines = file.read()
        lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
            '/Users/glennjocher/PycharmProjects/', '../')
        with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
            file.write(lines)
Example #3
Source File: generate.py From post--memorization-in-rnns with MIT License | 7 votes |
def save_tfrecord(filename, dataset, verbose=False):
    observations = len(dataset['length'])

    serialized = []
    with Pool(processes=4) as pool:
        for serialized_string in tqdm(pool.imap(
            tfrecord_serializer,
            zip(dataset['length'], dataset['source'], dataset['target']),
            chunksize=10
        ), total=observations, disable=not verbose):
            serialized.append(serialized_string)

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )

    for serialized_string in tqdm(serialized, disable=not verbose):
        writer.write(serialized_string)

    writer.close()
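
Example #3 wraps pool.imap in tqdm. Because imap returns a lazy iterator with no length, the bar can only report percentages if total= is passed explicitly, as the example does with total=observations. A minimal standalone sketch of the same idiom (the square function is just a placeholder workload):

from multiprocessing import Pool
from tqdm import tqdm

def square(x):
    return x * x

if __name__ == '__main__':
    items = list(range(10000))
    results = []
    with Pool(processes=4) as pool:
        # imap has no __len__, so pass total= explicitly.
        for value in tqdm(pool.imap(square, items, chunksize=100), total=len(items)):
            results.append(value)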
Example #4
Source File: dataset.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def load_embedding(self, f, reset=[]):
    vectors = {}
    for line in tqdm(f.readlines(), desc='Loading embeddings'):
        tokens = line.rstrip('\n').split(' ')
        word = tokens[0].lower() if self.lower else tokens[0]
        if self.include_unseen:
            self.add(word)
        if word in self.tok2idx:
            vectors[word] = [float(x) for x in tokens[1:]]
    dim = len(vectors.values()[0])

    def to_vector(tok):
        if tok in vectors and tok not in reset:
            return vectors[tok]
        elif tok not in vectors:
            return np.random.normal(-0.05, 0.05, size=dim)
        else:
            return [0.0]*dim

    self.embed = mx.nd.array([vectors[tok] if tok in vectors and tok not in reset
                              else [0.0]*dim for tok in self.idx2tok])
Example #5
Source File: dqn.py From Pytorch-Project-Template with MIT License | 6 votes |
def train(self):
    """
    Training loop based on the number of episodes
    :return:
    """
    for episode in tqdm(range(self.current_episode, self.config.num_episodes)):
        self.current_episode = episode
        # reset environment
        self.env.reset()
        self.train_one_epoch()
        # The target network has its weights kept frozen most of the time
        if self.current_episode % self.config.target_update == 0:
            self.target_model.load_state_dict(self.policy_model.state_dict())

    self.env.render()
    self.env.close()
Example #6
Source File: Embed.py From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0 | 6 votes |
def _read_file(path):
    """
    :param path: embed file path
    :return:
    """
    embed_dict = {}
    with open(path, encoding='utf-8') as f:
        lines = f.readlines()
        lines = tqdm.tqdm(lines)
        for line in lines:
            values = line.strip().split(' ')
            if len(values) == 1 or len(values) == 2 or len(values) == 3:
                continue
            w, v = values[0], values[1:]
            embed_dict[w] = v
    return embed_dict
Example #7
Source File: pregenerate_training_data.py From tpu_pretrain with Apache License 2.0 | 6 votes |
def input_file_to_training_data(args, input_file, epoch, tokenizer, num_files):
    print(input_file)
    with DocumentDatabase(reduce_memory=args.reduce_memory) as docs:
        with open(input_file) as f:
            doc = []
            for line in tqdm(f, desc="Loading Dataset", unit=" lines"):
                line = line.strip()
                if line == "":
                    docs.add_document(doc)
                    doc = []
                else:
                    tokens = tokenizer.tokenize(line)
                    doc.append(tokens)
            if doc:
                docs.add_document(doc)  # If the last doc didn't end on a newline, make sure it still gets added

        if len(docs) <= 1:
            exit("ERROR: No document breaks were found in the input file! These are necessary to allow the script to "
                 "ensure that random NextSentences are not sampled from the same document. Please add blank lines to "
                 "indicate breaks between documents in your input file. If your dataset does not contain multiple "
                 "documents, blank lines can be inserted at any natural boundary, such as the ends of chapters, "
                 "sections or paragraphs.")

        for i in range(args.epochs_to_generate):
            create_training_file(docs, tokenizer, args, epoch + i * num_files)
Example #8
Source File: utils.py From pruning_yolov3 with GNU General Public License v3.0 | 6 votes |
def coco_single_class_labels(path='../coco/labels/train2014/', label_class=43):
    # Makes single-class coco datasets. from utils.utils import *; coco_single_class_labels()
    if os.path.exists('new/'):
        shutil.rmtree('new/')  # delete output folder
    os.makedirs('new/')  # make new output folder
    os.makedirs('new/labels/')
    os.makedirs('new/images/')
    for file in tqdm(sorted(glob.glob('%s/*.*' % path))):
        with open(file, 'r') as f:
            labels = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
        i = labels[:, 0] == label_class
        if any(i):
            img_file = file.replace('labels', 'images').replace('txt', 'jpg')
            labels[:, 0] = 0  # reset class to 0
            with open('new/images.txt', 'a') as f:  # add image to dataset list
                f.write(img_file + '\n')
            with open('new/labels/' + Path(file).name, 'a') as f:  # write label
                for l in labels[i]:
                    f.write('%g %.6f %.6f %.6f %.6f\n' % tuple(l))
            shutil.copyfile(src=img_file, dst='new/images/' + Path(file).name.replace('txt', 'jpg'))  # copy images
Example #9
Source File: preprocessing.py From Image-Caption-Generator with MIT License | 6 votes |
def extract_features(path, model_type):
    if model_type == 'inceptionv3':
        from keras.applications.inception_v3 import preprocess_input
        target_size = (299, 299)
    elif model_type == 'vgg16':
        from keras.applications.vgg16 import preprocess_input
        target_size = (224, 224)
    # Get CNN Model from model.py
    model = CNNModel(model_type)
    features = dict()
    # Extract features from each photo
    for name in tqdm(os.listdir(path)):
        # Loading and resizing image
        filename = path + name
        image = load_img(filename, target_size=target_size)
        # Convert the image pixels to a numpy array
        image = img_to_array(image)
        # Reshape data for the model
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        # Prepare the image for the CNN model
        image = preprocess_input(image)
        # Pass image into model to get encoded features
        feature = model.predict(image, verbose=0)
        # Store encoded features for the image
        image_id = name.split('.')[0]
        features[image_id] = feature
    return features
Example #10
Source File: utils.py From pruning_yolov3 with GNU General Public License v3.0 | 6 votes |
def crop_images_random(path='../images/', scale=0.50):  # from utils.utils import *; crop_images_random()
    # crops images into random squares up to scale fraction
    # WARNING: overwrites images!
    for file in tqdm(sorted(glob.glob('%s/*.*' % path))):
        img = cv2.imread(file)  # BGR
        if img is not None:
            h, w = img.shape[:2]

            # create random mask
            a = 30  # minimum size (pixels)
            mask_h = random.randint(a, int(max(a, h * scale)))  # mask height
            mask_w = mask_h  # mask width

            # box
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            cv2.imwrite(file, img[ymin:ymax, xmin:xmax])
Example #11
Source File: gla_gpu.py From Deep_VoiceChanger with MIT License | 6 votes |
def auto_inverse(self, whole_spectrum):
    whole_spectrum = np.copy(whole_spectrum).astype(complex)
    whole_spectrum[whole_spectrum < 1] = 1
    overwrap = self.buffer_size * 2
    height = whole_spectrum.shape[0]
    parallel_dif = (height-overwrap) // self.parallel
    if height < self.parallel*overwrap:
        raise Exception('voice length is too small to use gpu, or parallel number is too big')

    spec = [self.inverse(whole_spectrum[range(i, i+parallel_dif*self.parallel, parallel_dif), :])
            for i in tqdm.tqdm(range(parallel_dif+overwrap))]
    spec = spec[overwrap:]
    spec = np.concatenate(spec, axis=1)
    spec = spec.reshape(-1, self.wave_len)

    # Below code doesn't consider wave_len and wave_dif; to be fixed.
    wave = np.fft.ifft(spec, axis=1).real
    pad = np.zeros((wave.shape[0], 2), dtype=float)
    wave = np.concatenate([wave, pad], axis=1)

    dst = np.zeros((wave.shape[0]+3)*self.wave_dif, dtype=float)
    for i in range(4):
        w = wave[range(i, wave.shape[0], 4), :]
        w = w.reshape(-1)
        dst[i*self.wave_dif:i*self.wave_dif+len(w)] += w

    return dst*0.5
Example #12
Source File: test_classification_tree.py From Kaggler with MIT License | 6 votes |
def test():
    data = np.random.randint(0, 1000, size=(N_OBS, N_FEATURE))
    y = np.random.randint(2, size=N_OBS)
    train = data[0:N_OBS // 2]
    ytrain = y[0:N_OBS // 2]
    test = data[N_OBS // 2:]
    ytest = y[N_OBS // 2:]
    learner = ClassificationTree(number_of_features=N_FEATURE)
    for t, x in enumerate(tqdm(train)):
        learner.update(x, ytrain[t])
    correct_num = 0
    for t, x in enumerate(tqdm(test)):
        y_pred = learner.predict(x)
        if y_pred == ytest[t]:
            correct_num += 1
    print(correct_num)
Example #13
Source File: trainer.py From treelstm.pytorch with MIT License | 6 votes |
def test(self, dataset):
    self.model.eval()
    with torch.no_grad():
        total_loss = 0.0
        predictions = torch.zeros(len(dataset), dtype=torch.float, device='cpu')
        indices = torch.arange(1, dataset.num_classes + 1, dtype=torch.float, device='cpu')
        for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
            ltree, linput, rtree, rinput, label = dataset[idx]
            target = utils.map_label_to_target(label, dataset.num_classes)
            linput, rinput = linput.to(self.device), rinput.to(self.device)
            target = target.to(self.device)
            output = self.model(ltree, linput, rtree, rinput)
            loss = self.criterion(output, target)
            total_loss += loss.item()
            output = output.squeeze().to('cpu')
            predictions[idx] = torch.dot(indices, torch.exp(output))
    return total_loss / len(dataset), predictions
Example #14
Source File: autocomplete.py From post--memorization-in-rnns with MIT License | 6 votes |
def save_tfrecord(filename, dataset, verbose=False):
    observations = len(dataset['length'])

    serialized = []
    with Pool(processes=4) as pool:
        for serialized_string in tqdm(pool.imap(
            tfrecord_serializer,
            zip(dataset['length'], dataset['source'], dataset['target']),
            chunksize=10
        ), total=observations, disable=not verbose):
            serialized.append(serialized_string)

    # Save serialized dataset
    writer = tf.python_io.TFRecordWriter(
        filename,
        options=tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB
        )
    )

    for serialized_string in tqdm(serialized, disable=not verbose):
        writer.write(serialized_string)

    writer.close()
Example #15
Source File: extraction.py From git2net with GNU Affero General Public License v3.0 | 6 votes |
def _process_repo_serial(git_repo_dir, sqlite_db_file, commits, extraction_settings):
    """ Processes all commits in a given git repository in a serial manner.

    Args:
        git_repo_dir: path to the git repository that is mined
        sqlite_db_file: path (including database name) where the sqlite database will be created
        commits: list of commits that have to be processed
        extraction_settings: settings for the extraction

    Returns:
        sqlite database will be written at specified location
    """

    git_repo = pydriller.GitRepository(git_repo_dir)

    con = sqlite3.connect(sqlite_db_file)

    for commit in tqdm(commits, desc='Serial'):
        args = {'git_repo_dir': git_repo_dir, 'commit_hash': commit.hash, 'extraction_settings': extraction_settings}
        result = _process_commit(args)

        if not result['edits'].empty:
            result['edits'].to_sql('edits', con, if_exists='append', index=False)
        if not result['commit'].empty:
            result['commit'].to_sql('commits', con, if_exists='append', index=False)
Example #16
Source File: pools.py From python-pool-performance with MIT License | 6 votes |
def run_test(work_type: FunctionType, job_sets: Sequence, trials: int,
             pool_class: type, worker_count: int) -> Mapping:
    pool = pool_class(worker_count)
    if work_type == 'compute':
        test_func = pool.run_compute_test
    elif work_type == 'network':
        test_func = pool.run_network_test
    else:
        raise Exception("Invalid work type: {}".format(work_type))
    results = map(
        lambda jobs: test_func(jobs, trials, show_progress=True),
        tqdm(job_sets, desc=pool_class.__name__),
    )
    summarized_results = list(map(summarize_test, results))
    pool.destroy_pool()
    return summarized_results
Example #17
Source File: trainer.py From treelstm.pytorch with MIT License | 6 votes |
def train(self, dataset):
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    indices = torch.randperm(len(dataset), dtype=torch.long, device='cpu')
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, linput, rtree, rinput, label = dataset[indices[idx]]
        target = utils.map_label_to_target(label, dataset.num_classes)
        linput, rinput = linput.to(self.device), rinput.to(self.device)
        target = target.to(self.device)
        output = self.model(ltree, linput, rtree, rinput)
        loss = self.criterion(output, target)
        total_loss += loss.item()
        loss.backward()
        if idx % self.args.batchsize == 0 and idx > 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
    self.epoch += 1
    return total_loss / len(dataset)

# helper function for testing
Example #18
Source File: pointmass.py From cs294-112_hws with MIT License | 5 votes |
def create_visualization(self, dirname, density=False):
    for s in os.listdir(dirname):
        for i in tqdm(range(100)):
            self.visualize(None, i, os.path.join(dirname, s))
        self.create_gif(os.path.join(dirname, str(s)))
Example #19
Source File: evaluator.py From easy-faster-rcnn.pytorch with MIT License | 5 votes |
def evaluate(self, model: Model) -> Tuple[float, str]:
    all_image_ids, all_detection_bboxes, all_detection_classes, all_detection_probs = [], [], [], []

    with torch.no_grad():
        for _, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self._dataloader)):
            image_batch = image_batch.cuda()
            assert image_batch.shape[0] == 1, 'do not use batch size more than 1 on evaluation'

            detection_bboxes, detection_classes, detection_probs, detection_batch_indices = \
                model.eval().forward(image_batch)

            scale_batch = scale_batch[detection_batch_indices].unsqueeze(dim=-1).expand_as(detection_bboxes).to(device=detection_bboxes.device)
            detection_bboxes = detection_bboxes / scale_batch

            kept_indices = (detection_probs > 0.05).nonzero().view(-1)
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]
            detection_batch_indices = detection_batch_indices[kept_indices]

            all_detection_bboxes.extend(detection_bboxes.tolist())
            all_detection_classes.extend(detection_classes.tolist())
            all_detection_probs.extend(detection_probs.tolist())
            all_image_ids.extend([image_id_batch[i] for i in detection_batch_indices])

    mean_ap, detail = self._dataset.evaluate(self._path_to_results_dir, all_image_ids, all_detection_bboxes,
                                             all_detection_classes, all_detection_probs)
    return mean_ap, detail
Example #20
Source File: mnist_eager.py From dockerfiles with Apache License 2.0 | 5 votes |
def train(epochs):
    for epoch in range(epochs):
        for (batch, (images, labels)) in tqdm(enumerate(dataset)):
            train_step(images, labels)
        print('Epoch {} finished'.format(epoch))
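
A side note on Example #20: wrapping enumerate(dataset) hides the underlying length, so the bar runs without a percentage or ETA. If the dataset has a known length, either of the following variants (a generic sketch, not from the project) restores it:

from tqdm import tqdm

data = list(range(500))

# Option 1: give tqdm the length explicitly.
for i, x in tqdm(enumerate(data), total=len(data)):
    pass

# Option 2: wrap the iterable first, then enumerate.
for i, x in enumerate(tqdm(data)):
    pass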
Example #21
Source File: run_mujoco.py From lirpg with MIT License | 5 votes |
def runner(env, policy_func, load_model_path, timesteps_per_batch, number_trajs,
           stochastic_policy, save=False, reuse=False):

    # Setup network
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space, reuse=reuse)
    U.initialize()
    # Prepare for rollouts
    # ----------------------------------------
    U.load_state(load_model_path)

    obs_list = []
    acs_list = []
    len_list = []
    ret_list = []
    for _ in tqdm(range(number_trajs)):
        traj = traj_1_generator(pi, env, timesteps_per_batch, stochastic=stochastic_policy)
        obs, acs, ep_len, ep_ret = traj['ob'], traj['ac'], traj['ep_len'], traj['ep_ret']
        obs_list.append(obs)
        acs_list.append(acs)
        len_list.append(ep_len)
        ret_list.append(ep_ret)
    if stochastic_policy:
        print('stochastic policy:')
    else:
        print('deterministic policy:')
    if save:
        filename = load_model_path.split('/')[-1] + '.' + env.spec.id
        np.savez(filename, obs=np.array(obs_list), acs=np.array(acs_list),
                 lens=np.array(len_list), rets=np.array(ret_list))
    avg_len = sum(len_list)/len(len_list)
    avg_ret = sum(ret_list)/len(ret_list)
    print("Average length:", avg_len)
    print("Average return:", avg_ret)
    return avg_len, avg_ret

# Sample one trajectory (until trajectory end)
Example #22
Source File: tqdm_download.py From post--memorization-in-rnns with MIT License | 5 votes |
def download(*args, desc=None, **kwargs):
    last_b = [0]

    def _download_hook(b=1, bsize=1, tsize=None):
        if tsize is not None:
            t.total = tsize
        t.update((b - last_b[0]) * bsize)
        last_b[0] = b

    with tqdm(unit='B', unit_scale=True, miniters=1, desc=desc) as t:
        urllib.request.urlretrieve(*args, reporthook=_download_hook, data=None, **kwargs)
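
Example #22 drives the bar manually from a urlretrieve reporthook, calling t.update() with the number of bytes received since the last callback. The same manual-update idiom, reduced to a standalone sketch with made-up chunk sizes:

from tqdm import tqdm

def consume_chunks(chunks, total_bytes):
    # Manual mode: create the bar with a known total and call update()
    # with the number of units completed since the last call.
    with tqdm(total=total_bytes, unit='B', unit_scale=True, desc='download') as t:
        for chunk in chunks:
            t.update(len(chunk))

consume_chunks([b'x' * 1024] * 64, total_bytes=64 * 1024)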
Example #23
Source File: generate.py From post--memorization-in-rnns with MIT License | 5 votes |
def make_source_target_alignment(words, char_map, max_length, verbose=False):
    space_char_code = char_map[' ']

    source = []
    source_current = []
    target = []
    target_current = []
    length = []
    length_current = 0

    for word in tqdm(words, disable=not verbose):
        if length_current + len(word) + 1 > max_length:
            # concatenate current data and move it to storage
            source.append(np.concatenate(source_current))
            target.append(np.concatenate(target_current))
            length.append(length_current)

            # prepare for new source and target
            source_current = []
            target_current = []
            length_current = 0

        # add source and target, while maintaining the current total length
        source_current.append(
            np.array([space_char_code] + [char_map[char] for char in word], dtype='int32')
        )
        target_current.append(
            np.array([char_map[char] for char in word] + [space_char_code], dtype='int32')
        )
        length_current += 1 + len(word)

    # concatenate remaining data and move it to storage
    if length_current > 0:
        source.append(np.concatenate(source_current))
        target.append(np.concatenate(target_current))
        length.append(length_current)

    return (length, source, target)
Example #24
Source File: behavior_clone.py From lirpg with MIT License | 5 votes |
def learn(env, policy_func, dataset, optim_batch_size=128, max_iters=1e4,
          adam_epsilon=1e-5, optim_stepsize=3e-4,
          ckpt_dir=None, log_dir=None, task_name=None, verbose=False):
    val_per_iter = int(max_iters/10)
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space)  # Construct network for new policy
    # placeholder
    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])
    stochastic = U.get_placeholder_cached(name="stochastic")
    loss = tf.reduce_mean(tf.square(ac-pi.ac))
    var_list = pi.get_trainable_variables()
    adam = MpiAdam(var_list, epsilon=adam_epsilon)
    lossandgrad = U.function([ob, ac, stochastic], [loss]+[U.flatgrad(loss, var_list)])

    U.initialize()
    adam.sync()
    logger.log("Pretraining with Behavior Cloning...")
    for iter_so_far in tqdm(range(int(max_iters))):
        ob_expert, ac_expert = dataset.get_next_batch(optim_batch_size, 'train')
        train_loss, g = lossandgrad(ob_expert, ac_expert, True)
        adam.update(g, optim_stepsize)
        if verbose and iter_so_far % val_per_iter == 0:
            ob_expert, ac_expert = dataset.get_next_batch(-1, 'val')
            val_loss, _ = lossandgrad(ob_expert, ac_expert, True)
            logger.log("Training loss: {}, Validation loss: {}".format(train_loss, val_loss))

    if ckpt_dir is None:
        savedir_fname = tempfile.TemporaryDirectory().name
    else:
        savedir_fname = osp.join(ckpt_dir, task_name)
    U.save_state(savedir_fname, var_list=pi.get_variables())
    return savedir_fname
Example #25
Source File: convert_lmdb.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 5 votes |
def generate_lmdb_dataset(data_set: Dataset, save_dir: str, name: str,
                          num_workers=0, max_size_rate=1.0, write_frequency=5000):
    data_loader = DataLoader(data_set, num_workers=num_workers, collate_fn=lambda x: x)
    num_samples = len(data_set)
    check_dir(save_dir)
    lmdb_path = os.path.join(save_dir, '{}.lmdb'.format(name))
    db = lmdb.open(lmdb_path, subdir=False,
                   map_size=int(1099511627776 * max_size_rate),
                   readonly=False, meminit=True, map_async=True)
    txn = db.begin(write=True)
    for idx, data in enumerate(tqdm(data_loader)):
        txn.put(get_key(idx), dumps_pyarrow(data[0]))
        if idx % write_frequency == 0 and idx > 0:
            txn.commit()
            txn = db.begin(write=True)
    txn.put(b'__len__', dumps_pyarrow(num_samples))
    try:
        classes = data_set.classes
        class_to_idx = data_set.class_to_idx
        txn.put(b'classes', dumps_pyarrow(classes))
        txn.put(b'class_to_idx', dumps_pyarrow(class_to_idx))
    except AttributeError:
        pass
    txn.commit()
    db.sync()
    db.close()
Example #26
Source File: sync_checkpoint_to_s3.py From tpu_pretrain with Apache License 2.0 | 5 votes |
def sync(local_dir, keypath, bucket):
    try:
        my_bucket = s3_resource.Bucket(bucket)
        for path, subdirs, files in os.walk(local_dir):
            path = path.replace("\\", "/")
            directory_name = keypath
            for file in tqdm(files):
                my_bucket.upload_file(os.path.join(path, file), f"{keypath}/{file}")
    except Exception as err:
        print(err)
Example #27
Source File: bag_re.py From OpenNRE with MIT License | 5 votes |
def eval_model(self, eval_loader):
    self.model.eval()
    with torch.no_grad():
        t = tqdm(eval_loader)
        pred_result = []
        for iter, data in enumerate(t):
            if torch.cuda.is_available():
                for i in range(len(data)):
                    try:
                        data[i] = data[i].cuda()
                    except:
                        pass
            label = data[0]
            bag_name = data[1]
            scope = data[2]
            args = data[3:]
            logits = self.model(None, scope, *args, train=False, bag_size=self.bag_size)  # results after softmax
            logits = logits.cpu().numpy()
            for i in range(len(logits)):
                for relid in range(self.model.module.num_class):
                    if self.model.module.id2rel[relid] != 'NA':
                        pred_result.append({
                            'entpair': bag_name[i][:2],
                            'relation': self.model.module.id2rel[relid],
                            'score': logits[i][relid]
                        })
    result = eval_loader.dataset.eval(pred_result)
    return result
Example #28
Source File: sentence_re.py From OpenNRE with MIT License | 5 votes |
def eval_model(self, eval_loader):
    self.eval()
    avg_acc = AverageMeter()
    pred_result = []
    with torch.no_grad():
        t = tqdm(eval_loader)
        for iter, data in enumerate(t):
            if torch.cuda.is_available():
                for i in range(len(data)):
                    try:
                        data[i] = data[i].cuda()
                    except:
                        pass
            label = data[0]
            args = data[1:]
            logits = self.parallel_model(*args)
            score, pred = logits.max(-1)  # (B)
            # Save result
            for i in range(pred.size(0)):
                pred_result.append(pred[i].item())
            # Log
            acc = float((pred == label).long().sum()) / label.size(0)
            avg_acc.update(acc, pred.size(0))
            t.set_postfix(acc=avg_acc.avg)
    result = eval_loader.dataset.eval(pred_result)
    return result
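
Example #28 keeps a handle on the bar (t = tqdm(eval_loader)) so it can append a running accuracy with t.set_postfix(acc=...). A minimal sketch of that idiom with a dummy metric (the values are illustrative only):

from tqdm import tqdm
import random

correct, seen = 0, 0
t = tqdm(range(1000), desc='eval')
for step in t:
    correct += random.random() > 0.5  # stand-in for a real prediction check
    seen += 1
    t.set_postfix(acc=correct / seen)  # shown at the right edge of the bar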
Example #29
Source File: extraction.py From git2net with GNU Affero General Public License v3.0 | 5 votes |
def _process_repo_parallel(git_repo_dir, sqlite_db_file, commits, extraction_settings):
    """ Processes all commits in a given git repository in a parallel manner.

    Args:
        git_repo_dir: path to the git repository that is mined
        sqlite_db_file: path (including database name) where the sqlite database will be created
        commits: list of commits that are already in the database
        extraction_settings: settings for the extraction

    Returns:
        sqlite database will be written at specified location
    """
    args = [{'git_repo_dir': git_repo_dir, 'commit_hash': commit.hash, 'extraction_settings': extraction_settings}
            for commit in commits]

    # suggestion by marco-c (github.com/ishepard/pydriller/issues/110)
    def _init(git_repo_dir, git_init_lock_):
        global git_init_lock
        git_init_lock = git_init_lock_

    con = sqlite3.connect(sqlite_db_file)
    with multiprocessing.Pool(extraction_settings['no_of_processes'],
                              initializer=_init, initargs=(git_repo_dir, git_init_lock)) as p:
        with tqdm(total=len(args), desc='Parallel ({0} processes)' \
                  .format(extraction_settings['no_of_processes'])) as pbar:
            for result in p.imap_unordered(_process_commit, args,
                                           chunksize=extraction_settings['chunksize']):
                if not result['edits'].empty:
                    result['edits'].to_sql('edits', con, if_exists='append', index=False)
                if not result['commit'].empty:
                    result['commit'].to_sql('commits', con, if_exists='append', index=False)
                pbar.update(1)
Example #30
Source File: extraction.py From git2net with GNU Affero General Public License v3.0 | 5 votes |
def identify_file_renaming(git_repo_dir):
    """ Identifies all names and locations that different files in a repository have had.

    Args:
        git_repo_dir: path to the git repository that is mined

    Returns:
        dag: pathpy DAG object depicting the renaming process
        aliases: dictionary containing all aliases for all files
    """

    # TODO: Consider corner case where file is renamed and new file with old name is created.
    git_repo = pydriller.GitRepository(git_repo_dir)

    dag = pp.DAG()
    for commit in tqdm(list(git_repo.get_list_commits()), desc='Creating DAG'):
        for modification in commit.modifications:

            if (modification.new_path not in dag.nodes) and \
               (modification.old_path == modification.new_path) and \
               (modification.change_type == pydriller.domain.commit.ModificationType.ADD):
                if modification.new_path not in dag.nodes:
                    dag.add_node(modification.new_path)
            elif modification.old_path != modification.new_path:
                if pd.isnull(modification.old_path):
                    if modification.new_path not in dag.nodes:
                        dag.add_node(modification.new_path)
                elif pd.isnull(modification.new_path):
                    pass
                else:
                    dag.add_edge(modification.new_path, modification.old_path)

    dag.make_acyclic()
    nodes = [k for k, v in dag.nodes.items() if v['indegree'] == 0 and not v['outdegree'] == 0]
    aliases = {z: y[-1] for x in nodes for y in dag.routes_from_node(x) for z in y[:-1]}

    return dag, aliases