Python Examples of collections.defaultdict

Source File: news_corpus_generator.py From news-corpus-builder with MIT License

6 votes

def __init__(self,corpus_dir,datastore_type='file',db_name='corpus.db'):
        '''
        Read links and associated categories for specified articles 
        in text file seperated by a space

        Args:
            corpus_dir (str): The directory to save the generated corpus
            datastore_type (Optional[str]): Format to save generated corpus.
                                            Specify either 'file' or 'sqlite'.
            db_name (Optional[str]): Name of database if 'sqlite' is selected.
        '''

        self.g = Goose({'browser_user_agent': 'Mozilla','parser_class':'soup'})
        #self.g = Goose({'browser_user_agent': 'Mozilla'})
        self.corpus_dir = corpus_dir
        self.datastore_type = datastore_type
        self.db_name = db_name
        self.stats = defaultdict(int)

        self._create_corpus_dir(self.corpus_dir)

        self.db = None
        if self.datastore_type == 'sqlite':
            self.db = self.corpus_dir + '/' + self.db_name
            self._set_up_db(self.db)

Source File: DDPAE.py From DDPAE-video-prediction with MIT License

6 votes

def sample_latent(self, input, input_latent_mu, input_latent_sigma, pred_latent_mu,
                    pred_latent_sigma, initial_pose_mu, initial_pose_sigma, sample=True):
    '''
    Return latent variables: dictionary containing pose and content.
    Then, crop objects from the images and encode into z.
    '''
    latent = defaultdict(lambda: None)

    beta = self.get_transitions(input_latent_mu, input_latent_sigma,
                                pred_latent_mu, pred_latent_sigma, sample)
    pose = self.accumulate_pose(beta)
    # Sample initial pose
    initial_pose = self.pyro_sample('initial_pose', dist.Normal, initial_pose_mu,
                                    initial_pose_sigma, sample)
    pose += initial_pose.view(-1, 1, self.n_components, self.pose_latent_size)
    pose = self.constrain_pose(pose)

    # Get input objects
    input_pose = pose[:, :self.n_frames_input, :, :]
    input_obj = self.get_objects(input, input_pose)
    # Encode the sampled objects
    z = self.object_encoder(input_obj)
    z = self.sample_content(z, sample)
    latent.update({'pose': pose, 'content': z})
    return latent

Source File: replay.py From iSDX with Apache License 2.0

6 votes

def __init__(self, config, flows_dir, ports_dir, num_timesteps, debug=False):
        self.logger = logging.getLogger("LogHistory")
        if debug:
            self.logger.setLevel(logging.DEBUG)

        self.log_entry = namedtuple("LogEntry", "source destination type")
        self.ports = defaultdict(list)
        self.flows = defaultdict(list)

        self.data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.current_timestep = 0
        self.total_timesteps = num_timesteps

        self.parse_config(config)
        self.parse_logs(num_timesteps, flows_dir, ports_dir)
        self.info()

        pretty(self.data)

Source File: bleu_scorer.py From deep-summarization with MIT License

6 votes

def precook(s, n=4, out=False):
    """
    Takes a string as input and returns an object that can be given to
    either cook_refs or cook_test. This is optional: cook_refs and cook_test
    can take string arguments as well.

    :param s:
    :param n:
    :param out:
    :return:
    """
    words = s.split()
    counts = defaultdict(int)
    for k in xrange(1,n+1):
        for i in xrange(len(words)-k+1):
            ngram = tuple(words[i:i+k])
            counts[ngram] += 1
    return (len(words), counts)

Source File: coco.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def __init__(self, annotation_file=None):
        """
        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
        :param annotation_file (str): location of annotation file
        :param image_folder (str): location to the folder that hosts images.
        :return:
        """
        # load dataset
        self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
        if not annotation_file == None:
            print('loading annotations into memory...')
            tic = time.time()
            dataset = json.load(open(annotation_file, 'r'))
            assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
            print('Done (t={:0.2f}s)'.format(time.time()- tic))
            self.dataset = dataset
            self.createIndex()

Source File: s3.py From aegea with Apache License 2.0

6 votes

def lifecycle(args):
    if args.delete:
        return resources.s3.BucketLifecycle(args.bucket_name).delete()
    rule = defaultdict(list, Prefix=args.prefix, Status="Enabled")
    if args.transition_to_infrequent_access is not None:
        rule["Transitions"].append(dict(StorageClass="STANDARD_IA", Days=args.transition_to_infrequent_access))
    if args.transition_to_glacier is not None:
        rule["Transitions"].append(dict(StorageClass="GLACIER", Days=args.transition_to_glacier))
    if args.expire is not None:
        rule["Expiration"] = dict(Days=args.expire)
    if args.abort_incomplete_multipart_upload is not None:
        rule["AbortIncompleteMultipartUpload"] = dict(DaysAfterInitiation=args.abort_incomplete_multipart_upload)
    if len(rule) > 2:
        clients.s3.put_bucket_lifecycle_configuration(Bucket=args.bucket_name,
                                                      LifecycleConfiguration=dict(Rules=[rule]))
    try:
        for rule in resources.s3.BucketLifecycle(args.bucket_name).rules:
            print(json.dumps(rule))
    except ClientError as e:
        expect_error_codes(e, "NoSuchLifecycleConfiguration")
        logger.error("No lifecycle configuration for bucket %s", args.bucket_name)

Source File: __init__.py From EDeN with MIT License

6 votes

def compute_matching_neighborhoods_fraction(GA, GB, pairings):
    count = 0
    matches = dict([(i, j) for i, j in enumerate(pairings)])
    matching_edges = defaultdict(list)
    for i, j in GA.edges():
        ii = matches[i]
        jj = matches[j]
        if (ii, jj) in GB.edges():
            matching_edges[i].append(j)
            matching_edges[j].append(i)
    for u in GA.nodes():
        if matching_edges.get(u, False):
            neighbors = nx.neighbors(GA, u)
            matches_neighborhood = True
            for v in neighbors:
                if v not in matching_edges[u]:
                    matches_neighborhood = False
                    break
            if matches_neighborhood:
                count += 1
    return float(count) / len(GA.nodes())

Source File: iterated_maximum_subarray.py From EDeN with MIT License

6 votes

def extract_sequence_and_score(graph=None):
    # make dict with positions as keys and lists of ids as values
    pos_to_ids = defaultdict(list)
    for u in graph.nodes():
        if 'position' not in graph.node[u]:  # no position attributes in graph, use the vertex id instead
            raise Exception('Missing "position" attribute in node:%s %s' % (u, graph.node[u]))
        else:
            pos = graph.node[u]['position']
        # accumulate all node ids
        pos_to_ids[pos] += [u]

    # extract sequence of labels and importances
    seq = [None] * len(pos_to_ids)
    score = [0] * len(pos_to_ids)
    for pos in sorted(pos_to_ids):
        ids = pos_to_ids[pos]
        labels = [graph.node[u].get('label', 'N/A') for u in ids]
        # check that all labels for the same position are identical
        assert(sum([1 for label in labels if label == labels[0]]) == len(labels)
               ), 'ERROR: non identical labels referring to same position: %s  %s' % (pos, labels)
        seq[pos] = labels[0]
        # average all importance score for the same position
        importances = [graph.node[u].get('importance', 0) for u in ids]
        score[pos] = np.mean(importances)
    return seq, score

Source File: unique_constraint.py From goodtables-py with MIT License

6 votes

def _create_unique_fields_cache(cells):
    primary_key_column_numbers = []
    cache = {}

    # Unique
    for _, cell in enumerate(cells, start=1):
        field = cell.get('field')
        column_number = cell.get('column-number')
        if field is not None:
            if field.descriptor.get('primaryKey'):
                primary_key_column_numbers.append(column_number)
            if field.constraints.get('unique'):
                cache[tuple([column_number])] = defaultdict(list)

    # Primary key
    if primary_key_column_numbers:
        cache[tuple(primary_key_column_numbers)] = defaultdict(list)

    return cache

Source File: graph.py From EDeN with MIT License

6 votes

def _add_sparse_vector_labes(self, graph, vertex_v, node_feature_list):
        # add the vector with a feature resulting from hashing
        # the discrete labeled graph sparse encoding with the sparse vector
        # feature, the val is then multiplied.
        svec = graph.nodes[vertex_v].get(self.key_svec, None)
        if svec:
            vec_feature_list = defaultdict(lambda: defaultdict(float))
            for radius_dist_key in node_feature_list:
                for feature in node_feature_list[radius_dist_key]:
                    val = node_feature_list[radius_dist_key][feature]
                    for i in svec:
                        vec_val = svec[i]
                        key = fast_hash_2(feature, i, self.bitmask)
                        vec_feature_list[radius_dist_key][key] += val * vec_val
            node_feature_list = vec_feature_list
        return node_feature_list

Source File: analyze_logs.py From mmdetection with Apache License 2.0

6 votes

def load_json_logs(json_logs):
    # load and convert json_logs to log_dict, key is epoch, value is a sub dict
    # keys of sub dict is different metrics, e.g. memory, bbox_mAP
    # value of sub dict is a list of corresponding values of all iterations
    log_dicts = [dict() for _ in json_logs]
    for json_log, log_dict in zip(json_logs, log_dicts):
        with open(json_log, 'r') as log_file:
            for line in log_file:
                log = json.loads(line.strip())
                # skip lines without `epoch` field
                if 'epoch' not in log:
                    continue
                epoch = log.pop('epoch')
                if epoch not in log_dict:
                    log_dict[epoch] = defaultdict(list)
                for k, v in log.items():
                    log_dict[epoch][k].append(v)
    return log_dicts

Source File: datahub.py From svviz with MIT License

6 votes

def __init__(self):
        self.args = None
        self.alignDistance = 0
        self.samples = collections.OrderedDict()
        self.genome = None
        self.sources = {}
        self.annotationSets = collections.OrderedDict()

        # for storing axes, annotations, etc, by allele
        self.alleleTracks = collections.defaultdict(collections.OrderedDict)
        self.trackCompositor = None

        self.dotplots = {}
        self.info = {}

        self.reset()

Source File: test_loop.py From query-exporter with GNU General Public License v3.0

6 votes

def metric_values(metric, by_labels=()):
    """Return values for the metric."""
    if metric._type == "gauge":
        suffix = ""
    elif metric._type == "counter":
        suffix = "_total"

    values = defaultdict(list)
    for sample_suffix, labels, value in metric._samples():
        if sample_suffix == suffix:
            if by_labels:
                label_values = tuple(labels[label] for label in by_labels)
                values[label_values] = value
            else:
                values[sample_suffix].append(value)

    return values if by_labels else values[suffix]

Source File: master.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License

6 votes

def _save_sorted_results(self, run_stats, scores, image_count, filename):
    """Saves sorted (by score) results of the evaluation.

    Args:
      run_stats: dictionary with runtime statistics for submissions,
        can be generated by WorkPiecesBase.compute_work_statistics
      scores: dictionary mapping submission ids to scores
      image_count: dictionary with number of images processed by submission
      filename: output filename
    """
    with open(filename, 'w') as f:
      writer = csv.writer(f)
      writer.writerow(['SubmissionID', 'ExternalTeamId', 'Score',
                       'MedianTime', 'ImageCount'])
      get_second = lambda x: x[1]
      for s_id, score in sorted(iteritems(scores),
                                key=get_second, reverse=True):
        external_id = self.submissions.get_external_id(s_id)
        stat = run_stats.get(
            s_id, collections.defaultdict(lambda: float('NaN')))
        writer.writerow([s_id, external_id, score,
                         stat['median_eval_time'],
                         image_count[s_id]])

Source File: fingerprint.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License

6 votes

def __init__(self, tau=0, name="", ds_name=""):
        self.name = name
        self.ds_name = ds_name
        self.tau = tau

        self.ids = set()
        self.ids_correct = set()
        self.ids_correct_fp = set()
        self.ids_agree = set()

        # Legal = there is a fingerprint match below threshold tau
        self.ids_legal = set()

        self.counts = defaultdict(lambda: 0)
        self.counts_legal = defaultdict(lambda: 0)
        self.counts_correct = defaultdict(lambda: 0)

        # Total number of examples
        self.i = 0

Source File: 53_machine_translation.py From deep-learning-note with MIT License

5 votes

def bleu(pred_tokens, label_tokens, k):
    len_pred, len_label = len(pred_tokens), len(label_tokens)
    score = math.exp(min(0, 1 - len_label / len_pred))
    for n in range(1, k + 1):
        num_matches, label_subs = 0, collections.defaultdict(int)
        for i in range(len_label - n + 1):
            label_subs[''.join(label_tokens[i: i + n])] += 1
        for i in range(len_pred - n + 1):
            if label_subs[''.join(pred_tokens[i: i + n])] > 0:
                num_matches += 1
                label_subs[''.join(pred_tokens[i: i + n])] -= 1
        score *= math.pow(num_matches / (len_pred - n + 1), math.pow(0.5, n))
    return score

Source File: core.py From friendly-telegram with GNU Affero General Public License v3.0

5 votes

def __init__(self, **kwargs):
        self.runner = None
        self.port = None
        self.running = asyncio.Event()
        self.ready = asyncio.Event()
        self.client_data = {}
        self._ratelimit_data = collections.defaultdict(dict)
        self.app = web.Application(middlewares=[ratelimit(lambda f: self._ratelimit_data[f])])
        aiohttp_jinja2.setup(self.app, filters={"getdoc": inspect.getdoc, "ascii": ascii},
                             loader=jinja2.FileSystemLoader("web-resources"))
        self.app["static_root_url"] = "/static"
        super().__init__(**kwargs)
        self.app.router.add_static("/static/", "web-resources/static")

Source File: foreign_key.py From goodtables-py with MIT License

5 votes

def _get_foreign_keys_values(schema, relations):
    # It's based on the following code:
    # https://github.com/frictionlessdata/tableschema-py/blob/master/tableschema/table.py#L218

    # we dont need to load the complete reference table to test relations
    # we can lower payload AND optimize testing foreign keys
    # by preparing the right index based on the foreign key definition
    # foreign_keys are sets of tuples of all possible values in the foreign table
    # foreign keys =
    # [reference] [foreign_keys tuple] = { (foreign_keys_values, ) : one_keyedrow, ... }
    foreign_keys = defaultdict(dict)
    if schema:
        for fk in schema.foreign_keys:
            # load relation data
            relation = fk['reference']['resource']

            # create a set of foreign keys
            # to optimize we prepare index of existing values
            # this index should use reference + foreign_keys as key
            # cause many foreign keys may use the same reference
            foreign_keys[relation][tuple(fk['reference']['fields'])] = {}
            for row in (relations[relation] or []):
                key = tuple([row[foreign_field] for foreign_field in fk['reference']['fields']])
                # here we should chose to pick the first or nth row which match
                # previous implementation picked the first, so be it
                if key not in foreign_keys[relation][tuple(fk['reference']['fields'])]:
                    foreign_keys[relation][tuple(fk['reference']['fields'])][key] = row
    return foreign_keys

Source File: core.py From friendly-telegram with GNU Affero General Public License v3.0

5 votes

def ratelimit(get_storage):
    @web.middleware
    async def ratelimit_middleware(request, handler):
        storage = get_storage(handler)
        if not hasattr(storage, "_ratelimit"):
            storage.setdefault("ratelimit", collections.defaultdict(lambda: 0))
            storage.setdefault("ratelimit_last", collections.defaultdict(lambda: 1))
            storage.setdefault("last_request", collections.defaultdict(lambda: 0))
        if storage["last_request"][request.remote] > time.time() - 30:
            # Maybe ratelimit, was requested within 30 seconds
            last = storage["ratelimit_last"][request.remote]
            storage["ratelimit_last"][request.remote] = storage["ratelimit"][request.remote]
            storage["ratelimit"][request.remote] += last
            if storage["ratelimit"][request.remote] > 50:
                # If they have to wait more than 5 seconds (10 requests), kill em.
                return web.Response(status=429)
            await asyncio.sleep(storage["ratelimit"][request.remote] / 10)
        else:
            try:
                del storage["ratelimit"][request.remote]
                del storage["ratelimit_last"][request.remote]
            except KeyError:
                pass
        storage["last_request"][request.remote] = time.time()
        return await handler(request)
    return ratelimit_middleware

Source File: coco.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def createIndex(self):
        # create index
        print('creating index...')
        anns, cats, imgs = {}, {}, {}
        imgToAnns,catToImgs = defaultdict(list),defaultdict(list)
        if 'annotations' in self.dataset:
            for ann in self.dataset['annotations']:
                imgToAnns[ann['image_id']].append(ann)
                anns[ann['id']] = ann

        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img

        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat

        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in self.dataset['annotations']:
                catToImgs[ann['category_id']].append(ann['image_id'])

        print('index created!')

        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats

Source File: DecisionTree.py From fuku-ml with MIT License

5 votes

def classify_with_missing_data(self, x, tree):

        if tree.is_leaf:
            # leaf
            return tree.each_class_counts
        else:
            v = x[tree.col]

            value_is_float = True
            try:
                v = float(v)
            except ValueError:
                value_is_float = False

            if v == 'None':
                true_branch = self.classify_with_missing_data(x, tree.true_branch)
                false_branch = self.classify_with_missing_data(x, tree.false_branch)
                true_branch_count = sum(true_branch.values())
                false_branch_count = sum(false_branch.values())
                true_branch_weight = float(true_branch_count) / (true_branch_count + false_branch_count)
                false_branch_weight = float(false_branch_count) / (true_branch_count + false_branch_count)
                each_class_counts = collections.defaultdict(int)
                for k, v in true_branch.items():
                    each_class_counts[k] += v * true_branch_weight
                for k, v in false_branch.items():
                    each_class_counts[k] += v * false_branch_weight
                return dict(each_class_counts)
            else:
                branch = None
                if value_is_float:
                    if v >= float(tree.value):
                        branch = tree.true_branch
                    else:
                        branch = tree.false_branch
                else:
                    if v == tree.value:
                        branch = tree.true_branch
                    else:
                        branch = tree.false_branch
            return self.classify_with_missing_data(x, branch)

Source File: callback.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def args_wrapper(*args):
    """Generates callback arguments for model.fit()
    for a set of callback objects.
    Callback objects like PandasLogger(), LiveLearningCurve()
    get passed in.  This assembles all their callback arguments.
    """
    out = defaultdict(list)
    for callback in args:
        callback_args = callback.callback_args()
        for k, v in callback_args.items():
            out[k].append(v)
    return dict(out)

Source File: test_contrib_control_flow.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def test_scope():
    class TestBlock1(gluon.HybridBlock):
        def __init__(self, prefix=None, params=None):
            super(TestBlock1, self).__init__(prefix=prefix, params=params)
        def hybrid_forward(self, F, data):
            (new_data, ) = F.contrib.cond(
                data > 0.5,
                then_func=lambda: data * 2,
                else_func=lambda: data * 3,
                name="my_cond",
            )
            return new_data
    class TestBlock2(gluon.HybridBlock):
        def __init__(self, prefix=None, params=None):
            super(TestBlock2, self).__init__(prefix=prefix, params=params)
        def hybrid_forward(self, F, data):
            (new_data, ) = F.contrib.cond(
                data > 0.5,
                then_func=lambda: data * 2,
                else_func=lambda: data * 3,
                name="my_cond",
            )
            return new_data
    AttrScope._subgraph_names = defaultdict(int)
    data = mx.nd.normal(loc=0, scale=1, shape=(1, ))
    block1 = TestBlock1()
    block1.initialize(ctx=default_context())
    block1.hybridize()
    _ = block1(data)
    block2 = TestBlock2()
    block2.initialize(ctx=default_context())
    block2.hybridize()
    _ = block2(data)
    assert len(AttrScope._subgraph_names) == 3
    assert AttrScope._subgraph_names['my_cond_else'] == 2
    assert AttrScope._subgraph_names['my_cond_pred'] == 2
    assert AttrScope._subgraph_names['my_cond_then'] == 2

Source File: api.py From tmhmm.py with MIT License

5 votes

def predict(sequence, model_or_filelike='TMHMM2.0.model', compute_posterior=True):
    if isinstance(model_or_filelike, tuple):
        model = model_or_filelike
    else:
        _, model = parse(model_or_filelike)

    _, path = viterbi(sequence, *model)
    if compute_posterior:
        forward_table, constants = forward(sequence, *model)
        backward_table = backward(sequence, constants, *model)

        posterior = forward_table * backward_table
        _, _, _, char_map, label_map, name_map = model

        observations = len(sequence)
        states = len(name_map)

        table = np.zeros(shape=(observations, 3))
        for i in range(observations):
            group_probs = defaultdict(float)
            for j in range(states):
                group = label_map[j].lower()
                group_probs[group] += posterior[i, j]

            for k, group in enumerate(GROUP_NAMES):
                table[i, k] = group_probs[group]
        return path, table/table.sum(axis=1, keepdims=True)
    return path

Source File: gen_vocab.py From DOTA_models with Apache License 2.0

5 votes

def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  vocab_freqs = defaultdict(int)
  doc_counts = defaultdict(int)

  # Fill vocabulary frequencies map and document counts map
  for doc in document_generators.documents(
      dataset='train',
      include_unlabeled=FLAGS.use_unlabeled,
      include_validation=FLAGS.include_validation):
    fill_vocab_from_doc(doc, vocab_freqs, doc_counts)

  # Filter out low-occurring terms
  vocab_freqs = dict((term, freq) for term, freq in vocab_freqs.iteritems()
                     if doc_counts[term] > FLAGS.doc_count_threshold)

  # Sort by frequency
  ordered_vocab_freqs = data_utils.sort_vocab_by_frequency(vocab_freqs)

  # Limit vocab size
  ordered_vocab_freqs = ordered_vocab_freqs[:MAX_VOCAB_SIZE]

  # Add EOS token
  ordered_vocab_freqs.append((data_utils.EOS_TOKEN, 1))

  # Write
  tf.gfile.MakeDirs(FLAGS.output_dir)
  data_utils.write_vocab_and_frequency(ordered_vocab_freqs, FLAGS.output_dir)

Source File: utility.py From soccer-matlab with BSD 2-Clause "Simplified" License

5 votes

def variable_summaries(vars_, groups=None, scope='weights'):
  """Create histogram summaries for the provided variables.

  Summaries can be grouped via regexes matching variables names.

  Args:
    vars_: List of variables to summarize.
    groups: Mapping of name to regex for grouping summaries.
    scope: Name scope for this operation.

  Returns:
    Summary tensor.
  """
  groups = groups or {r'all': r'.*'}
  grouped = collections.defaultdict(list)
  for var in vars_:
    for name, pattern in groups.items():
      if re.match(pattern, var.name):
        name = re.sub(pattern, name, var.name)
        grouped[name].append(var)
  for name in groups:
    if name not in grouped:
      tf.logging.warn("No variables matching '{}' group.".format(name))
  summaries = []
  for name, vars_ in grouped.items():
    vars_ = [tf.reshape(var, [-1]) for var in vars_]
    vars_ = tf.concat(vars_, 0)
    summaries.append(tf.summary.histogram(scope + '/' + name, vars_))
  return tf.summary.merge(summaries)

Source File: molecular_formula.py From QCElemental with BSD 3-Clause "New" or "Revised" License

5 votes

def order_molecular_formula(formula: str, order: str = "alphabetical") -> str:
    """
    Reorders a molecular formula.

    Parameters
    ----------
    formula: str
        A molecular formula
    order: str, optional
        Sorting order of the formula. Valid choices are "alphabetical" and "hill".

    Returns
    -------
    str
        The molecular formula.
    """

    matches = re.findall(r"[A-Z][^A-Z]*", formula)
    if not "".join(matches) == formula:
        raise ValueError(f"{formula} is not a valid molecular formula.")
    count = collections.defaultdict(int)
    for match in matches:
        match_n = re.match(r"(\D+)(\d*)", match)
        assert match_n
        if match_n.group(2) == "":
            n = 1
        else:
            n = int(match_n.group(2))
        count[match_n.group(1)] += n
    symbols = [k for k, v in count.items() for i in range(v)]
    return molecular_formula_from_symbols(symbols=symbols, order=order)

Source File: diet.py From fine-lm with MIT License

5 votes

def create_slots(self, var):
    """Create the factorized Adam accumulators for diet variables."""
    params = self.params
    shape = var.get_shape().as_list()

    if not hasattr(params, "slots"):
      params.slots = defaultdict(dict)

    name = var.op.name
    slots = params.slots[name]

    if params.factored_second_moment_accumulator and len(shape) == 2:
      slots["adam_vr"] = tf.get_variable(
          name + "_adam_vr", [shape[0], 1],
          trainable=False,
          initializer=tf.zeros_initializer())
      slots["adam_vc"] = tf.get_variable(
          name + "_adam_vc", [1, shape[1]],
          trainable=False,
          initializer=tf.zeros_initializer())
    else:
      slots["adam_v"] = tf.get_variable(
          name + "_adam_v",
          shape,
          trainable=False,
          initializer=tf.zeros_initializer())
    if params.beta1 != 0.0:
      slots["adam_m"] = tf.get_variable(
          name + "_adam_m",
          shape,
          trainable=False,
          initializer=tf.zeros_initializer())

Source File: periodic_table.py From QCElemental with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self):

        from . import data

        # Of length number of elements
        self.Z = data.nist_2011_atomic_weights["Z"]
        self.E = data.nist_2011_atomic_weights["E"]
        self.name = data.nist_2011_atomic_weights["name"]

        self._el2z = dict(zip(self.E, self.Z))
        self._z2el = collections.OrderedDict(zip(self.Z, self.E))
        self._element2el = dict(zip(self.name, self.E))
        self._el2element = dict(zip(self.E, self.name))

        # Of length number of isotopes
        self._EE = data.nist_2011_atomic_weights["_EE"]
        self.EA = data.nist_2011_atomic_weights["EA"]
        self.A = data.nist_2011_atomic_weights["A"]
        self.mass = data.nist_2011_atomic_weights["mass"]

        self._eliso2mass = dict(zip(self.EA, self.mass))
        self._eliso2el = dict(zip(self.EA, self._EE))
        self._eliso2a = dict(zip(self.EA, self.A))
        self._el2a2mass = collections.defaultdict(dict)
        for EE, m, A in zip(self._EE, self.mass, self.A):
            self._el2a2mass[EE][A] = float(m)

Source File: diet.py From fine-lm with MIT License

5 votes

def make_diet_var_getter(params):
  """Create a custom variable getter for diet variables according to params."""

  def diet_var_initializer(shape, dtype, partition_info=None):
    """Initializer for a diet variable."""
    del dtype
    del partition_info

    with common_layers.fn_device_dependency("diet_init") as out_deps:
      float_range = math.sqrt(3)
      ret = tf.random_uniform(shape, -float_range, float_range)
      if params.quantize:
        ret = _quantize(ret, params, randomize=False)
      out_deps.append(ret)
      return ret

  def diet_var_getter(getter, **kwargs):
    """Get diet variable and return it dequantized."""
    if params.quantize:
      kwargs["dtype"] = tf.float16
    kwargs["initializer"] = diet_var_initializer
    kwargs["trainable"] = False

    base_var = getter(**kwargs)

    dequantized = _dequantize(base_var, params)

    if not hasattr(params, "dequantized"):
      params.dequantized = defaultdict(list)
    params.dequantized[base_var.name].append(dequantized)

    return dequantized

  return diet_var_getter

Python collections.defaultdict() Examples