Python get span

Source File: utils.py From iss-rnns with Apache License 2.0

6 votes

def get_best_span(ypi, yp2i):
    """Find the highest-scoring (start, end) word span over all sentences.

    For every end position the best start seen so far in the same sentence
    is paired with it; the score is ``start_prob * end_prob``.

    Returns ``(((sent, start), (sent, end + 1)), score)`` where the end index
    is exclusive and ``score`` is converted to ``float``.
    """
    top_score = 0
    top_start, top_end = 0, 1
    top_sentence = 0
    for sent_idx, (start_probs, end_probs) in enumerate(zip(ypi, yp2i)):
        # Index of the largest start probability among positions <= end_idx.
        running_start = 0
        for end_idx, start_prob in enumerate(start_probs):
            if start_probs[running_start] < start_prob:
                running_start = end_idx

            score = start_probs[running_start] * end_probs[end_idx]
            if score > top_score:
                top_start, top_end = running_start, end_idx
                top_sentence = sent_idx
                top_score = score
    span = ((top_sentence, top_start), (top_sentence, top_end + 1))
    return span, float(top_score)

Source File: gold_mentions.py From coref with Apache License 2.0

6 votes

def get_span_emb(self, head_emb, context_outputs, span_starts, span_ends):
    """Build span embeddings by concatenating several per-span tensors.

    The rows of `context_outputs` gathered at `span_starts` and `span_ends`
    are always included. When config "use_features" is set, a dropout-regularised
    embedding looked up by span width is appended; when config "model_heads"
    is set, the product of the masked mention word scores with
    `context_outputs` is appended.

    `head_emb` is accepted but not referenced in this body.
    Shape notes in the trailing comments are the original author's annotations.
    """
    span_emb_list = []

    span_start_emb = tf.gather(context_outputs, span_starts) # [k, emb]
    span_emb_list.append(span_start_emb)

    span_end_emb = tf.gather(context_outputs, span_ends) # [k, emb]
    span_emb_list.append(span_end_emb)

    # Width counts both endpoints, i.e. span_ends is treated as inclusive here.
    span_width = 1 + span_ends - span_starts # [k]

    if self.config["use_features"]:
      # Widths start at 1, so shift to a 0-based row index into the embedding table.
      span_width_index = span_width - 1 # [k]
      span_width_emb = tf.gather(tf.get_variable("span_width_embeddings", [self.config["max_span_width"], self.config["feature_size"]]), span_width_index) # [k, emb]
      span_width_emb = tf.nn.dropout(span_width_emb, self.dropout)
      span_emb_list.append(span_width_emb)

    if self.config["model_heads"]:
      # NOTE(review): presumably one row of per-word scores per span — confirm in get_masked_mention_word_scores.
      mention_word_scores = self.get_masked_mention_word_scores(context_outputs, span_starts, span_ends)
      head_attn_reps = tf.matmul(mention_word_scores, context_outputs) # [K, T]
      span_emb_list.append(head_attn_reps)

    # Concatenate all collected pieces along axis 1.
    span_emb = tf.concat(span_emb_list, 1) # [k, emb]
    return span_emb # [k, emb]

Source File: tagging_utils.py From multilabel-image-classification-tensorflow with MIT License

6 votes

def get_span_labels(sentence_tags, inv_label_mapping=None):
  """Go from token-level labels to list of entities (start, end, class).

  Args:
    sentence_tags: sequence of tags, each either 'O' or 'PREFIX-CLASS'. If
      `inv_label_mapping` is given, the items are keys into that mapping.
    inv_label_mapping: optional mapping from label id to tag string.

  Returns:
    A list of (start, end, class) tuples with inclusive token indices. An
    empty input yields an empty list.
  """

  if inv_label_mapping:
    sentence_tags = [inv_label_mapping[i] for i in sentence_tags]
  # Nothing to label; also avoids indexing sentence_tags[-1] on empty input.
  if len(sentence_tags) == 0:
    return []
  span_labels = []
  last = 'O'
  start = -1
  for i, tag in enumerate(sentence_tags):
    pos, _ = (None, 'O') if tag == 'O' else tag.split('-')
    # A new begin/single tag or an 'O' closes whatever span was open.
    if (pos == 'S' or pos == 'B' or tag == 'O') and last != 'O':
      span_labels.append((start, i - 1, last.split('-')[-1]))
    # A span starts on B/S, or on any tag that follows an 'O'.
    if pos == 'B' or pos == 'S' or last == 'O':
      start = i
    last = tag
  # Flush the span that is still open at the end of the sentence.
  if sentence_tags[-1] != 'O':
    span_labels.append((start, len(sentence_tags) - 1,
                        sentence_tags[-1].split('-')[-1]))
  return span_labels

Source File: span_data.py From document-qa with Apache License 2.0

6 votes

def get_best_span_bounded(word_start_probs, word_end_probs, bound):
    """Find the best (start, end) span whose length does not exceed `bound`.

    Scans end positions left to right while tracking the best start; when the
    tracked start falls too far behind, it jumps to the largest start
    probability among the positions after it up to the current end.

    Returns ``(best_span, best_score)``; ``(None, -1)`` for empty input.
    """
    best_score = -1
    best_span = None

    start_ix = -1
    start_score = -1

    for end_ix, start_prob in enumerate(word_start_probs):
        # Advance the start only if that strictly improves its probability.
        if start_score < start_prob:
            start_score, start_ix = start_prob, end_ix

        # Span grew past the limit: re-pick the start inside the window.
        if end_ix - start_ix + 1 > bound:
            window = word_start_probs[start_ix + 1:end_ix + 1]
            start_ix += 1 + np.argmax(window)
            start_score = word_start_probs[start_ix]

        candidate = start_score * word_end_probs[end_ix]
        if candidate > best_score:
            best_span = (start_ix, end_ix)
            best_score = candidate

    return best_span, best_score

Source File: span_data.py From document-qa with Apache License 2.0

6 votes

def get_best_span(word_start_probs, word_end_probs):
    """Find the (start, end) pair with start <= end maximising start * end.

    Returns ``(best_span, best_score)``; ``(None, -1)`` for empty input.
    """
    best_score = -1
    best_span = None

    start_ix = -1
    start_score = -1

    for end_ix, start_prob in enumerate(word_start_probs):
        # Keep start_ix at the largest start probability within [0, end_ix];
        # the strict comparison keeps the earliest index on ties.
        if start_score < start_prob:
            start_score, start_ix = start_prob, end_ix

        candidate = start_score * word_end_probs[end_ix]
        if candidate > best_score:
            best_span = (start_ix, end_ix)
            best_score = candidate

    return best_span, best_score

Source File: utils.py From active-qa with Apache License 2.0

6 votes

def get_best_span(ypi, yp2i):
  """Find the highest-scoring (start, end) word span over all sentences.

  Each end position is paired with the best start at or before it in the
  same sentence; the score is the product of the two probabilities.

  Returns ``(((sent, start), (sent, end + 1)), score)`` with an exclusive end
  index and ``score`` converted to ``float``.
  """
  top_score = 0
  top_start, top_end = 0, 1
  top_sentence = 0
  for sent_idx, (start_probs, end_probs) in enumerate(zip(ypi, yp2i)):
    # Index of the largest start probability among positions <= end_idx.
    running_start = 0
    for end_idx, start_prob in enumerate(start_probs):
      if start_probs[running_start] < start_prob:
        running_start = end_idx

      score = start_probs[running_start] * end_probs[end_idx]
      if score > top_score:
        top_start, top_end = running_start, end_idx
        top_sentence = sent_idx
        top_score = score
  span = ((top_sentence, top_start), (top_sentence, top_end + 1))
  return span, float(top_score)

Source File: evaluation.py From kbqa-ar-smcnn with Apache License 2.0

6 votes

def get_span(label):
    """Collect the runs of consecutive 1s in `label` as (start, end) pairs.

    A run that is closed by a non-1 value gets ``end`` = index of that
    value. A run still open at the end of the sequence gets ``end`` = last
    index of `label`.

    NOTE(review): the two end conventions differ by one (exclusive for closed
    runs, last index for a trailing run). Kept as-is for compatibility with
    existing callers — confirm which one is intended.

    Fix: a trailing run that starts at index 0 used to be dropped, because
    the open run was detected via ``st != 0`` instead of the run state.
    """
    span = []
    run_start = None  # start index of the currently open run, if any
    for k, value in enumerate(label):
        if value == 1:
            if run_start is None:
                run_start = k
        elif run_start is not None:
            span.append((run_start, k))
            run_start = None
    if run_start is not None:
        span.append((run_start, len(label) - 1))
    return span

Source File: rnet.py From R-net with MIT License

6 votes

def get_best_span(span_start_logits: torch.Tensor, span_end_logits: torch.Tensor) -> torch.Tensor:
        """Pick, per batch row, the (start, end) pair with start <= end that
        maximises ``start_logit + end_logit``.

        Both inputs must be 2-D; otherwise ``ValueError`` is raised. Returns a
        ``torch.long`` tensor with one ``[start, end]`` row per batch row,
        created from ``span_start_logits`` via ``new_zeros``.
        """
        if not (span_start_logits.dim() == 2 and span_end_logits.dim() == 2):
            raise ValueError("Input shapes must be (batch_size, passage_length)")
        batch_size, passage_length = span_start_logits.size()
        best_word_span = span_start_logits.new_zeros((batch_size, 2), dtype=torch.long)

        # The scan itself runs on detached numpy copies of the logits.
        start_scores = span_start_logits.detach().cpu().numpy()
        end_scores = span_end_logits.detach().cpu().numpy()

        for row in range(batch_size):
            best_start = 0
            best_total = -1e20
            for end in range(passage_length):
                # Running argmax of the start logits over positions <= end.
                if start_scores[row, best_start] < start_scores[row, end]:
                    best_start = end

                total = start_scores[row, best_start] + end_scores[row, end]
                if total > best_total:
                    best_word_span[row, 0] = best_start
                    best_word_span[row, 1] = end
                    best_total = total
        return best_word_span

Source File: get_span.py From DashTable with MIT License

6 votes

def get_span(spans, row, column):
    """
    Find the first span that contains the [row, column] pair.

    Parameters
    ----------
    spans : list of lists of lists
        A list containing spans, which are lists of [row, column] pairs
        that define where a span is inside a table.
    row : int
    column : int

    Returns
    -------
    span : list of lists or None
        The first span containing the [row, column] pair, or None when no
        span contains it.
    """
    cell = [row, column]
    return next((candidate for candidate in spans if cell in candidate), None)

Source File: get_span_char_height.py From DashTable with MIT License

6 votes

def get_span_char_height(span, row_heights):
    """
    Compute the height of a span in the number of newlines it fills.

    Parameters
    ----------
    span : list of list of int
        A list of [row, column] pairs that make up the span
    row_heights : list of int
        A list of the number of newlines for each row in the table

    Returns
    -------
    total_height : int
        The summed heights of the rows the span covers, plus one extra line
        between each pair of adjacent rows.
    """
    first_row = span[0][0]
    n_rows = get_span_row_count(span)

    covered = range(first_row, first_row + n_rows)
    rows_height = sum(row_heights[row] for row in covered)

    # n_rows - 1 lines are added on top of the row heights themselves.
    return rows_height + n_rows - 1

Source File: general_utils.py From FlowDelta with MIT License

6 votes

def get_context_span(context, context_token):
    """Map each token to its (start, end) character offsets in `context`.

    Walks `context` left to right, skipping whitespace, and expects the
    tokens of `context_token` to appear in order with nothing but whitespace
    between them.

    Returns a list of (start, end) pairs with an exclusive end, one per
    matched token. Returns [] (after logging) when the text and the tokens
    do not line up, including when the tokens run out before the text does.
    """
    p_str = 0
    p_token = 0
    t_span = []
    while p_str < len(context):
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        if re.match(r'\s', context[p_str]):
            p_str += 1
            continue

        # Non-whitespace text left but no tokens: treat as a mismatch rather
        # than failing with IndexError.
        if p_token >= len(context_token):
            log.info("Something wrong with get_context_span()")
            return []

        token = context_token[p_token]
        token_len = len(token)
        if context[p_str:p_str + token_len] != token:
            log.info("Something wrong with get_context_span()")
            return []
        t_span.append((p_str, p_str + token_len))

        p_str += token_len
        p_token += 1
    return t_span

Source File: model_utils.py From lsgn with Apache License 2.0

6 votes

def get_span_candidates(text_len, max_sentence_length, max_mention_width):
  """Enumerate candidate spans of up to `max_mention_width` tokens per sentence.

  Args:
    text_len: Tensor of [num_sentences,]
    max_sentence_length: Integer scalar.
    max_mention_width: Integer.

  Returns:
    (candidate_starts, candidate_ends, candidate_mask), each flattened to
    [num_sentences, max_mention_width * max_sentence_length]. Starts and ends
    are multiplied by the mask, so masked-out candidates become 0.
  """
  num_sentences = util.shape(text_len, 0)
  flat_size = max_mention_width * max_sentence_length

  # Every token position repeated for every width: [num_sentences, max_mention_width, max_sentence_length]
  token_positions = tf.expand_dims(tf.expand_dims(tf.range(max_sentence_length), 0), 1)
  starts_3d = tf.tile(token_positions, [num_sentences, max_mention_width, 1])
  # Width offsets 0..max_mention_width-1: [1, max_mention_width, 1]
  widths_3d = tf.expand_dims(tf.expand_dims(tf.range(max_mention_width), 0), 2)
  ends_3d = starts_3d + widths_3d  # [num_sentences, max_mention_width, max_sentence_length]

  candidate_starts = tf.reshape(starts_3d, [num_sentences, flat_size])
  candidate_ends = tf.reshape(ends_3d, [num_sentences, flat_size])

  # A candidate is kept only when its end index is below the sentence length.
  tiled_text_len = tf.tile(tf.expand_dims(text_len, 1), [1, flat_size])
  candidate_mask = tf.less(candidate_ends, tiled_text_len)  # [num_sentences, flat_size]

  # Mask to avoid indexing error.
  masked_starts = tf.multiply(candidate_starts, tf.to_int32(candidate_mask))
  masked_ends = tf.multiply(candidate_ends, tf.to_int32(candidate_mask))
  return masked_starts, masked_ends, candidate_mask

Source File: helper.py From LinkedRW with MIT License

6 votes

def get_span_text(element, name):
    """
    Read the text of the second <span> under a selected child element.

    Args:
        element: the element containing the text
        name: CSS selector used to find the child element

    Returns:
        The span text with every en dash replaced by a hyphen, or an empty
        string when a NoSuchElementException is raised.
    """
    try:
        container = element.find_element_by_css_selector(name)
        second_span = container.find_elements_by_tag_name("span")[1]
        return second_span.text.replace("–", "-")
    except NoSuchElementException:
        return ""

Source File: request_context.py From opentracing-python-instrumentation with MIT License

6 votes

def get_current_span():
    """
    Look up the span of the current request.

    :return:
        The span of the current RequestContextManager context when there is
        one; otherwise the span of the tracer's active scope, or None when
        no scope is active.
    """
    # The ScopeManager-less context is consulted first for backwards
    # compatibility.
    context = RequestContextManager.current_context()
    if context is None:
        active = opentracing.tracer.scope_manager.active
        if active:
            return active.span
        return None

    return context.span

Source File: IDAMetrics_static.py From IDAmetrics with BSD 2-Clause "Simplified" License

6 votes

def get_span_metric(self, bbls_dict):
        """
        Calculate the span metric over all basic blocks.

        Counts operands whose type lies between idc.o_mem and idc.o_displ
        (inclusive), skipping call/branch instructions and operands for which
        is_operand_called() is true.
        @bbls_dict - basic blocks dictionary
        @return - span metric
        """
        span_metric = 0
        for bbl in bbls_dict.values():
            for head in bbl:
                address = int(head, 16)
                operands = self.get_instr_operands(address)
                kind = GetInstructionType(address)
                if kind == CALL_INSTRUCTION or kind == BRANCH_INSTRUCTION:
                    continue
                for op, op_type in operands:
                    if self.is_operand_called(op, bbl):
                        continue
                    if idc.o_mem <= op_type <= idc.o_displ:
                        span_metric += 1
        return span_metric

Source File: metamap.py From medaCy with GNU General Public License v3.0

6 votes

def get_span_by_term(self, term):
        """
        Extract the character spans recorded in a term dictionary.

        :param term: dictionary whose ['ConceptPIs']['ConceptPI'] entry is either a
            single dict or a list of dicts, each with 'StartPos' and 'Length'
        :return: a list of (start, start + length) tuples, one per entry
        """
        positions = term['ConceptPIs']['ConceptPI']
        # A lone entry is handled as a one-element list.
        if not isinstance(positions, list):
            positions = [positions]

        spans = []
        for position in positions:
            start = int(position['StartPos'])
            length = int(position['Length'])
            spans.append((start, start + length))
        return spans

Source File: time_proj_tdns_scope.py From HyTE with Apache License 2.0

6 votes

def get_span_ids(self, start, end):
		"""Translate a (start, end) year pair into year-bucket labels.

		Keys of self.year2id are indexed as [0] and [1] and compared against
		the year as an inclusive range. When several ranges match, the one
		with the largest label wins because the scan does not stop early.
		A start later than the end replaces the end with YEARMAX.
		"""
		start_year = int(start)
		end_year = int(end)
		if start_year > end_year:
			end_year = YEARMAX

		if start_year == YEARMIN:
			start_lbl = 0
		else:
			for bounds, lbl in sorted(self.year2id.items(), key=lambda item: item[1]):
				if bounds[0] <= start_year <= bounds[1]:
					start_lbl = lbl

		if end_year == YEARMAX:
			end_lbl = len(self.year2id) - 1
		else:
			for bounds, lbl in sorted(self.year2id.items(), key=lambda item: item[1]):
				if bounds[0] <= end_year <= bounds[1]:
					end_lbl = lbl
		return start_lbl, end_lbl

Source File: thread_local.py From py_zipkin with Apache License 2.0

6 votes

def get_thread_local_span_storage():
    """Return the span storage of the default tracer (deprecated).

    Logs a deprecation warning on every call, then returns the
    ``_span_storage`` attribute of ``get_default_tracer()``.

    .. deprecated::
       Use the Tracer interface which offers better multi-threading support.
       get_thread_local_span_storage will be removed in version 1.0.

    :returns: SpanStore object containing all non-root spans.
    :rtype: py_zipkin.storage.SpanStore
    """
    log.warning(
        "get_thread_local_span_storage is deprecated. See DEPRECATIONS.rst"
        " for details on how to migrate to using Tracer."
    )
    tracer = get_default_tracer()
    return tracer._span_storage

Source File: BSplines.py From tIGAr with GNU Lesser General Public License v3.0

6 votes

def getKnotSpan(self,u):
        """
        Return the index of the knot span containing parameter ``u``.
        (Numbering includes degenerate knot spans.)

        The index comes from ``searchsorted`` on ``self.knots`` minus one and
        is then clamped using the first and last entries of
        ``self.multiplicities``.
        """
        last_span = len(self.knots) - 1
        lowest = self.multiplicities[0] - 1
        highest = last_span - (self.multiplicities[-1] - 1) - 1

        # Insertion position minus one.
        index = searchsorted(self.knots, u) - 1

        # Clamp from below first, then from above.
        if index < lowest:
            index = lowest
        if index > highest:
            index = highest
        return index

Source File: querymaker.py From open-context-py with GNU General Public License v3.0

6 votes

def get_form_use_life_span_query_dict(form_start=None, form_stop=None):
    """Build a filter query on form_use_life_chrono_point from start
    and/or stop times.

    Returns None when neither bound is given; otherwise a dict with a single
    'fq' term. A missing bound is replaced by -10 or +10 billion.
    """
    if form_start is None and form_stop is None:
        return None

    # Older than the entire Cosmos when no start is given.
    start = -10*1000*1000*1000 if form_start is None else form_start
    # Long after the Sun would have died when no stop is given.
    stop = 10*1000*1000*1000 if form_stop is None else form_stop

    term = 'form_use_life_chrono_point:[{},{} TO {},{}]'.format(
        start, start, stop, stop
    )
    return {'fq': [term]}


# ---------------------------------------------------------------------
# SPATIAL CONTEXT RELATED FUNCTIONS
# ---------------------------------------------------------------------

Source File: __init__.py From opentelemetry-python with Apache License 2.0

6 votes

def get_current_span(context: Optional[Context] = None) -> Optional[Span]:
    """Fetch the span stored under SPAN_KEY.

    Args:
        context: A Context object, forwarded to ``get_value``.

    Returns:
        None when nothing is stored, the stored value when it is a Span,
        and INVALID_SPAN when the stored value is not a Span.
    """
    span = get_value(SPAN_KEY, context=context)
    if span is None or isinstance(span, Span):
        return span
    return INVALID_SPAN

Source File: preprocess.py From reading_comprehension_tf with Apache License 2.0

6 votes

def get_word_span(char_spans, answer_char_start, answer_char_end):
    """Convert an answer's character offsets into word indices.

    `char_spans` holds one (char_start, char_end) pair per word; a character
    offset belongs to a word when it lies inside that pair inclusively. When
    several words match, the last one wins.

    Returns (answer_word_start, answer_word_end).
    Raises ValueError when either index cannot be found or the end precedes
    the start.
    """
    word_start = None
    word_end = None
    for word_idx, (lo, hi) in enumerate(char_spans):
        if lo <= answer_char_start <= hi:
            word_start = word_idx
        if lo <= answer_char_end <= hi:
            word_end = word_idx

    # The answer may run past the last word: clamp its end to that word.
    if word_end is None and word_start is not None and answer_char_end > char_spans[-1][-1]:
        word_end = len(char_spans) - 1

    found_both = word_end is not None and word_start is not None
    if not found_both or word_end < word_start:
        raise ValueError("invalid word span: ({0}, {1})".format(word_start, word_end))

    return word_start, word_end

Source File: utils.py From mnnpy with BSD 3-Clause "New" or "Revised" License

6 votes

def get_bio_span(exprs, ndim, svd_mode, var_subset=None, **kwargs):
    """Run svd_internal on column-centred `exprs` and return its first output.

    Without `var_subset`, element 0 of the svd_internal result is returned
    directly. With `var_subset`, only those columns enter the decomposition;
    the result is a float32 array with one row per column of `exprs`, where
    the remaining columns are filled from the left-out data, elements 2 and 1
    of the result.
    """
    centred = exprs - np.mean(exprs, axis=0)
    restricted = var_subset is not None
    if restricted:
        n_columns = centred.shape[1]
        # Complementary boolean masks over the columns.
        subsetter = [True] * n_columns
        keeper = [False] * n_columns
        for i in var_subset:
            subsetter[i] = False
            keeper[i] = True
        leftovers = centred[:, subsetter].T
        centred = centred[:, keeper]
    # ndim cannot exceed either dimension of the matrix being decomposed.
    ndim = min(ndim, *centred.shape)
    singular = svd_internal(centred.T, ndim, svd_mode, **kwargs)
    if not restricted:
        return singular[0]
    output = np.zeros((exprs.shape[1], ndim), dtype=np.float32)
    output[keeper,] = singular[0]
    projected = np.dot(leftovers, singular[2])
    output[subsetter,] = np.divide(projected, singular[1][range(ndim)])
    return output

Source File: bidaf.py From magnitude with MIT License

6 votes

def get_best_span(span_start_logits, span_end_logits):
        """Pick, per batch row, the (start, end) pair with start <= end that
        maximises ``start_logit + end_logit``.

        Both inputs must be 2-D; otherwise ``ValueError`` is raised. Returns a
        ``torch.long`` tensor with one ``[start, end]`` row per batch row,
        created from ``span_start_logits`` via ``new_zeros``.
        """
        if not (span_start_logits.dim() == 2 and span_end_logits.dim() == 2):
            raise ValueError(u"Input shapes must be (batch_size, passage_length)")
        batch_size, passage_length = span_start_logits.size()
        best_word_span = span_start_logits.new_zeros((batch_size, 2), dtype=torch.long)

        # The scan itself runs on detached numpy copies of the logits.
        start_scores = span_start_logits.detach().cpu().numpy()
        end_scores = span_end_logits.detach().cpu().numpy()

        for row in range(batch_size):
            best_start = 0
            best_total = -1e20
            for end in range(passage_length):
                # Running argmax of the start logits over positions <= end.
                if start_scores[row, best_start] < start_scores[row, end]:
                    best_start = end

                total = start_scores[row, best_start] + end_scores[row, end]
                if total > best_total:
                    best_word_span[row, 0] = best_start
                    best_word_span[row, 1] = end
                    best_total = total
        return best_word_span

Source File: spacy_plugin.py From self-attentive-parser with MIT License

6 votes

def get_parent_span(span):
    """Return the nearest earlier constituent that contains `span`.

    Starting just before the position reported by get_constituent(), walks
    backwards through the constituent start/end arrays and returns the first
    one whose range covers `span` as a slice of the doc. The walk stops once
    a constituent ends before the start of the span's sentence.

    Returns None when no containing constituent is found.
    """
    constituent_data, position = get_constituent(span)
    doc = span.doc
    # Loop-invariant, so looked up once.
    sent_start = span.sent.start

    for candidate in range(position - 1, -1, -1):
        start = constituent_data.starts[candidate]
        end = constituent_data.ends[candidate]

        if start <= span.start and span.end <= end:
            return doc[start:end]
        if end < sent_start:
            break

    return None

#%%

Source File: span_mention.py From fonduer with MIT License

6 votes

def get_attrib_span(self, a: str, sep: str = " ") -> str:
        """Return sentence attribute *a* over this span as a single string.

        Intuitively, like calling::

            sep.join(span.a)

        :param a: The attribute to get a span for.
        :param sep: The separator to use for the join.
        :return: The joined tokens, or text if a="words".
        """
        if a != "words":
            return sep.join(self.get_attrib_tokens(a))

        # NOTE: "words" is sliced straight from the sentence text by
        # character offsets; char_end is inclusive, hence the + 1.
        text = self.sentence.text
        return text[self.char_start : self.char_end + 1]

Source File: agents.py From ParlAI with MIT License

5 votes

def get_span_label(self, data, idx):
        """Pick the label for dialog entry `idx`.

        The label is the result of self.get_span() on the entry's text and
        its checked sentence, unless that result is empty or listed in
        self.stop_words; in that case the checked sentence is the label.
        """
        dialog_entry = data['dialog'][idx]
        said = dialog_entry['text']
        sentence = _first_val(dialog_entry['checked_sentence'])
        overlap = self.get_span(said, sentence)

        if overlap and overlap not in self.stop_words:
            return overlap
        return sentence

Source File: postprocess.py From bidaf-keras with GNU General Public License v3.0

5 votes

def get_best_span(span_begin_probs, span_end_probs, context_length, squad_version, max_span_length):
    """Search all (begin, end) pairs for the most probable answer span.

    Inputs may be 1-D or 2-D with a leading dimension of 1. Only pairs with
    begin <= end, end < context_length and end - begin < max_span_length are
    considered. For squad_version 2.0, position 0 is excluded from the search
    and the (0, 0) span is compared against the best span afterwards.

    Returns (best_word_span, max_span_probability).
    Raises ValueError when either input has more than two dimensions.
    """
    begin_rank = len(span_begin_probs.shape)
    end_rank = len(span_end_probs.shape)
    if begin_rank > 2 or end_rank > 2:
        raise ValueError("Input shapes must be (X,) or (1,X)")
    if begin_rank == 2:
        assert span_begin_probs.shape[0] == 1, "2D input must have an initial dimension of 1"
        span_begin_probs = span_begin_probs.flatten()
    if end_rank == 2:
        assert span_end_probs.shape[0] == 1, "2D input must have an initial dimension of 1"
        span_end_probs = span_end_probs.flatten()

    is_v2 = squad_version == 2.0
    top_prob = 0
    top_span = (0, 1)

    for begin, begin_prob in enumerate(span_begin_probs):
        if is_v2 and begin == 0:
            continue

        for end, end_prob in enumerate(span_end_probs):
            # Past the context: no later end can be valid either.
            if end > context_length - 1:
                break

            if (is_v2 and end == 0) or end < begin:
                continue

            # Spans only get longer from here on.
            if end - begin >= max_span_length:
                break

            prob = begin_prob * end_prob
            if prob > top_prob:
                top_span = (begin, end)
                top_prob = prob

    if is_v2:
        null_prob = span_begin_probs[0] * span_end_probs[0]
        if null_prob > top_prob:
            top_span = (0, 0)
            top_prob = null_prob

    return top_span, top_prob

Source File: bidirectional_attention.py From deep_qa with Apache License 2.0

5 votes

def get_best_span(span_begin_probs, span_end_probs):
        """Return the (begin, end) pair with the largest begin * end product.

        Inputs may be 1-D or 2-D with a leading dimension of 1. For every end
        position, the begin used is the best begin found among the positions
        strictly before it (position 0 for the first one).

        Raises ValueError when either input has more than two dimensions.
        """
        begin_rank = len(span_begin_probs.shape)
        end_rank = len(span_end_probs.shape)
        if begin_rank > 2 or end_rank > 2:
            raise ValueError("Input shapes must be (X,) or (1,X)")
        if begin_rank == 2:
            assert span_begin_probs.shape[0] == 1, "2D input must have an initial dimension of 1"
            span_begin_probs = span_begin_probs.flatten()
        if end_rank == 2:
            assert span_end_probs.shape[0] == 1, "2D input must have an initial dimension of 1"
            span_end_probs = span_end_probs.flatten()

        top_prob = 0
        top_span = (0, 1)
        best_begin = 0
        for end, begin_prob in enumerate(span_begin_probs):
            current_begin_prob = span_begin_probs[best_begin]
            candidate = current_begin_prob * span_end_probs[end]

            if candidate > top_prob:
                top_span = (best_begin, end)
                top_prob = candidate

            # The begin index is advanced only _after_ the current end has
            # been scored, so a newly found best begin is first paired with
            # the following end position.
            if current_begin_prob < begin_prob:
                best_begin = end
        return (top_span[0], top_span[1])

Source File: nlp.py From ReSAN with Apache License 2.0

5 votes

def get_word_span(context, wordss, start, stop):
    """Locate the words whose character span overlaps [start, stop).

    Returns ((sent_start, token_start), (sent_stop, token_stop + 1)) built
    from the first and the last overlapping word. Asserts that at least one
    word overlaps.
    """
    spanss = get_2d_spans(context, wordss)  # [[(start,end),...],...] -> char level
    idxs = [
        (sent_idx, word_idx)
        for sent_idx, spans in enumerate(spanss)
        for word_idx, span in enumerate(spans)
        if not (stop <= span[0] or start >= span[1])
    ]

    assert len(idxs) > 0, "{} {} {} {}".format(context, spanss, start, stop)
    first, last = idxs[0], idxs[-1]
    # The stop token index is made exclusive.
    return first, (last[0], last[1] + 1)

Source File: banana.py From monasca-analytics with Apache License 2.0

5 votes

def get_span(self):
        """
        :rtype: ast.Span
        :return: Returns the span where the error occurred if appropriate.
            This implementation has no body and therefore returns None.
        """
        return None

Source File: breakpoint.py From PyDev.Debugger with Eclipse Public License 1.0

5 votes

def get_span(self):
        """
        @rtype:  tuple( int, int )
        @return:
            The breakpoint's address, and that address plus the
            breakpoint's size.
        """
        begin = self.get_address()
        end = begin + self.get_size()
        return (begin, end)

Source File: fx_gmu.py From filmkodi with Apache License 2.0

5 votes

def get_span_ids(html, media_id):
    """Collect the ids of <span> elements whose content has a matching image.

    For every 'span' returned by get_dom(), the id attribute is captured and
    kept when the text after the opening tag contains an <img> whose src
    matches `media_id` followed by a .jpg/.jpeg extension.
    """
    flags = re.I | re.DOTALL
    pattern = '''<img[^>]+src=['"][^"']+%s.jpe?g''' % (media_id)

    spans = []
    for span in get_dom(html, 'span'):
        match = re.search('''<span[^>]+id=['"]([^'"]+)[^>]+>(.*)''', span, flags)
        if match and re.search(pattern, match.group(2), flags):
            spans.append(match.group(1))

    return spans

Source File: utils.py From adversarial-squad with MIT License

5 votes

def get_word_span(context, wordss, start, stop):
    """Locate the words whose character span overlaps [start, stop).

    Returns ((sent_start, token_start), (sent_stop, token_stop + 1)) built
    from the first and the last overlapping word. Asserts that at least one
    word overlaps.
    """
    spanss = get_2d_spans(context, wordss)
    idxs = [
        (sent_idx, word_idx)
        for sent_idx, spans in enumerate(spanss)
        for word_idx, span in enumerate(spans)
        if not (stop <= span[0] or start >= span[1])
    ]

    assert len(idxs) > 0, "{} {} {} {}".format(context, spanss, start, stop)
    first, last = idxs[0], idxs[-1]
    # The stop token index is made exclusive.
    return first, (last[0], last[1] + 1)

Source File: utils.py From adversarial-squad with MIT License

5 votes

def get_span_score_pairs(ypi, yp2i):
    """List every within-sentence span together with its score.

    For each sentence and each start <= end, yields
    ``(((sent, start), (sent, end + 1)), start_prob * end_prob)`` in
    sentence, start, end order.
    """
    return [
        (((sent_idx, begin), (sent_idx, end + 1)), start_probs[begin] * end_probs[end])
        for sent_idx, (start_probs, end_probs) in enumerate(zip(ypi, yp2i))
        for begin in range(len(start_probs))
        for end in range(begin, len(end_probs))
    ]

Source File: tracing.py From python-flask with BSD 3-Clause "New" or "Revised" License

5 votes

def get_span(self, request=None):
        """
        Look up the span that traces `request`; with `request==None` the
        request on top of the stack is used when there is one.

        Returns None when no scope is recorded for the request.

        @param request the request to get the span from
        """
        target = request
        if target is None and stack.top:
            target = stack.top.request

        scope = self._current_scopes.get(target, None)
        if scope is None:
            return None
        return scope.span

Source File: gtf_store.py From SUPPA with MIT License

5 votes

def get_span(self):
        """
        Compute the overall range covered by all transcripts.

        Returns a tuple of the smallest span[0] and the largest span[1]
        found among the entries of self.transcripts.
        """
        spans = [self.transcripts[name].span for name in self.transcripts]
        lowest = min([span[0] for span in spans])
        highest = max([span[1] for span in spans])
        return lowest, highest

Source File: mixin.py From claf with MIT License

5 votes

def get_best_span(self, span_start_logits, span_end_logits, answer_maxlen=None):
        """
        Take argmax of constrained score_s * score_e.

        Logits are turned into probabilities with a softmax over the last
        dimension; spans with end < start or longer than the maximum length
        are zeroed out before the argmax.

        * Args:
            span_start_logits: independent start logits
            span_end_logits: independent end logits

        * Kwargs:
            answer_maxlen: max span length to consider (default is None -> All)

        * Returns:
            torch.long tensor with one [start, end] row per batch row
        """

        batch_size = span_start_logits.size(0)
        best_word_span = span_start_logits.new_zeros((batch_size, 2), dtype=torch.long)

        score_starts = F.softmax(span_start_logits, dim=-1)
        score_ends = F.softmax(span_end_logits, dim=-1)

        max_len = answer_maxlen or score_starts.size(1)

        for row in range(score_starts.size(0)):
            # Outer product gives the full p_s * p_e matrix
            pair_scores = torch.ger(score_starts[row], score_ends[row])

            # Keep only the band 0 <= end - start <= max_len - 1
            pair_scores.triu_().tril_(max_len - 1)

            pair_scores = pair_scores.detach().cpu().numpy()
            flat_best = np.argmax(pair_scores.flatten())

            start_idx, end_idx = np.unravel_index(flat_best, pair_scores.shape)
            best_word_span[row, 0] = int(start_idx)
            best_word_span[row, 1] = int(end_idx)

        return best_word_span

Source File: context_tracer.py From opencensus-python with Apache License 2.0

5 votes

def get_span_datas(self, span):
        """Build one SpanData tuple for every item yielded by iterating `span`.

        Each SpanData copies the item's fields and uses self.span_context as
        its context; parent_span_id is None for items without a parent span.

        :rtype: list of opencensus.trace.span_data.SpanData
        :return list of SpanData tuples
        """
        span_datas = []
        for ss in span:
            parent_span_id = ss.parent_span.span_id if ss.parent_span else None
            span_datas.append(
                span_data_module.SpanData(
                    name=ss.name,
                    context=self.span_context,
                    span_id=ss.span_id,
                    parent_span_id=parent_span_id,
                    attributes=ss.attributes,
                    start_time=ss.start_time,
                    end_time=ss.end_time,
                    child_span_count=len(ss.children),
                    stack_trace=ss.stack_trace,
                    annotations=ss.annotations,
                    message_events=ss.message_events,
                    links=ss.links,
                    status=ss.status,
                    same_process_as_parent_span=ss.same_process_as_parent_span,
                    span_kind=ss.span_kind
                )
            )

        return span_datas

Source File: coref_model.py From e2e-coref with Apache License 2.0

5 votes

def get_span_emb(self, head_emb, context_outputs, span_starts, span_ends):
    """Build one embedding per candidate span by concatenating several parts.

    The parts, in concatenation order, are:
      1. the ``context_outputs`` row at each span start,
      2. the ``context_outputs`` row at each span end,
      3. (only if ``config["use_features"]``) a learned span-width embedding
         with dropout applied,
      4. (only if ``config["model_heads"]``) an attention-weighted sum of
         ``head_emb`` rows over the words of the span.

    Shape annotations in the trailing comments are the original author's;
    they are not checked here. NOTE(review): ``k`` presumably is the number
    of candidate spans -- confirm against the caller.

    Side effect: when ``config["model_heads"]`` is set, the per-word head
    scores are stored on ``self.head_scores``.
    """
    span_emb_list = []

    span_start_emb = tf.gather(context_outputs, span_starts) # [k, emb]
    span_emb_list.append(span_start_emb)

    span_end_emb = tf.gather(context_outputs, span_ends) # [k, emb]
    span_emb_list.append(span_end_emb)

    # Width counts both endpoints, so a single-word span has width 1.
    span_width = 1 + span_ends - span_starts # [k]

    if self.config["use_features"]:
      # Widths are 1-based; shift to 0-based row indices into the embedding table.
      span_width_index = span_width - 1 # [k]
      span_width_emb = tf.gather(tf.get_variable("span_width_embeddings", [self.config["max_span_width"], self.config["feature_size"]]), span_width_index) # [k, emb]
      span_width_emb = tf.nn.dropout(span_width_emb, self.dropout)
      span_emb_list.append(span_width_emb)

    if self.config["model_heads"]:
      # For every span, the max_span_width consecutive word indices beginning at its start.
      span_indices = tf.expand_dims(tf.range(self.config["max_span_width"]), 0) + tf.expand_dims(span_starts, 1) # [k, max_span_width]
      # Clamp so the gathers below never index past the end.
      # NOTE(review): assumes util.shape(x, 0) returns the size of dimension 0 -- confirm.
      span_indices = tf.minimum(util.shape(context_outputs, 0) - 1, span_indices) # [k, max_span_width]
      span_text_emb = tf.gather(head_emb, span_indices) # [k, max_span_width, emb]
      with tf.variable_scope("head_scores"):
        self.head_scores = util.projection(context_outputs, 1) # [num_words, 1]
      span_head_scores = tf.gather(self.head_scores, span_indices) # [k, max_span_width, 1]
      # Mask is 1.0 for positions inside the span's width and 0.0 beyond it.
      span_mask = tf.expand_dims(tf.sequence_mask(span_width, self.config["max_span_width"], dtype=tf.float32), 2) # [k, max_span_width, 1]
      # log(1) = 0 keeps in-span scores; log(0) = -inf removes out-of-span positions from the softmax.
      span_head_scores += tf.log(span_mask) # [k, max_span_width, 1]
      span_attention = tf.nn.softmax(span_head_scores, 1) # [k, max_span_width, 1]
      span_head_emb = tf.reduce_sum(span_attention * span_text_emb, 1) # [k, emb]
      span_emb_list.append(span_head_emb)

    # Concatenate all collected parts along axis 1.
    span_emb = tf.concat(span_emb_list, 1) # [k, emb]
    return span_emb # [k, emb]

Source File: model_utils.py From lsgn with Apache License 2.0

5 votes

def get_dense_span_labels(span_starts, span_ends, span_labels, num_spans, max_sentence_length, span_parents=None):
  """Scatter sparse span labels into a dense label tensor.

  Every (sentence, start, end[, parent]) index tuple is written with its
  label into a zero-initialised tensor; positions without a span keep 0.

  Args:
    span_starts: [num_sentences, max_num_spans]
    span_ends: [num_sentences, max_num_spans]
    span_labels: [num_sentences, max_num_spans]
    num_spans: [num_sentences,]
    max_sentence_length: size of every non-sentence axis of the output.
    span_parents: [num_sentences, max_num_spans]. Predicates in SRL.

  Returns:
    Dense labels of shape [num_sentences] + [max_sentence_length] * 2
    (or * 3 when span_parents is given).
  """
  num_sentences = util.shape(span_starts, 0)
  max_num_spans = util.shape(span_starts, 1)

  # Padded slots (beyond num_spans) get their start bumped by one so that,
  # with padded ends of 0, they cannot coincide with a real span.
  padding_bump = 1 - tf.sequence_mask(num_spans, dtype=tf.int32)  # [num_sentences, max_num_spans]
  span_starts += padding_bump

  row_ids = tf.expand_dims(tf.range(num_sentences), 1)  # [num_sentences, 1]
  sentence_indices = tf.tile(row_ids, [1, max_num_spans])  # [num_sentences, max_num_spans]

  # One index column per output axis: sentence, start, end and, optionally, parent.
  index_columns = [sentence_indices, span_starts, span_ends]
  if span_parents is not None:
    index_columns.append(span_parents)
  rank = len(index_columns)

  sparse_indices = tf.concat(
      [tf.expand_dims(column, 2) for column in index_columns],
      axis=2)  # [num_sentences, max_num_spans, rank]

  # (sent_id, span_start, span_end[, parent]) -> span_label
  flat_indices = tf.reshape(sparse_indices, [num_sentences * max_num_spans, rank])
  flat_labels = tf.reshape(span_labels, [-1])
  dense_shape = [num_sentences] + [max_sentence_length] * (rank - 1)
  return tf.sparse_to_dense(
      sparse_indices=flat_indices,
      output_shape=dense_shape,
      sparse_values=flat_labels,
      default_value=0,
      validate_indices=False)

Source File: bidaf.py From MSMARCO with MIT License

5 votes

def get_best_span(cls, start_log_probs, end_log_probs):
        """
        Get the best span.

        For every batch row, scans the tokens left to right while tracking
        the largest start log-probability seen so far, and keeps the
        (start, end) pair with ``start <= end`` that maximises
        ``start_log_prob + end_log_prob``.

        :param start_log_probs: 2-D (batch, tokens) tensor or Variable of
            start-position log-probabilities.
        :param end_log_probs: 2-D (batch, tokens) tensor or Variable of
            end-position log-probabilities.
        :return: LongTensor of shape (batch, 2) holding ``[start, end + 1]``
            per row, i.e. an exclusive end so that ``passage[s:e]`` works.

        The inputs are not modified.
        """
        if isinstance(start_log_probs, Variable):
            start_log_probs = start_log_probs.data
        if isinstance(end_log_probs, Variable):
            end_log_probs = end_log_probs.data

        batch_size, num_tokens = start_log_probs.size()
        start_end = torch.zeros(batch_size, 2).long()
        # The sum allocates a fresh tensor, so updating it below is safe.
        max_val = start_log_probs[:, 0] + end_log_probs[:, 0]
        # Indexing a column returns a view of the input; clone it so the
        # running-maximum updates below do not overwrite the caller's
        # start_log_probs[:, 0].
        max_start = start_log_probs[:, 0].clone()
        arg_max_start = torch.zeros(batch_size).long()

        for batch in range(batch_size):
            _start_lp = start_log_probs[batch]
            _end_lp = end_log_probs[batch]
            for t_s in range(1, num_tokens):
                # Best start position among tokens 0..t_s.
                if max_start[batch] < _start_lp[t_s]:
                    arg_max_start[batch] = t_s
                    max_start[batch] = _start_lp[t_s]

                # Best span ending exactly at t_s uses that best start.
                cur_score = max_start[batch] + _end_lp[t_s]
                if max_val[batch] < cur_score:
                    start_end[batch, 0] = arg_max_start[batch]
                    start_end[batch, 1] = t_s
                    max_val[batch] = cur_score

        # Place the end point one time step after the end, so that
        # passage[s:e] works.
        start_end[:, 1] += 1
        return start_end

Source File: server.py From jaeger-client-python with Apache License 2.0

5 votes

def get_span(self, http_request, update_span_func):
        """Start a server-side span for ``http_request``.

        The request is wrapped in a ``TornadoRequestWrapper`` and handed to
        ``http_server.before_request`` together with ``self.tracer``. If
        ``update_span_func`` is truthy it is called with the new span before
        the span is returned.
        """
        wrapped_request = http_server.TornadoRequestWrapper(request=http_request)
        new_span = http_server.before_request(wrapped_request, self.tracer)

        if not update_span_func:
            return new_span

        update_span_func(new_span)
        return new_span

Source File: server.py From jaeger-client-python with Apache License 2.0

5 votes

def get_observed_span(span):
    """Summarise ``span`` as an ``ObservedSpan``.

    The result carries the trace id rendered as lowercase hex, the sampling
    flag, and the baggage item stored under ``constants.baggage_key``.
    """
    trace_id_hex = '%x' % span.trace_id
    is_sampled = span.is_sampled()
    baggage_value = span.get_baggage_item(constants.baggage_key)
    return ObservedSpan(
        traceId=trace_id_hex,
        sampled=is_sampled,
        baggage=baggage_value,
    )

Source File: get_span_column_count.py From DashTable with MIT License

5 votes

def get_span_column_count(span):
    """
    Count the columns covered by a span.

    The cells are walked in order and the count grows each time a cell's
    column index exceeds every column seen so far.

    Parameters
    ----------
    span : list of lists of int
        The [row, column] pairs that make up the span

    Returns
    -------
    columns : int
        The number of columns included in the span

    Example
    -------
    Consider this table::

        +------+------------------+
        | foo  | bar              |
        +------+--------+---------+
        | spam | goblet | berries |
        +------+--------+---------+

    ::

        >>> span = [[0, 1], [0, 2]]
        >>> print(get_span_column_count(span))
        2
    """
    rightmost_seen = span[0][1]
    column_total = 1

    for cell in span:
        column = cell[1]
        if column <= rightmost_seen:
            continue
        # A column further right than any seen so far: count it.
        column_total += 1
        rightmost_seen = column

    return column_total

Source File: get_span_char_width.py From DashTable with MIT License

5 votes

def get_span_char_width(span, column_widths):
    """
    Compute the character width of a span.

    The width is the sum of the widths of the columns the span covers,
    plus one extra character for each boundary between adjacent columns
    inside the span.

    Parameters
    ----------
    span : list of lists of int
        list of [row, column] pairs that make up the span
    column_widths : list of int
        The widths of the columns that make up the table

    Returns
    -------
    total_width : int
        The total width of the span
    """
    first = span[0][1]
    count = get_span_column_count(span)

    columns_width = sum(
        column_widths[index] for index in range(first, first + count)
    )
    # count - 1 interior boundaries, one character each.
    return columns_width + (count - 1)

Source File: get_span_row_count.py From DashTable with MIT License

5 votes

def get_span_row_count(span):
    """
    Count the rows covered by a span.

    The cells are walked in order and the count grows each time a cell's
    row index exceeds every row seen so far.

    Parameters
    ----------
    span : list of lists of int
        The [row, column] pairs that make up the span

    Returns
    -------
    rows : int
        The number of rows included in the span

    Example
    -------
    Consider this table::

        +--------+-----+
        | foo    | bar |
        +--------+     |
        | spam   |     |
        +--------+     |
        | goblet |     |
        +--------+-----+

    ::

        >>> span = [[0, 1], [1, 1], [2, 1]]
        >>> print(get_span_row_count(span))
        3
    """
    lowest_seen = span[0][0]
    row_total = 1

    for cell in span:
        row = cell[0]
        if row <= lowest_seen:
            continue
        # A row further down than any seen so far: count it.
        row_total += 1
        lowest_seen = row

    return row_total

Source File: basecalling_guppy_mod.py From nanopype with MIT License

5 votes

def get_refernce_span(self, ref=None):
        """Return the reference sequence covered by this record.

        If the record has an ``MD`` tag, the reference is rebuilt from it via
        ``__decode_md__``. Otherwise the record's reference name is looked up
        in ``ref`` and the covered slice is cut out, starting at
        ``self._pos - 1`` and spanning ``__opsLength__("MDN=X")`` positions.
        NOTE(review): the ``- 1`` suggests ``_pos`` is 1-based -- confirm.

        :param ref: optional mapping from reference name to a sliceable
            sequence. Defaults to no references.
        :return: the value of ``__decode_md__()`` or a slice of
            ``ref[self._rname]``.
        :raises Exception: carrying the reference name, when there is no
            ``MD`` tag and the name is not present in ``ref``.
        """
        # None sentinel instead of a shared mutable ``{}`` default.
        if ref is None:
            ref = {}

        if "MD" in self.tags:
            return self.__decode_md__()
        if self._rname in ref:
            ref_span = self.__opsLength__("MDN=X")
            start = self._pos - 1
            return ref[self._rname][start:start + ref_span]
        raise Exception(self._rname)

Source File: utils.py From active-qa with Apache License 2.0

5 votes

def get_word_span(context, wordss, start, stop):
  """Locate the words that overlap the character range [start, stop).

  Word spans come from ``get_2d_spans(context, wordss)``; a word matches
  when its span overlaps the requested range. Returns the (sentence, word)
  index of the first match and an exclusive end index one word past the
  last match. Asserts that at least one word matches.
  """
  sentence_spans = get_2d_spans(context, wordss)
  hits = [
      (sent_idx, word_idx)
      for sent_idx, word_spans in enumerate(sentence_spans)
      for word_idx, word_span in enumerate(word_spans)
      if stop > word_span[0] and start < word_span[1]
  ]

  assert len(hits) > 0, "{} {} {} {}".format(context, sentence_spans, start, stop)
  last_sent_idx, last_word_idx = hits[-1]
  return hits[0], (last_sent_idx, last_word_idx + 1)