Python Examples of future.backports.email._encoded

Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From telegram-robot-rss with Mozilla Public License 2.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From cadquery-freecad-module with GNU Lesser General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From addon with GNU General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From deepWordBug with Apache License 2.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From blackmamba with MIT License

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From gimp-plugin-export-layers with GNU General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From Tautulli with GNU General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From verge3d-blender-addon with GNU General Public License v3.0

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

6 votes

def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    """
    m = _non_extended_attribute_end_matcher(value)
    if not m:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    attrtext = m.group()
    value = value[len(attrtext):]
    attrtext = ValueTerminal(attrtext, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, value

Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From verge3d-blender-addon with GNU General Public License v3.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From gimp-plugin-export-layers with GNU General Public License v3.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From blackmamba with MIT License

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From Tautulli with GNU General Public License v3.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From addon with GNU General Public License v3.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From cadquery-freecad-module with GNU Lesser General Public License v3.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From telegram-robot-rss with Mozilla Public License 2.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From deepWordBug with Apache License 2.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0

5 votes

def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Scans the supplied value for a run of tokens that look like they are RFC
    2047 encoded words, decodes those words into text according to RFC 2047
    rules (whitespace between encoded words is discarded), and returns the text
    and the remaining value (including any leading whitespace on the remaining
    value), as well as a list of any defects encountered while decoding.  The
    input value may not have any leading whitespace.

    """
    res = []
    defects = []
    last_ws = ''
    while value:
        try:
            tok, ws, value = _wsp_splitter(value, 1)
        except ValueError:
            tok, ws, value = value, '', ''
        if not (tok.startswith('=?') and tok.endswith('?=')):
            return ''.join(res), last_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        res.append(text)
        defects.extend(new_defects)
        last_ws = ws
    return ''.join(res), last_ws, defects

Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0

4 votes

def get_encoded_word(value):
    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    """
    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    _3to2list1 = list(value[2:].split('?=', 1))
    tok, remainder, = _3to2list1[:1] + [_3to2list1[1:]]
    if tok == value[2:]:
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    remstr = ''.join(remainder)
    if remstr[:2].isdigit():
        _3to2list3 = list(remstr.split('?=', 1))
        rest, remainder, = _3to2list3[:1] + [_3to2list3[1:]]
        tok = tok + '?=' + rest
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    ew.cte = value
    value = ''.join(remainder)
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
            ew.append(token)
            continue
        _3to2list5 = list(_wsp_splitter(text, 1))
        chars, remainder, = _3to2list5[:1] + [_3to2list5[1:]]
        vtext = ValueTerminal(chars, 'vtext')
        _validate_xtext(vtext)
        ew.append(vtext)
        text = ''.join(remainder)
    return ew, value

Source File: _header_value_parser.py From verge3d-blender-addon with GNU General Public License v3.0

4 votes

def params(self):
        # The RFC specifically states that the ordering of parameters is not
        # guaranteed and may be reordered by the transport layer.  So we have
        # to assume the RFC 2231 pieces can come in any order.  However, we
        # output them in the order that we first see a given name, which gives
        # us a stable __str__.
        params = OrderedDict()
        for token in self:
            if not token.token_type.endswith('parameter'):
                continue
            if token[0].token_type != 'attribute':
                continue
            name = token[0].value.strip()
            if name not in params:
                params[name] = []
            params[name].append((token.section_number, token))
        for name, parts in params.items():
            parts = sorted(parts)
            # XXX: there might be more recovery we could do here if, for
            # example, this is really a case of a duplicate attribute name.
            value_parts = []
            charset = parts[0][1].charset
            for i, (section_number, param) in enumerate(parts):
                if section_number != i:
                    param.defects.append(errors.InvalidHeaderDefect(
                        "inconsistent multipart parameter numbering"))
                value = param.param_value
                if param.extended:
                    try:
                        value = unquote_to_bytes(value)
                    except UnicodeEncodeError:
                        # source had surrogate escaped bytes.  What we do now
                        # is a bit of an open question.  I'm not sure this is
                        # the best choice, but it is what the old algorithm did
                        value = unquote(value, encoding='latin-1')
                    else:
                        try:
                            value = value.decode(charset, 'surrogateescape')
                        except LookupError:
                            # XXX: there should really be a custom defect for
                            # unknown character set to make it easy to find,
                            # because otherwise unknown charset is a silent
                            # failure.
                            value = value.decode('us-ascii', 'surrogateescape')
                        if utils._has_surrogates(value):
                            param.defects.append(errors.UndecodableBytesDefect())
                value_parts.append(value)
            value = ''.join(value_parts)
            yield name, value

Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0

4 votes

def params(self):
        # The RFC specifically states that the ordering of parameters is not
        # guaranteed and may be reordered by the transport layer.  So we have
        # to assume the RFC 2231 pieces can come in any order.  However, we
        # output them in the order that we first see a given name, which gives
        # us a stable __str__.
        params = OrderedDict()
        for token in self:
            if not token.token_type.endswith('parameter'):
                continue
            if token[0].token_type != 'attribute':
                continue
            name = token[0].value.strip()
            if name not in params:
                params[name] = []
            params[name].append((token.section_number, token))
        for name, parts in params.items():
            parts = sorted(parts)
            # XXX: there might be more recovery we could do here if, for
            # example, this is really a case of a duplicate attribute name.
            value_parts = []
            charset = parts[0][1].charset
            for i, (section_number, param) in enumerate(parts):
                if section_number != i:
                    param.defects.append(errors.InvalidHeaderDefect(
                        "inconsistent multipart parameter numbering"))
                value = param.param_value
                if param.extended:
                    try:
                        value = unquote_to_bytes(value)
                    except UnicodeEncodeError:
                        # source had surrogate escaped bytes.  What we do now
                        # is a bit of an open question.  I'm not sure this is
                        # the best choice, but it is what the old algorithm did
                        value = unquote(value, encoding='latin-1')
                    else:
                        try:
                            value = value.decode(charset, 'surrogateescape')
                        except LookupError:
                            # XXX: there should really be a custom defect for
                            # unknown character set to make it easy to find,
                            # because otherwise unknown charset is a silent
                            # failure.
                            value = value.decode('us-ascii', 'surrogateescape')
                        if utils._has_surrogates(value):
                            param.defects.append(errors.UndecodableBytesDefect())
                value_parts.append(value)
            value = ''.join(value_parts)
            yield name, value

Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0

4 votes

def params(self):
        # The RFC specifically states that the ordering of parameters is not
        # guaranteed and may be reordered by the transport layer.  So we have
        # to assume the RFC 2231 pieces can come in any order.  However, we
        # output them in the order that we first see a given name, which gives
        # us a stable __str__.
        params = OrderedDict()
        for token in self:
            if not token.token_type.endswith('parameter'):
                continue
            if token[0].token_type != 'attribute':
                continue
            name = token[0].value.strip()
            if name not in params:
                params[name] = []
            params[name].append((token.section_number, token))
        for name, parts in params.items():
            parts = sorted(parts)
            # XXX: there might be more recovery we could do here if, for
            # example, this is really a case of a duplicate attribute name.
            value_parts = []
            charset = parts[0][1].charset
            for i, (section_number, param) in enumerate(parts):
                if section_number != i:
                    param.defects.append(errors.InvalidHeaderDefect(
                        "inconsistent multipart parameter numbering"))
                value = param.param_value
                if param.extended:
                    try:
                        value = unquote_to_bytes(value)
                    except UnicodeEncodeError:
                        # source had surrogate escaped bytes.  What we do now
                        # is a bit of an open question.  I'm not sure this is
                        # the best choice, but it is what the old algorithm did
                        value = unquote(value, encoding='latin-1')
                    else:
                        try:
                            value = value.decode(charset, 'surrogateescape')
                        except LookupError:
                            # XXX: there should really be a custom defect for
                            # unknown character set to make it easy to find,
                            # because otherwise unknown charset is a silent
                            # failure.
                            value = value.decode('us-ascii', 'surrogateescape')
                        if utils._has_surrogates(value):
                            param.defects.append(errors.UndecodableBytesDefect())
                value_parts.append(value)
            value = ''.join(value_parts)
            yield name, value

Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0

4 votes

def get_encoded_word(value):
    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    """
    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    _3to2list1 = list(value[2:].split('?=', 1))
    tok, remainder, = _3to2list1[:1] + [_3to2list1[1:]]
    if tok == value[2:]:
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    remstr = ''.join(remainder)
    if remstr[:2].isdigit():
        _3to2list3 = list(remstr.split('?=', 1))
        rest, remainder, = _3to2list3[:1] + [_3to2list3[1:]]
        tok = tok + '?=' + rest
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    ew.cte = value
    value = ''.join(remainder)
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
            ew.append(token)
            continue
        _3to2list5 = list(_wsp_splitter(text, 1))
        chars, remainder, = _3to2list5[:1] + [_3to2list5[1:]]
        vtext = ValueTerminal(chars, 'vtext')
        _validate_xtext(vtext)
        ew.append(vtext)
        text = ''.join(remainder)
    return ew, value

Python future.backports.email._encoded_words.decode() Examples