Python future.backports.email._encoded_words.decode() Examples
The following are 30 code examples of future.backports.email._encoded_words.decode().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module future.backports.email._encoded_words, or try the search function.
Example #1
Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #2
Source File: _header_value_parser.py From telegram-robot-rss with Mozilla Public License 2.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #3
Source File: _header_value_parser.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #4
Source File: _header_value_parser.py From cadquery-freecad-module with GNU Lesser General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #5
Source File: _header_value_parser.py From addon with GNU General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #6
Source File: _header_value_parser.py From deepWordBug with Apache License 2.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #7
Source File: _header_value_parser.py From blackmamba with MIT License | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #8
Source File: _header_value_parser.py From gimp-plugin-export-layers with GNU General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #9
Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #10
Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #11
Source File: _header_value_parser.py From Tautulli with GNU General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #12
Source File: _header_value_parser.py From verge3d-blender-addon with GNU General Public License v3.0 | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #13
Source File: _header_value_parser.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Peel the leading extended attribute text off ``value``.  The whole
    match, % escapes included, is kept as one string so that it can be
    decoded as a single string later.

    Returns a ``(token, remainder)`` pair: ``token`` is the matched text
    wrapped in a ``ValueTerminal`` of type 'extended-attrtext' and
    ``remainder`` is ``value`` with the matched text's length removed
    from its front.

    Raises ``errors.HeaderParseError`` when
    ``_non_extended_attribute_end_matcher`` reports no match for ``value``.
    """
    found = _non_extended_attribute_end_matcher(value)
    if found:
        raw = found.group()
        remainder = value[len(raw):]
        terminal = ValueTerminal(raw, 'extended-attrtext')
        _validate_xtext(terminal)
        return terminal, remainder
    raise errors.HeaderParseError(
        "expected extended attrtext but found {!r}".format(value))
Example #14
Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #15
Source File: _header_value_parser.py From verge3d-blender-addon with GNU General Public License v3.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #16
Source File: _header_value_parser.py From gimp-plugin-export-layers with GNU General Public License v3.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #17
Source File: _header_value_parser.py From blackmamba with MIT License | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #18
Source File: _header_value_parser.py From Tautulli with GNU General Public License v3.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #19
Source File: _header_value_parser.py From addon with GNU General Public License v3.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #20
Source File: _header_value_parser.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #21
Source File: _header_value_parser.py From cadquery-freecad-module with GNU Lesser General Public License v3.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #22
Source File: _header_value_parser.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #23
Source File: _header_value_parser.py From deepWordBug with Apache License 2.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #24
Source File: _header_value_parser.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #25
Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0 | 5 votes |
def _decode_ew_run(value): """ Decode a run of RFC2047 encoded words. _decode_ew_run(value) -> (text, value, defects) Scans the supplied value for a run of tokens that look like they are RFC 2047 encoded words, decodes those words into text according to RFC 2047 rules (whitespace between encoded words is discarded), and returns the text and the remaining value (including any leading whitespace on the remaining value), as well as a list of any defects encountered while decoding. The input value may not have any leading whitespace. """ res = [] defects = [] last_ws = '' while value: try: tok, ws, value = _wsp_splitter(value, 1) except ValueError: tok, ws, value = value, '', '' if not (tok.startswith('=?') and tok.endswith('?=')): return ''.join(res), last_ws + tok + ws + value, defects text, charset, lang, new_defects = _ew.decode(tok) res.append(text) defects.extend(new_defects) last_ws = ws return ''.join(res), last_ws, defects
Example #26
Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0 | 4 votes |
def get_encoded_word(value):
    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    Parse one encoded word from the front of ``value``.

    Returns ``(ew, remainder)``: ``ew`` is an ``EncodedWord`` token list
    holding the decoded text (split into folding-whitespace tokens and
    'vtext' terminals) with its ``cte``, ``charset``, ``lang`` and
    ``defects`` filled in, and ``remainder`` is what follows the encoded
    word in ``value``.

    Raises ``errors.HeaderParseError`` if ``value`` does not start with
    '=?', contains no closing '?=', or cannot be decoded by ``_ew.decode``
    (which signals that with ``ValueError``).
    """
    from string import hexdigits

    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    tok, closer, remstr = value[2:].partition('?=')
    if not closer:
        # No terminating "?=" anywhere after the opening "=?".
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    if (len(remstr) > 1 and
            remstr[0] in hexdigits and
            remstr[1] in hexdigits and
            tok.count('?') < 2):
        # The "?=" we split on was not the terminator: it is the "?" that
        # ends the encoding field followed by a "=XX" escape opening the
        # encoded text.  Escapes are two *hex* digits, so letters must be
        # accepted too, and the re-join only makes sense while the token
        # has not yet reached its encoded-text field.
        rest, _, remstr = remstr.partition('?=')
        tok = tok + '?=' + rest
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    # Keep the original input around for the error message below.
    ew.cte = value
    value = remstr
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    # Break the decoded text into whitespace tokens and vtext terminals.
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
            ew.append(token)
            continue
        pieces = _wsp_splitter(text, 1)
        vtext = ValueTerminal(pieces[0], 'vtext')
        _validate_xtext(vtext)
        ew.append(vtext)
        text = ''.join(pieces[1:])
    return ew, value
Example #27
Source File: _header_value_parser.py From verge3d-blender-addon with GNU General Public License v3.0 | 4 votes |
def params(self):
    """Yield ``(name, value)`` for each parameter name found in ``self``.

    Tokens whose type ends in 'parameter' and whose first element is an
    'attribute' are grouped by attribute name.  The pieces of each group
    are ordered by section number and their values concatenated.
    Extended values are percent-unquoted and decoded using the charset of
    the first piece.  Numbering gaps and undecodable bytes are recorded
    as defects on the affected piece rather than raised.
    """
    # The RFC specifically states that the ordering of parameters is not
    # guaranteed and may be reordered by the transport layer.  So we have
    # to assume the RFC 2231 pieces can come in any order.  However, we
    # output them in the order that we first see a given name, which gives
    # us a stable __str__.
    grouped = OrderedDict()
    for token in self:
        if not token.token_type.endswith('parameter'):
            continue
        if token[0].token_type != 'attribute':
            continue
        name = token[0].value.strip()
        grouped.setdefault(name, []).append((token.section_number, token))
    for name, parts in grouped.items():
        # Order on the section number alone.  Sorting the raw tuples would
        # fall back to comparing the token objects whenever two pieces
        # share a number (e.g. a duplicated name), which can raise
        # TypeError or reorder the pieces by content.  The sort is stable,
        # so such ties keep their order of appearance.
        parts = sorted(parts, key=lambda part: part[0])
        # XXX: there might be more recovery we could do here if, for
        # example, this is really a case of a duplicate attribute name.
        value_parts = []
        charset = parts[0][1].charset
        for i, (section_number, param) in enumerate(parts):
            if section_number != i:
                param.defects.append(errors.InvalidHeaderDefect(
                    "inconsistent multipart parameter numbering"))
            value = param.param_value
            if param.extended:
                try:
                    value = unquote_to_bytes(value)
                except UnicodeEncodeError:
                    # source had surrogate escaped bytes.  What we do now
                    # is a bit of an open question.  I'm not sure this is
                    # the best choice, but it is what the old algorithm did
                    value = unquote(value, encoding='latin-1')
                else:
                    try:
                        value = value.decode(charset, 'surrogateescape')
                    except LookupError:
                        # XXX: there should really be a custom defect for
                        # unknown character set to make it easy to find,
                        # because otherwise unknown charset is a silent
                        # failure.
                        value = value.decode('us-ascii', 'surrogateescape')
                    if utils._has_surrogates(value):
                        param.defects.append(errors.UndecodableBytesDefect())
            value_parts.append(value)
        value = ''.join(value_parts)
        yield name, value
Example #28
Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0 | 4 votes |
def params(self):
    """Yield ``(name, value)`` for each parameter name found in ``self``.

    Tokens whose type ends in 'parameter' and whose first element is an
    'attribute' are grouped by attribute name.  The pieces of each group
    are ordered by section number and their values concatenated.
    Extended values are percent-unquoted and decoded using the charset of
    the first piece.  Numbering gaps and undecodable bytes are recorded
    as defects on the affected piece rather than raised.
    """
    # The RFC specifically states that the ordering of parameters is not
    # guaranteed and may be reordered by the transport layer.  So we have
    # to assume the RFC 2231 pieces can come in any order.  However, we
    # output them in the order that we first see a given name, which gives
    # us a stable __str__.
    grouped = OrderedDict()
    for token in self:
        if not token.token_type.endswith('parameter'):
            continue
        if token[0].token_type != 'attribute':
            continue
        name = token[0].value.strip()
        grouped.setdefault(name, []).append((token.section_number, token))
    for name, parts in grouped.items():
        # Order on the section number alone.  Sorting the raw tuples would
        # fall back to comparing the token objects whenever two pieces
        # share a number (e.g. a duplicated name), which can raise
        # TypeError or reorder the pieces by content.  The sort is stable,
        # so such ties keep their order of appearance.
        parts = sorted(parts, key=lambda part: part[0])
        # XXX: there might be more recovery we could do here if, for
        # example, this is really a case of a duplicate attribute name.
        value_parts = []
        charset = parts[0][1].charset
        for i, (section_number, param) in enumerate(parts):
            if section_number != i:
                param.defects.append(errors.InvalidHeaderDefect(
                    "inconsistent multipart parameter numbering"))
            value = param.param_value
            if param.extended:
                try:
                    value = unquote_to_bytes(value)
                except UnicodeEncodeError:
                    # source had surrogate escaped bytes.  What we do now
                    # is a bit of an open question.  I'm not sure this is
                    # the best choice, but it is what the old algorithm did
                    value = unquote(value, encoding='latin-1')
                else:
                    try:
                        value = value.decode(charset, 'surrogateescape')
                    except LookupError:
                        # XXX: there should really be a custom defect for
                        # unknown character set to make it easy to find,
                        # because otherwise unknown charset is a silent
                        # failure.
                        value = value.decode('us-ascii', 'surrogateescape')
                    if utils._has_surrogates(value):
                        param.defects.append(errors.UndecodableBytesDefect())
            value_parts.append(value)
        value = ''.join(value_parts)
        yield name, value
Example #29
Source File: _header_value_parser.py From misp42splunk with GNU Lesser General Public License v3.0 | 4 votes |
def params(self):
    """Yield ``(name, value)`` for each parameter name found in ``self``.

    Tokens whose type ends in 'parameter' and whose first element is an
    'attribute' are grouped by attribute name.  The pieces of each group
    are ordered by section number and their values concatenated.
    Extended values are percent-unquoted and decoded using the charset of
    the first piece.  Numbering gaps and undecodable bytes are recorded
    as defects on the affected piece rather than raised.
    """
    # The RFC specifically states that the ordering of parameters is not
    # guaranteed and may be reordered by the transport layer.  So we have
    # to assume the RFC 2231 pieces can come in any order.  However, we
    # output them in the order that we first see a given name, which gives
    # us a stable __str__.
    grouped = OrderedDict()
    for token in self:
        if not token.token_type.endswith('parameter'):
            continue
        if token[0].token_type != 'attribute':
            continue
        name = token[0].value.strip()
        grouped.setdefault(name, []).append((token.section_number, token))
    for name, parts in grouped.items():
        # Order on the section number alone.  Sorting the raw tuples would
        # fall back to comparing the token objects whenever two pieces
        # share a number (e.g. a duplicated name), which can raise
        # TypeError or reorder the pieces by content.  The sort is stable,
        # so such ties keep their order of appearance.
        parts = sorted(parts, key=lambda part: part[0])
        # XXX: there might be more recovery we could do here if, for
        # example, this is really a case of a duplicate attribute name.
        value_parts = []
        charset = parts[0][1].charset
        for i, (section_number, param) in enumerate(parts):
            if section_number != i:
                param.defects.append(errors.InvalidHeaderDefect(
                    "inconsistent multipart parameter numbering"))
            value = param.param_value
            if param.extended:
                try:
                    value = unquote_to_bytes(value)
                except UnicodeEncodeError:
                    # source had surrogate escaped bytes.  What we do now
                    # is a bit of an open question.  I'm not sure this is
                    # the best choice, but it is what the old algorithm did
                    value = unquote(value, encoding='latin-1')
                else:
                    try:
                        value = value.decode(charset, 'surrogateescape')
                    except LookupError:
                        # XXX: there should really be a custom defect for
                        # unknown character set to make it easy to find,
                        # because otherwise unknown charset is a silent
                        # failure.
                        value = value.decode('us-ascii', 'surrogateescape')
                    if utils._has_surrogates(value):
                        param.defects.append(errors.UndecodableBytesDefect())
            value_parts.append(value)
        value = ''.join(value_parts)
        yield name, value
Example #30
Source File: _header_value_parser.py From arissploit with GNU General Public License v3.0 | 4 votes |
def get_encoded_word(value):
    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    Parse one encoded word from the front of ``value``.

    Returns ``(ew, remainder)``: ``ew`` is an ``EncodedWord`` token list
    holding the decoded text (split into folding-whitespace tokens and
    'vtext' terminals) with its ``cte``, ``charset``, ``lang`` and
    ``defects`` filled in, and ``remainder`` is what follows the encoded
    word in ``value``.

    Raises ``errors.HeaderParseError`` if ``value`` does not start with
    '=?', contains no closing '?=', or cannot be decoded by ``_ew.decode``
    (which signals that with ``ValueError``).
    """
    from string import hexdigits

    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    tok, closer, remstr = value[2:].partition('?=')
    if not closer:
        # No terminating "?=" anywhere after the opening "=?".
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    if (len(remstr) > 1 and
            remstr[0] in hexdigits and
            remstr[1] in hexdigits and
            tok.count('?') < 2):
        # The "?=" we split on was not the terminator: it is the "?" that
        # ends the encoding field followed by a "=XX" escape opening the
        # encoded text.  Escapes are two *hex* digits, so letters must be
        # accepted too, and the re-join only makes sense while the token
        # has not yet reached its encoded-text field.
        rest, _, remstr = remstr.partition('?=')
        tok = tok + '?=' + rest
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    # Keep the original input around for the error message below.
    ew.cte = value
    value = remstr
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    # Break the decoded text into whitespace tokens and vtext terminals.
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
            ew.append(token)
            continue
        pieces = _wsp_splitter(text, 1)
        vtext = ValueTerminal(pieces[0], 'vtext')
        _validate_xtext(vtext)
        ew.append(vtext)
        text = ''.join(pieces[1:])
    return ew, value