Python email.header.decode_header() Examples

The following are 30 code examples of email.header.decode_header(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module email.header, or try the search function.
Example #1
Source File: email_utils.py    From app with MIT License 6 votes vote down vote up
def parseaddr_unicode(addr) -> (str, str):
    """Like parseaddr but return name in unicode instead of in RFC 2047 format
    '=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>' -> ('Nhơn Nguyễn', "abcd@gmail.com")

    Every chunk returned by decode_header() is decoded and joined, so a name
    that mixes encoded words with plain text is returned in full. The email
    part is stripped and lower-cased. If any chunk cannot be decoded (bad
    bytes or an unknown charset) a warning is logged and the name is "".
    """
    name, email = parseaddr(addr)
    email = email.strip().lower()
    if not name:
        return name, email

    name = name.strip()
    pieces = []
    for chunk, charset in decode_header(name):
        # decode_header() yields str when the header has no encoded word at
        # all, and bytes for every chunk otherwise (charset is None for the
        # unencoded chunks).
        if isinstance(chunk, str):
            pieces.append(chunk)
            continue
        try:
            pieces.append(chunk.decode(charset or "ascii"))
        except (UnicodeDecodeError, LookupError):
            LOG.warning("Cannot decode addr name %s", name)
            return "", email

    return "".join(pieces), email
Example #2
Source File: test_email.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""") 
Example #3
Source File: utils.py    From imap_tools with Apache License 2.0 6 votes vote down vote up
def parse_email_addresses(raw_header: str) -> (dict,):
    """
    Parse email addresses from header
    :param raw_header: example: '=?UTF-8?B?0J7Qu9C1=?= <name@company.ru>,\r\n "\'\\"z, z\\"\'" <imap.tools@ya.ru>'
    :return: tuple(dict(name: str, email: str, full: str))

    Entries with neither a name nor an email are skipped. 'email' is set to ''
    when the parsed address contains no '@'.
    """
    result = []
    for raw_name, email in getaddresses([raw_header]):
        # A display name may consist of several chunks (encoded words mixed
        # with plain text); decode each of them instead of only the first so
        # the tail of the name is not lost.
        # NOTE(review): assumes decode_value() returns str for every chunk - confirm.
        name = ''.join(decode_value(*part) for part in decode_header(raw_name)).strip()
        email = email.strip()
        if not (name or email):
            continue
        result.append({
            'email': email if '@' in email else '',
            'name': name,
            'full': '{} <{}>'.format(name, email) if name and email else name or email
        })
    return tuple(result)
Example #4
Source File: store.py    From pymailq with GNU General Public License v2.0 6 votes vote down vote up
def show(self):
        """
        Return mail detailed representation for printing

        Every public attribute of ``self.head`` is rendered as an
        ``"<attr>: <value>"`` line, in sorted attribute order; non-string
        values are joined with ``", "``. The ``Subject`` value is RFC 2047
        decoded: all chunks are decoded to text and concatenated.

        :return: Representation as :class:`str`
        """
        lines = ["=== Mail %s ===\n" % (self.qid,)]
        for attr in sorted(dir(self.head)):
            if attr.startswith("_"):
                continue

            value = getattr(self.head, attr)
            if not isinstance(value, str):
                value = ", ".join(value)

            if attr == "Subject":
                chunks = []
                for chunk, enc in header.decode_header(value):
                    # Encoded headers come back as bytes; turn them into text
                    # so the output never contains a bytes repr.
                    if isinstance(chunk, bytes):
                        try:
                            chunk = chunk.decode(enc or "ascii", "replace")
                        except LookupError:
                            # unknown charset name: keep what is readable
                            chunk = chunk.decode("ascii", "replace")
                    chunks.append(chunk)
                value = "".join(chunks)

            lines.append("%s: %s\n" % (attr, value))
        return "".join(lines)
Example #5
Source File: email.py    From ACE with Apache License 2.0 6 votes vote down vote up
def decode_rfc2822(header_value):
    """Return header_value with its encoded parts decoded to text; plain headers come back unchanged.

    Each part is decoded with its declared charset when there is one, then
    with utf8 as a fallback, and finally rendered as HEX(...) if that fails too.
    """
    def _as_text(raw, charset):
        # parts that are already text need no decoding
        if isinstance(raw, str):
            return raw

        if charset is not None:
            try:
                return raw.decode(charset, errors='ignore')
            except Exception as e:
                logging.warning(f"unable to decode for charset {charset}: {e}")

        # no charset declared, or the declared one could not be used
        try:
            return raw.decode('utf8', errors='ignore')
        except Exception as e:
            logging.warning(f"unable to decode email header at all (defaulting to hex rep): {e}")
            return 'HEX({})'.format(raw.hex())

    return ''.join(_as_text(raw, charset) for raw, charset in decode_header(header_value))
Example #6
Source File: test_email_renamed.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s) 
Example #7
Source File: test_email_renamed.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 
Example #8
Source File: message.py    From imap_tools with Apache License 2.0 5 votes vote down vote up
def subject(self) -> str:
        """Message subject

        Returns '' when the message has no subject header. Otherwise every
        chunk returned by decode_header() is decoded and the results are
        concatenated, so subjects that mix encoded words with plain text (or
        span several encoded words) are returned in full.
        """
        if 'subject' not in self.obj:
            return ''
        # NOTE(review): assumes decode_value() returns str for every chunk - confirm.
        return ''.join(decode_value(*part) for part in decode_header(self.obj['subject']))
Example #9
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)]) 
Example #10
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')]) 
Example #11
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8659-1')]) 
Example #12
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) 
Example #13
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) 
Example #14
Source File: test_asian_codecs.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_japanese_codecs(self):
        eq = self.ndiffAssertEqual
        jcode = "euc-jp"
        gcode = "iso-8859-1"
        j = Charset(jcode)
        g = Charset(gcode)
        h = Header("Hello World!")
        jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
                     b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode)
        ghello = str(b'Gr\xfc\xdf Gott!', gcode)
        h.append(jhello, j)
        h.append(ghello, g)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        eq(h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF_Gott!?=""")
        eq(decode_header(h.encode()),
           [(b'Hello World! ', None),
            (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            (b'Gr\xfc\xdf Gott!', gcode)])
        subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5'
            b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2'
            b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3'
            b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9')
        subject = str(subject_bytes, jcode)
        h = Header(subject, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        eq(enc , """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
        # TK: full decode comparison
        eq(str(h).encode(jcode), subject_bytes) 
Example #15
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test') 
Example #16
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s) 
Example #17
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_whitespace_keeper(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 
Example #18
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s) 
Example #19
Source File: httputil.py    From opsbro with MIT License 5 votes vote down vote up
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").

    All atoms returned by ``decode_header`` are decoded and concatenated.
    Atoms with a declared charset are decoded with it. On Python 3 the
    unencoded atoms of a mixed header arrive as ``bytes`` with no charset;
    those are decoded as ISO-8859-1 so that they can be joined with the
    rest instead of raising ``TypeError``.
    """
    try:
        # Python 3
        from email.header import decode_header
    except ImportError:
        from email.Header import decode_header
    decoded_atoms = []
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        elif not isinstance(atom, str):
            # bytes atom without a charset (Python 3 only); latin-1 maps
            # every byte to a code point, so this cannot fail
            atom = atom.decode("latin-1")
        decoded_atoms.append(atom)
    return "".join(decoded_atoms)
Example #20
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog') 
Example #21
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>') 
Example #22
Source File: test_email.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) 
Example #23
Source File: nntplib.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def decode_header(header_str):
    """Turn a munged header value (given as a unicode string) into a
    readable, possibly non-ASCII, string.

    Byte chunks are decoded with their reported encoding, or as ASCII when
    none is reported; chunks that are already text are used as they are."""
    return ''.join(
        chunk.decode(encoding or 'ascii') if isinstance(chunk, bytes) else chunk
        for chunk, encoding in _email_decode_header(header_str)
    )
Example #24
Source File: nntplib.py    From Imogen with MIT License 5 votes vote down vote up
def decode_header(header_str):
    """Decode a munged header value, passed as a unicode string, into its
    readable (possibly non-ASCII) form."""
    def as_text(chunk, encoding):
        # bytes chunks carry an encoding, or default to ASCII when it is missing
        if isinstance(chunk, bytes):
            return chunk.decode(encoding or 'ascii')
        return chunk

    decoded = [as_text(chunk, encoding)
               for chunk, encoding in _email_decode_header(header_str)]
    return ''.join(decoded)
Example #25
Source File: test_asian_codecs.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def test_japanese_codecs(self):
        eq = self.ndiffAssertEqual
        jcode = "euc-jp"
        gcode = "iso-8859-1"
        j = Charset(jcode)
        g = Charset(gcode)
        h = Header("Hello World!")
        jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
                     b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode)
        ghello = str(b'Gr\xfc\xdf Gott!', gcode)
        h.append(jhello, j)
        h.append(ghello, g)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        eq(h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF_Gott!?=""")
        eq(decode_header(h.encode()),
           [(b'Hello World! ', None),
            (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            (b'Gr\xfc\xdf Gott!', gcode)])
        subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5'
            b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2'
            b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3'
            b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9')
        subject = str(subject_bytes, jcode)
        h = Header(subject, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        eq(enc , """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
        # TK: full decode comparison
        eq(str(h).encode(jcode), subject_bytes) 
Example #26
Source File: nntplib.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def decode_header(header_str):
    """Given a unicode string holding a munged header value, return the
    readable (possibly non-ASCII) value it stands for."""
    readable = ''
    for chunk, encoding in _email_decode_header(header_str):
        # only bytes chunks need decoding; ASCII is assumed when no encoding is given
        text = chunk.decode(encoding or 'ascii') if isinstance(chunk, bytes) else chunk
        readable = ''.join((readable, text))
    return readable
Example #27
Source File: health_metric.py    From royal-chaos with MIT License 5 votes vote down vote up
def decode_str(string):
    """Decode a possibly RFC 2047-encoded header value into text.

    All chunks returned by decode_header() are decoded and joined, so values
    that mix encoded words with plain text are returned in full and always
    as str. Chunks with a declared charset are decoded strictly with it;
    byte chunks without one are decoded as ASCII, replacing undecodable bytes.
    """
    pieces = []
    for value, charset in decode_header(string):
        if isinstance(value, bytes):
            if charset:
                value = value.decode(charset)
            else:
                # unencoded part of a mixed header arrives as bytes
                value = value.decode("ascii", "replace")
        pieces.append(value)
    return "".join(pieces)
Example #28
Source File: do_experiments.py    From royal-chaos with MIT License 5 votes vote down vote up
def decode_str(string):
    """Decode a possibly RFC 2047-encoded header value into text.

    All chunks returned by decode_header() are decoded and joined, so values
    that mix encoded words with plain text are returned in full and always
    as str. Chunks with a declared charset are decoded strictly with it;
    byte chunks without one are decoded as ASCII, replacing undecodable bytes.
    """
    pieces = []
    for value, charset in decode_header(string):
        if isinstance(value, bytes):
            if charset:
                value = value.decode(charset)
            else:
                # unencoded part of a mixed header arrives as bytes
                value = value.decode("ascii", "replace")
        pieces.append(value)
    return "".join(pieces)
Example #29
Source File: utils.py    From openprocurement.api with Apache License 2.0 5 votes vote down vote up
def get_filename(data):
    """Return ``data.filename`` with any RFC 2047 encoded words decoded.

    All chunks returned by decode_header() are decoded and joined, so an
    encoded name followed by a plain suffix keeps that suffix. If the
    filename cannot be parsed or decoded at all, ``data.filename`` is
    returned unchanged.
    """
    try:
        pairs = decode_header(data.filename)
    except Exception:
        # best effort: anything decode_header() cannot handle is passed through
        pairs = None
    if not pairs:
        return data.filename

    pieces = []
    for value, charset in pairs:
        if isinstance(value, bytes):
            try:
                value = value.decode(charset or 'ascii')
            except (LookupError, UnicodeDecodeError):
                # unknown charset or undecodable bytes: fall back to the raw name
                return data.filename
        pieces.append(value)
    return ''.join(pieces)
Example #30
Source File: Server.py    From Tools with MIT License 5 votes vote down vote up
def __parse_message(self, msg):
		"""Extract the From/To/Subject headers and the text body of msg.

		Returns a dict with the keys 'From', 'To', 'Subject' and 'Text'; a
		value is None when the header is missing/empty or no text body was
		extracted. From/To are rendered as 'name<address>'. Header values
		are RFC 2047 decoded chunk by chunk and joined, so values mixing
		encoded words and plain text are kept in full. The body is decoded
		with the charset declared in Content-Type; when none is declared
		(or it is unknown) the body is left as returned by get_payload().
		"""
		def decode_words(raw):
			# decode_header() yields bytes chunks for headers that contain
			# encoded words and str otherwise
			words = []
			for value, charset in decode_header(raw):
				if isinstance(value, bytes):
					try:
						value = value.decode(charset or 'ascii', 'replace')
					except LookupError:
						value = value.decode('ascii', 'replace')
				words.append(value)
			return ''.join(words)

		result = {}
		for header in ['From', 'To', 'Subject']:
			result[header] = None
			temp = msg.get(header, '')
			if temp:
				if header == 'Subject':
					result[header] = decode_words(temp)
				else:
					name, addr = parseaddr(temp)
					result[header] = '%s<%s>' % (decode_words(name), addr)
		result['Text'] = None
		# MIMEMultipart objects are not handled
		if not msg.is_multipart():
			content_type = msg.get_content_type()
			# only plain-text/HTML content is handled
			if content_type == 'text/plain' or content_type == 'text/html':
				content = msg.get_payload(decode=True)
				# get_content_charset() parses the Content-Type parameter
				# properly (quotes, trailing parameters) and returns a plain
				# string that bytes.decode() accepts
				charset = msg.get_content_charset()
				if charset:
					try:
						content = content.decode(charset, 'replace')
					except LookupError:
						# unknown charset name: keep the raw payload
						pass
				result['Text'] = content
		return result