Python Examples of email.header.decode

Source File: email_utils.py From app with MIT License

6 votes

def parseaddr_unicode(addr) -> (str, str):
    """Like parseaddr but return name in unicode instead of in RFC 2047 format
    '=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>' -> ('Nhơn Nguyễn', "abcd@gmail.com")
    """
    name, email = parseaddr(addr)
    email = email.strip().lower()
    if name:
        name = name.strip()
        decoded_string, charset = decode_header(name)[0]
        if charset is not None:
            try:
                name = decoded_string.decode(charset)
            except UnicodeDecodeError:
                LOG.warning("Cannot decode addr name %s", name)
                name = ""
        else:
            name = decoded_string

    return name, email

Source File: test_email.py From ironpython3 with Apache License 2.0

6 votes

def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

Source File: utils.py From imap_tools with Apache License 2.0

6 votes

def parse_email_addresses(raw_header: str) -> (dict,):
    """
    Parse email addresses from header
    :param raw_header: example: '=?UTF-8?B?0J7Qu9C1=?= <name@company.ru>,\r\n "\'\\"z, z\\"\'" <imap.tools@ya.ru>'
    :return: tuple(dict(name: str, email: str, full: str))
    """
    result = []
    for raw_name, email in getaddresses([raw_header]):
        name = decode_value(*decode_header(raw_name)[0]).strip()
        email = email.strip()
        if not (name or email):
            continue
        result.append({
            'email': email if '@' in email else '',
            'name': name,
            'full': '{} <{}>'.format(name, email) if name and email else name or email
        })
    return tuple(result)

Source File: store.py From pymailq with GNU General Public License v2.0

6 votes

def show(self):
        """
        Return mail detailled representation for printing

        :return: Representation as :class:`str`
        """
        output = "=== Mail %s ===\n" % (self.qid,)
        for attr in sorted(dir(self.head)):
            if attr.startswith("_"):
                continue

            value = getattr(self.head, attr)
            if not isinstance(value, str):
                value = ", ".join(value)

            if attr == "Subject":
                print(attr, value)
                value, enc = header.decode_header(value)[0]
                print(enc, attr, value)
                if sys.version_info[0] == 2:
                    value = value.decode(enc) if enc else unicode(value)

            output += "%s: %s\n" % (attr, value)
        return output

Source File: email.py From ACE with Apache License 2.0

6 votes

def decode_rfc2822(header_value):
    """Returns the value of the rfc2822 decoded header, or the header_value as-is if it's not encoded."""
    result = []
    for binary_value, charset in decode_header(header_value):
        decoded_value = None
        if isinstance(binary_value, str):
            result.append(binary_value)
            continue

        if charset is not None:
            try:
                decoded_value = binary_value.decode(charset, errors='ignore')
            except Exception as e:
                logging.warning(f"unable to decode for charset {charset}: {e}")

        if decoded_value is None:
            try:
                decoded_value = binary_value.decode('utf8', errors='ignore')
            except Exception as e:
                logging.warning(f"unable to decode email header at all (defaulting to hex rep): {e}")
                decoded_value = 'HEX({})'.format(binary_value.hex())

        result.append(decoded_value)

    return ''.join(result)

Source File: test_email_renamed.py From ironpython2 with Apache License 2.0

6 votes

def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

Source File: test_email_renamed.py From ironpython2 with Apache License 2.0

6 votes

def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

Source File: message.py From imap_tools with Apache License 2.0

5 votes

def subject(self) -> str:
        """Message subject"""
        if 'subject' in self.obj:
            msg_subject = decode_header(self.obj['subject'])
            return decode_value(msg_subject[0][0], msg_subject[0][1])
        return ''

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8659-1')])

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

Source File: test_asian_codecs.py From ironpython3 with Apache License 2.0

5 votes

def test_japanese_codecs(self):
        eq = self.ndiffAssertEqual
        jcode = "euc-jp"
        gcode = "iso-8859-1"
        j = Charset(jcode)
        g = Charset(gcode)
        h = Header("Hello World!")
        jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
                     b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode)
        ghello = str(b'Gr\xfc\xdf Gott!', gcode)
        h.append(jhello, j)
        h.append(ghello, g)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        eq(h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF_Gott!?=""")
        eq(decode_header(h.encode()),
           [(b'Hello World! ', None),
            (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            (b'Gr\xfc\xdf Gott!', gcode)])
        subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5'
            b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2'
            b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3'
            b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9')
        subject = str(subject_bytes, jcode)
        h = Header(subject, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        eq(enc , """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
        # TK: full decode comparison
        eq(str(h).encode(jcode), subject_bytes)

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_whitespace_keeper(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

Source File: httputil.py From opsbro with MIT License

5 votes

def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr")."""
    try:
        # Python 3
        from email.header import decode_header
    except ImportError:
        from email.Header import decode_header
    atoms = decode_header(value)
    decodedvalue = ""
    for atom, charset in atoms:
        if charset is not None:
            atom = atom.decode(charset)
        decodedvalue += atom
    return decodedvalue

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

Source File: test_email.py From ironpython3 with Apache License 2.0

5 votes

def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

Source File: nntplib.py From ironpython3 with Apache License 2.0

5 votes

def decode_header(header_str):
    """Takes an unicode string representing a munged header value
    and decodes it as a (possibly non-ASCII) readable value."""
    parts = []
    for v, enc in _email_decode_header(header_str):
        if isinstance(v, bytes):
            parts.append(v.decode(enc or 'ascii'))
        else:
            parts.append(v)
    return ''.join(parts)

Source File: nntplib.py From Imogen with MIT License

5 votes

def decode_header(header_str):
    """Takes a unicode string representing a munged header value
    and decodes it as a (possibly non-ASCII) readable value."""
    parts = []
    for v, enc in _email_decode_header(header_str):
        if isinstance(v, bytes):
            parts.append(v.decode(enc or 'ascii'))
        else:
            parts.append(v)
    return ''.join(parts)

Source File: test_asian_codecs.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def test_japanese_codecs(self):
        eq = self.ndiffAssertEqual
        jcode = "euc-jp"
        gcode = "iso-8859-1"
        j = Charset(jcode)
        g = Charset(gcode)
        h = Header("Hello World!")
        jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
                     b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode)
        ghello = str(b'Gr\xfc\xdf Gott!', gcode)
        h.append(jhello, j)
        h.append(ghello, g)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        eq(h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF_Gott!?=""")
        eq(decode_header(h.encode()),
           [(b'Hello World! ', None),
            (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            (b'Gr\xfc\xdf Gott!', gcode)])
        subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5'
            b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2'
            b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3'
            b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9')
        subject = str(subject_bytes, jcode)
        h = Header(subject, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        eq(enc , """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
        # TK: full decode comparison
        eq(str(h).encode(jcode), subject_bytes)

Source File: nntplib.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def decode_header(header_str):
    """Takes an unicode string representing a munged header value
    and decodes it as a (possibly non-ASCII) readable value."""
    parts = []
    for v, enc in _email_decode_header(header_str):
        if isinstance(v, bytes):
            parts.append(v.decode(enc or 'ascii'))
        else:
            parts.append(v)
    return ''.join(parts)

Source File: health_metric.py From royal-chaos with MIT License

5 votes

def decode_str(string):
    value, charset = decode_header(string)[0]
    if charset:
        value = value.decode(charset)
    return value

Source File: do_experiments.py From royal-chaos with MIT License

5 votes

def decode_str(string):
    value, charset = decode_header(string)[0]
    if charset:
        value = value.decode(charset)
    return value

Source File: utils.py From openprocurement.api with Apache License 2.0

5 votes

def get_filename(data):
    try:
        pairs = decode_header(data.filename)
    except Exception:
        pairs = None
    if not pairs:
        return data.filename
    header = pairs[0]
    if header[1]:
        return header[0].decode(header[1])
    else:
        return header[0]

Source File: Server.py From Tools with MIT License

5 votes

def __parse_message(self, msg):
		result = {}
		for header in ['From', 'To', 'Subject']:
			result[header] = None
			temp = msg.get(header, '')
			if temp:
				if header == 'Subject':
					value, charset = decode_header(temp)[0]
					if charset:
						value = value.decode(charset)
					result[header] = value
				else:
					name, addr = parseaddr(temp)
					value, charset = decode_header(name)[0]
					if charset:
						value = value.decode(charset)
					result[header] = '%s<%s>' % (value, addr)
		result['Text'] = None
		# 不考虑MIMEMultipart对象
		if not msg.is_multipart():
			content_type = msg.get_content_type()
			# 只考虑纯文本/HTML内容
			if content_type == 'text/plain' or content_type == 'text/html':
				content = msg.get_payload(decode=True)
				charset = msg.get_charset()
				if charset is None:
					temp = msg.get('Content-Type', '').lower()
					pos = temp.find('charset=')
					if pos >= 0:
						charset = temp[pos+8:].strip()
				if charset:
					content = content.decode(charset)
				result['Text'] = content
		return result

Python email.header.decode_header() Examples