diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 04035b2612d7af..91a01fd1f44b84 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2641,7 +2641,7 @@ def _refold_parse_tree(parse_tree, *, policy): want_encoding = False last_ew = None if part.syntactic_break: - encoded_part = part.fold(policy=policy)[:-1] # strip nl + encoded_part = part.fold(policy=policy)[:-len(policy.linesep)] if policy.linesep not in encoded_part: # It fits on a single line if len(encoded_part) > maxlen - len(lines[-1]): diff --git a/Lib/email/generator.py b/Lib/email/generator.py index 6deb95ba8a13c7..6b08d84ff4866d 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -22,6 +22,7 @@ NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') +NEWLINE_WITHOUT_FWSP_BYTES = re.compile(br'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') @@ -429,7 +430,19 @@ def _write_headers(self, msg): # This is almost the same as the string version, except for handling # strings with 8bit bytes. for h, v in msg.raw_items(): - self._fp.write(self.policy.fold_binary(h, v)) + folded = self.policy.fold_binary(h, v) + if self.policy.verify_generated_headers: + linesep = self.policy.linesep.encode() + if not folded.endswith(linesep): + raise HeaderWriteError( + f'folded header does not end with {linesep!r}: {folded!r}') + folded_no_linesep = folded + if folded.endswith(linesep): + folded_no_linesep = folded[:-len(linesep)] + if NEWLINE_WITHOUT_FWSP_BYTES.search(folded_no_linesep): + raise HeaderWriteError( + f'folded header contains newline: {folded!r}') + self._fp.write(folded) # A blank line always separates headers from body self.write(self._NL) diff --git a/Lib/imaplib.py b/Lib/imaplib.py index 67b2cc02c40f18..abd530e98b484a 100644 --- a/Lib/imaplib.py +++ b/Lib/imaplib.py @@ -128,7 +128,7 @@ # We compile these in _mode_xxx. _Literal = br'.*{(?P\d+)}$' _Untagged_status = br'\* (?P\d+) (?P[A-Z-]+)( (?P.*))?' - +_control_chars = re.compile(b'[\x00-\x1F\x7F]') class IMAP4: @@ -958,6 +958,8 @@ def _command(self, name, *args): if arg is None: continue if isinstance(arg, str): arg = bytes(arg, self._encoding) + if _control_chars.search(arg): + raise ValueError("Control characters not allowed in commands") data = data + b' ' + arg literal = self.literal diff --git a/Lib/poplib.py b/Lib/poplib.py index 9796f0d2f9c552..64690628fec078 100644 --- a/Lib/poplib.py +++ b/Lib/poplib.py @@ -117,6 +117,8 @@ def _putline(self, line): def _putcmd(self, line): if self._debugging: print('*cmd*', repr(line)) line = bytes(line, self.encoding) + if re.search(b'[\x00-\x1F\x7F]', line): + raise ValueError('Control characters not allowed in commands') self._putline(line) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index da1f2ee719381e..7c260a3dfea85f 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2902,3 +2902,10 @@ def adjust_int_max_str_digits(max_digits): yield finally: sys.set_int_max_str_digits(current) + + +def control_characters_c0(): + """Returns a list of C0 control characters as strings. + C0 control characters defined as the byte range 0x00-0x1F, and 0x7F. + """ + return [chr(c) for c in range(0x00, 0x20)] + ["\x7F"] diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py index cdf1075bab8794..23adb06425988c 100644 --- a/Lib/test/test_email/test_generator.py +++ b/Lib/test/test_email/test_generator.py @@ -4,6 +4,7 @@ from email import message_from_string, message_from_bytes from email.message import EmailMessage from email.generator import Generator, BytesGenerator +from email.headerregistry import Address from email import policy import email.errors from test.test_email import TestEmailBase, parameterize @@ -263,7 +264,7 @@ class TestGenerator(TestGeneratorBase, TestEmailBase): typ = str def test_verify_generated_headers(self): - """gh-121650: by default the generator prevents header injection""" + # gh-121650: by default the generator prevents header injection class LiteralHeader(str): name = 'Header' def fold(self, **kwargs): @@ -284,6 +285,8 @@ def fold(self, **kwargs): with self.assertRaises(email.errors.HeaderWriteError): message.as_string() + with self.assertRaises(email.errors.HeaderWriteError): + message.as_bytes() class TestBytesGenerator(TestGeneratorBase, TestEmailBase): @@ -353,6 +356,27 @@ def test_smtputf8_policy(self): g.flatten(msg) self.assertEqual(s.getvalue(), expected) + def test_smtp_policy(self): + msg = EmailMessage() + msg["From"] = Address(addr_spec="foo@bar.com", display_name="Páolo") + msg["To"] = Address(addr_spec="bar@foo.com", display_name="Dinsdale") + msg["Subject"] = "Nudge nudge, wink, wink" + msg.set_content("oh boy, know what I mean, know what I mean?") + expected = textwrap.dedent("""\ + From: =?utf-8?q?P=C3=A1olo?= + To: Dinsdale + Subject: Nudge nudge, wink, wink + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: 7bit + MIME-Version: 1.0 + + oh boy, know what I mean, know what I mean? + """).encode().replace(b"\n", b"\r\n") + s = io.BytesIO() + g = BytesGenerator(s, policy=policy.SMTP) + g.flatten(msg) + self.assertEqual(s.getvalue(), expected) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index 76198392e5234e..815309732d04b6 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -267,7 +267,7 @@ def test_short_maxlen_error(self): policy.fold("Subject", subject) def test_verify_generated_headers(self): - """Turning protection off allows header injection""" + # Turning protection off allows header injection policy = email.policy.default.clone(verify_generated_headers=False) for text in ( 'Header: Value\r\nBad: Injection\r\n', @@ -290,6 +290,10 @@ def fold(self, **kwargs): message.as_string(), f"{text}\nBody", ) + self.assertEqual( + message.as_bytes(), + f"{text}\nBody".encode(), + ) # XXX: Need subclassing tests. # For adding subclassed objects, make sure the usual rules apply (subclass diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index 0593a3756b0843..ec95577e96dc32 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -462,6 +462,12 @@ def test_login(self): self.assertEqual(data[0], b'LOGIN completed') self.assertEqual(client.state, 'AUTH') + def test_control_characters(self): + client, _ = self._setup(SimpleIMAPHandler) + for c0 in support.control_characters_c0(): + with self.assertRaises(ValueError): + client.login(f'user{c0}', 'pass') + def test_logout(self): client, _ = self._setup(SimpleIMAPHandler) typ, data = client.login('user', 'pass') diff --git a/Lib/test/test_poplib.py b/Lib/test/test_poplib.py index 234c855545c2bb..18b8c191ab22d9 100644 --- a/Lib/test/test_poplib.py +++ b/Lib/test/test_poplib.py @@ -12,6 +12,7 @@ from unittest import TestCase, skipUnless from test import support as test_support +from test.support import control_characters_c0 threading = test_support.import_module('threading') HOST = test_support.HOST @@ -349,6 +350,13 @@ def test_quit(self): self.assertIsNone(self.client.sock) self.assertIsNone(self.client.file) + def test_control_characters(self): + for c0 in control_characters_c0(): + with self.assertRaises(ValueError): + self.client.user(f'user{c0}') + with self.assertRaises(ValueError): + self.client.pass_(f'{c0}pass') + @requires_ssl def test_stls_capa(self): capa = self.client.capa() diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py index 7708e206840581..baac84d71cbcba 100644 --- a/Lib/test/test_wsgiref.py +++ b/Lib/test/test_wsgiref.py @@ -1,5 +1,6 @@ from unittest import mock from test import support +from test.support import control_characters_c0 from test.test_httpservers import NoLogRequestHandler from unittest import TestCase from wsgiref.util import setup_testing_defaults @@ -517,6 +518,16 @@ def testExtras(self): '\r\n' ) + def testRaisesControlCharacters(self): + headers = Headers() + for c0 in control_characters_c0(): + self.assertRaises(ValueError, headers.__setitem__, f"key{c0}", "val") + self.assertRaises(ValueError, headers.__setitem__, "key", f"val{c0}") + self.assertRaises(ValueError, headers.add_header, f"key{c0}", "val", param="param") + self.assertRaises(ValueError, headers.add_header, "key", f"val{c0}", param="param") + self.assertRaises(ValueError, headers.add_header, "key", "val", param=f"param{c0}") + + class ErrorHandler(BaseCGIHandler): """Simple handler subclass for testing BaseHandler""" diff --git a/Lib/wsgiref/headers.py b/Lib/wsgiref/headers.py index fab851c5a44430..fd98e85d75492b 100644 --- a/Lib/wsgiref/headers.py +++ b/Lib/wsgiref/headers.py @@ -9,6 +9,7 @@ # existence of which force quoting of the parameter value. import re tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') +_control_chars_re = re.compile(r'[\x00-\x1F\x7F]') def _formatparam(param, value=None, quote=1): """Convenience function to format and return a key=value pair. @@ -41,6 +42,8 @@ def __init__(self, headers=None): def _convert_string_type(self, value): """Convert/check value type.""" if type(value) is str: + if _control_chars_re.search(value): + raise ValueError("Control characters not allowed in headers") return value raise AssertionError("Header names/values must be" " of type str (got {0})".format(repr(value))) diff --git a/Misc/NEWS.d/next/Library/2018-08-18-14-47-00.bpo-34424.wAlRuS.rst b/Misc/NEWS.d/next/Library/2018-08-18-14-47-00.bpo-34424.wAlRuS.rst new file mode 100644 index 00000000000000..2b384cd5513fca --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-08-18-14-47-00.bpo-34424.wAlRuS.rst @@ -0,0 +1,2 @@ +Fix serialization of messages containing encoded strings when the +policy.linesep is set to a multi-character string. Patch by Jens Troeger. diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-07-36.gh-issue-143916.dpWeOD.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-07-36.gh-issue-143916.dpWeOD.rst new file mode 100644 index 00000000000000..44bd0b27059f94 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-07-36.gh-issue-143916.dpWeOD.rst @@ -0,0 +1,2 @@ +Reject C0 control characters within wsgiref.headers.Headers fields, values, +and parameters. diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-41-06.gh-issue-143921.AeCOor.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-41-06.gh-issue-143921.AeCOor.rst new file mode 100644 index 00000000000000..4e13fe92bc60fb --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-41-06.gh-issue-143921.AeCOor.rst @@ -0,0 +1 @@ +Reject control characters in IMAP commands. diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-43-47.gh-issue-143923.DuytMe.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-43-47.gh-issue-143923.DuytMe.rst new file mode 100644 index 00000000000000..3cde4df3e0069f --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-16-11-43-47.gh-issue-143923.DuytMe.rst @@ -0,0 +1 @@ +Reject control characters in POP3 commands. diff --git a/Misc/NEWS.d/next/Security/2026-01-21-12-34-05.gh-issue-144125.TAz5uo.rst b/Misc/NEWS.d/next/Security/2026-01-21-12-34-05.gh-issue-144125.TAz5uo.rst new file mode 100644 index 00000000000000..e6333e724972c5 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-01-21-12-34-05.gh-issue-144125.TAz5uo.rst @@ -0,0 +1,4 @@ +:mod:`~email.generator.BytesGenerator` will now refuse to serialize (write) headers +that are unsafely folded or delimited; see +:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas +Bloemsaat and Petr Viktorin in :gh:`121650`).