mirror of
https://github.com/python/cpython.git
synced 2024-12-17 13:54:14 +08:00
c27e52265b
This patch primarily does two things: (1) it adds some internal-interface methods to Policy that allow for Policy to control the parsing and folding of headers in such a way that we can construct a backward compatibility policy that is 100% compatible with the 3.2 API, while allowing a new policy to implement the email6 API. (2) it adds that backward compatibility policy and refactors the test suite so that the only differences between the 3.2 test_email.py file and the 3.3 test_email.py file are some small changes in test framework and the addition of tests for bugs fixed that apply to the 3.2 API. There are some additional tweaks, such as moving just the code needed for the compatibility policy into _policybase, so that the library code can import only _policybase. That way the new code that will be added for email6 will only get imported when a non-compatibility policy is imported.
132 lines
4.9 KiB
Python
132 lines
4.9 KiB
Python
# Copyright (C) 2001-2007 Python Software Foundation
|
||
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
|
||
# Contact: email-sig@python.org
|
||
|
||
"""A parser of RFC 2822 and MIME email messages."""
|
||
|
||
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
|
||
|
||
import warnings
|
||
from io import StringIO, TextIOWrapper
|
||
|
||
from email.feedparser import FeedParser
|
||
from email.message import Message
|
||
from email._policybase import compat32
|
||
|
||
|
||
|
||
class Parser:
    """Text-mode parser that turns RFC 2822 / MIME input into a message tree."""

    def __init__(self, _class=Message, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Builds an in-memory object tree representing the email message; the
        tree can then be manipulated and handed to a Generator to obtain the
        textual representation of the message again.

        The input text must be a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.
        The header block ends either at the end of the text or at the first
        blank line.

        _class is the class instantiated whenever a new message object has
        to be created.  It must be callable with zero arguments.  Default is
        email.message.Message.

        The keyword-only policy argument is a policy object that controls a
        number of aspects of the parser's operation.  The default policy
        (compat32) maintains backward compatibility.
        """
        self.policy = policy
        self._class = _class

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads fp until it is exhausted and returns the root of the resulting
        message structure.  When the optional headersonly flag is true,
        parsing stops after the headers have been read.  The default is
        False, meaning the entire contents of the file are parsed.
        """
        feeder = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feeder._set_headersonly()
        # Feed the input in bounded chunks (8192 per read) so the whole file
        # is never requested from fp in a single call.
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  When the optional
        headersonly flag is true, parsing stops after the headers have been
        read.  The default is False, meaning the entire string is parsed.
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
|
||
|
||
|
||
|
||
class HeaderParser(Parser):
    """Parser variant that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        """Parse only the headers from the file fp.

        The headersonly argument is accepted for signature compatibility
        with Parser.parse but is ignored: header-only mode is always used.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers from the string text.

        The headersonly argument is accepted for signature compatibility
        with Parser.parsestr but is ignored: header-only mode is always used.
        """
        return Parser.parsestr(self, text, headersonly=True)
|
||
|
||
|
||
class BytesParser:
    """Parser for binary input; decodes to text and delegates to Parser."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are passed through unchanged to
        Parser, which is kept as self.parser and does the actual parsing.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        The caller's file object is left open; closing it remains the
        caller's responsibility.
        """
        # Decode as ASCII with surrogateescape so that non-ASCII bytes are
        # carried through the text parser instead of raising a decode error.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Detach rather than close: closing the wrapper (explicitly, via
            # a `with` block, or on garbage collection) would also close the
            # underlying binary file that belongs to the caller.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
|
||
|
||
|
||
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        """Parse only the headers from the binary file fp.

        The headersonly argument is accepted for signature compatibility
        with BytesParser.parse but is ignored: header-only mode is always
        used.
        """
        return BytesParser.parse(self, fp, True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers from the byte string text.

        The headersonly argument is accepted for signature compatibility
        with BytesParser.parsebytes but is ignored: header-only mode is
        always used.
        """
        return BytesParser.parsebytes(self, text, True)
|