mirror of
https://github.com/python/cpython.git
synced 2024-12-17 22:05:04 +08:00
96fd54eaec
The work on this is not 100% complete, but everything is present to allow real-world testing of the code. The only remaining major todo item is to (hopefully!) enhance the handling of non-ASCII bytes in headers converted to unicode by RFC2047 encoding them rather than replacing them with '?'s.
136 lines
5.1 KiB
Python
136 lines
5.1 KiB
Python
# Copyright (C) 2001-2007 Python Software Foundation
|
||
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
|
||
# Contact: email-sig@python.org
|
||
|
||
"""A parser of RFC 2822 and MIME email messages."""
|
||
|
||
__all__ = ['Parser', 'HeaderParser']
|
||
|
||
import warnings
|
||
from io import StringIO, TextIOWrapper
|
||
|
||
from email.feedparser import FeedParser
|
||
from email.message import Message
|
||
|
||
|
||
|
||
class Parser:
    def __init__(self, *args, **kws):
        """Set up a parser for RFC 2822 and MIME email messages held as text.

        Parsing builds an in-memory tree of message objects that can be
        inspected or modified, and later handed to a Generator to get the
        text form of the message back.

        The input text is expected to be a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block ends at the first blank line or at the end
        of the text, whichever comes first.

        Accepted arguments (positionally, in this order, or by keyword):

        _class -- the class instantiated whenever a new message object is
        needed.  Defaults to email.message.Message.

        strict -- deprecated and ignored; passing it only triggers a
        DeprecationWarning.

        TypeError is raised when an argument is supplied both positionally
        and by keyword, when more than two positional arguments are given,
        or when an unknown keyword argument is given.
        """
        positional = len(args)
        # Fold positional arguments into the keyword dict so that everything
        # below only has to look in one place.
        if positional:
            if '_class' in kws:
                raise TypeError("Multiple values for keyword arg '_class'")
            kws['_class'] = args[0]
        if positional == 2:
            if 'strict' in kws:
                raise TypeError("Multiple values for keyword arg 'strict'")
            kws['strict'] = args[1]
        elif positional > 2:
            raise TypeError('Too many arguments')
        self._class = kws.pop('_class', Message)
        if 'strict' in kws:
            # stacklevel 2 attributes the warning to the code calling Parser().
            warnings.warn("'strict' argument is deprecated (and ignored)",
                          DeprecationWarning, 2)
            del kws['strict']
        # Whatever is still left over is a keyword we do not know about.
        if kws:
            raise TypeError('Unexpected keyword arguments')

    def parse(self, fp, headersonly=False):
        """Build a message structure from the contents of a text file.

        fp is read in chunks via fp.read() until it returns an empty
        result, each chunk being fed to a FeedParser.  The root message
        object produced by the FeedParser is returned.

        When headersonly is true, parsing stops once the headers have been
        read; the default (False) parses the whole input.
        """
        feedparser = FeedParser(self._class)
        if headersonly:
            feedparser._set_headersonly()
        chunk = fp.read(8192)
        while chunk:
            feedparser.feed(chunk)
            chunk = fp.read(8192)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Build a message structure from a string.

        The string is wrapped in a StringIO and passed to parse(); the root
        message object is returned.  headersonly has the same meaning as in
        parse() and defaults to False (parse everything).
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
|
||
|
||
|
||
|
||
class HeaderParser(Parser):
    """A Parser that always stops after the headers.

    Both methods accept a headersonly argument so their signatures match
    Parser's, but its value is ignored: headers-only mode is always used.
    """

    def parse(self, fp, headersonly=True):
        """Parse the headers from the text file fp and return the message."""
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse the headers from the string text and return the message."""
        return Parser.parsestr(self, text, headersonly=True)
|
||
|
||
|
||
class BytesParser:

    def __init__(self, *args, **kw):
        """Set up a parser for binary RFC 2822 and MIME email messages.

        Parsing builds an in-memory tree of message objects that can be
        inspected or modified, and later handed to a Generator to get the
        textual form of the message back.

        The input is expected to be a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.
        The header block ends at the first blank line or at the end of the
        input, whichever comes first.

        All arguments are passed unchanged to Parser; see Parser for their
        meaning (notably _class, the class instantiated for new message
        objects).
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Build a message structure from the data in a binary file.

        fp is read through a text wrapper that decodes it as ASCII with the
        'surrogateescape' error handler, so non-ASCII bytes are carried
        through as lone surrogates instead of raising.  The root message
        object is returned.

        When headersonly is true, parsing stops once the headers have been
        read; the default (False) parses the whole input.

        fp is left open when this method returns; closing it remains the
        caller's responsibility.
        """
        wrapper = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(wrapper, headersonly)
        finally:
            # Without detaching, the wrapper would close the caller's file
            # as soon as it is garbage collected.
            wrapper.detach()

    def parsebytes(self, text, headersonly=False):
        """Build a message structure from a byte string.

        The bytes are decoded as ASCII with the 'surrogateescape' error
        handler and handed to the underlying Parser's parsestr().  The root
        message object is returned.

        When headersonly is true, parsing stops once the headers have been
        read; the default (False) parses the whole input.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
|