cpython/Lib/http/cookies.py
R. David Murray daa7ba038b Merged revisions 87550 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r87550 | r.david.murray | 2010-12-28 13:54:13 -0500 (Tue, 28 Dec 2010) | 8 lines

  #9824: encode , and ; in cookie values so that browsers don't split on them

  There is a small chance of backward incompatibility here, but only for
  non-SimpleCookie applications reading SimpleCookie generated cookies.  Even
  then, any such ap is likely to be handling escaped values already, and it would
  take a fairly perverse implementation of unescaping to fail to unescape these
  newly escaped chars, so the risk seems minimal.
........
2010-12-28 18:56:33 +00:00

572 lines
20 KiB
Python

#!/usr/bin/env python3
#
####
# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>
#
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software
# and its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Timothy O'Malley not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
####
#
# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
# by Timothy O'Malley <timo@alum.mit.edu>
#
# Cookie.py is a Python module for the handling of HTTP
# cookies as a Python dictionary. See RFC 2109 for more
# information on cookies.
#
# The original idea to treat Cookies as a dictionary came from
# Dave Mitchell (davem@magnet.com) in 1995, when he released the
# first version of nscookie.py.
#
####
r"""
Here's a sample session to show how to use this module.
At the moment, this is the only documentation.
The Basics
----------
Importing is easy...
>>> from http import cookies
Most of the time you start by creating a cookie.
>>> C = cookies.SimpleCookie()
Once you've created your Cookie, you can add values just as if it were
a dictionary.
>>> C = cookies.SimpleCookie()
>>> C["fig"] = "newton"
>>> C["sugar"] = "wafer"
>>> C.output()
'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
Notice that the printable representation of a Cookie is the
appropriate format for a Set-Cookie: header. This is the
default behavior. You can change the header and printed
attributes by using the .output() function
>>> C = cookies.SimpleCookie()
>>> C["rocky"] = "road"
>>> C["rocky"]["path"] = "/cookie"
>>> print(C.output(header="Cookie:"))
Cookie: rocky=road; Path=/cookie
>>> print(C.output(attrs=[], header="Cookie:"))
Cookie: rocky=road
The load() method of a Cookie extracts cookies from a string. In a
CGI script, you would use this method to extract the cookies from the
HTTP_COOKIE environment variable.
>>> C = cookies.SimpleCookie()
>>> C.load("chips=ahoy; vienna=finger")
>>> C.output()
'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
The load() method is darn-tootin smart about identifying cookies
within a string. Escaped quotation marks, nested semicolons, and other
such trickeries do not confuse it.
>>> C = cookies.SimpleCookie()
>>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
>>> print(C)
Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
Each element of the Cookie also supports all of the RFC 2109
Cookie attributes. Here's an example which sets the Path
attribute.
>>> C = cookies.SimpleCookie()
>>> C["oreo"] = "doublestuff"
>>> C["oreo"]["path"] = "/"
>>> print(C)
Set-Cookie: oreo=doublestuff; Path=/
Each dictionary element has a 'value' attribute, which gives you
back the value associated with the key.
>>> C = cookies.SimpleCookie()
>>> C["twix"] = "none for you"
>>> C["twix"].value
'none for you'
The SimpleCookie expects that all values should be standard strings.
Just to be sure, SimpleCookie invokes the str() builtin to convert
the value to a string, when the values are set dictionary-style.
>>> C = cookies.SimpleCookie()
>>> C["number"] = 7
>>> C["string"] = "seven"
>>> C["number"].value
'7'
>>> C["string"].value
'seven'
>>> C.output()
'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
Finis.
"""
#
# Import our required modules
#
import re
import string
__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
_nulljoin = ''.join
_semispacejoin = '; '.join
_spacejoin = ' '.join
#
# Define an exception visible to External modules
#
class CookieError(Exception):
pass
# These quoting routines conform to the RFC2109 specification, which in
# turn references the character definitions from RFC2068. They provide
# a two-way quoting algorithm. Any non-text character is translated
# into a 4 character sequence: a forward-slash followed by the
# three-digit octal equivalent of the character. Any '\' or '"' is
# quoted with a preceeding '\' slash.
#
# These are taken from RFC2068 and RFC2109.
# _LegalChars is the list of chars which don't require "'s
# _Translator hash-table for fast quoting
#
_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~"
_Translator = {
'\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
'\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
'\006' : '\\006', '\007' : '\\007', '\010' : '\\010',
'\011' : '\\011', '\012' : '\\012', '\013' : '\\013',
'\014' : '\\014', '\015' : '\\015', '\016' : '\\016',
'\017' : '\\017', '\020' : '\\020', '\021' : '\\021',
'\022' : '\\022', '\023' : '\\023', '\024' : '\\024',
'\025' : '\\025', '\026' : '\\026', '\027' : '\\027',
'\030' : '\\030', '\031' : '\\031', '\032' : '\\032',
'\033' : '\\033', '\034' : '\\034', '\035' : '\\035',
'\036' : '\\036', '\037' : '\\037',
# Because of the way browsers really handle cookies (as opposed
# to what the RFC says) we also encode , and ;
',' : '\\054', ';' : '\\073',
'"' : '\\"', '\\' : '\\\\',
'\177' : '\\177', '\200' : '\\200', '\201' : '\\201',
'\202' : '\\202', '\203' : '\\203', '\204' : '\\204',
'\205' : '\\205', '\206' : '\\206', '\207' : '\\207',
'\210' : '\\210', '\211' : '\\211', '\212' : '\\212',
'\213' : '\\213', '\214' : '\\214', '\215' : '\\215',
'\216' : '\\216', '\217' : '\\217', '\220' : '\\220',
'\221' : '\\221', '\222' : '\\222', '\223' : '\\223',
'\224' : '\\224', '\225' : '\\225', '\226' : '\\226',
'\227' : '\\227', '\230' : '\\230', '\231' : '\\231',
'\232' : '\\232', '\233' : '\\233', '\234' : '\\234',
'\235' : '\\235', '\236' : '\\236', '\237' : '\\237',
'\240' : '\\240', '\241' : '\\241', '\242' : '\\242',
'\243' : '\\243', '\244' : '\\244', '\245' : '\\245',
'\246' : '\\246', '\247' : '\\247', '\250' : '\\250',
'\251' : '\\251', '\252' : '\\252', '\253' : '\\253',
'\254' : '\\254', '\255' : '\\255', '\256' : '\\256',
'\257' : '\\257', '\260' : '\\260', '\261' : '\\261',
'\262' : '\\262', '\263' : '\\263', '\264' : '\\264',
'\265' : '\\265', '\266' : '\\266', '\267' : '\\267',
'\270' : '\\270', '\271' : '\\271', '\272' : '\\272',
'\273' : '\\273', '\274' : '\\274', '\275' : '\\275',
'\276' : '\\276', '\277' : '\\277', '\300' : '\\300',
'\301' : '\\301', '\302' : '\\302', '\303' : '\\303',
'\304' : '\\304', '\305' : '\\305', '\306' : '\\306',
'\307' : '\\307', '\310' : '\\310', '\311' : '\\311',
'\312' : '\\312', '\313' : '\\313', '\314' : '\\314',
'\315' : '\\315', '\316' : '\\316', '\317' : '\\317',
'\320' : '\\320', '\321' : '\\321', '\322' : '\\322',
'\323' : '\\323', '\324' : '\\324', '\325' : '\\325',
'\326' : '\\326', '\327' : '\\327', '\330' : '\\330',
'\331' : '\\331', '\332' : '\\332', '\333' : '\\333',
'\334' : '\\334', '\335' : '\\335', '\336' : '\\336',
'\337' : '\\337', '\340' : '\\340', '\341' : '\\341',
'\342' : '\\342', '\343' : '\\343', '\344' : '\\344',
'\345' : '\\345', '\346' : '\\346', '\347' : '\\347',
'\350' : '\\350', '\351' : '\\351', '\352' : '\\352',
'\353' : '\\353', '\354' : '\\354', '\355' : '\\355',
'\356' : '\\356', '\357' : '\\357', '\360' : '\\360',
'\361' : '\\361', '\362' : '\\362', '\363' : '\\363',
'\364' : '\\364', '\365' : '\\365', '\366' : '\\366',
'\367' : '\\367', '\370' : '\\370', '\371' : '\\371',
'\372' : '\\372', '\373' : '\\373', '\374' : '\\374',
'\375' : '\\375', '\376' : '\\376', '\377' : '\\377'
}
def _quote(str, LegalChars=_LegalChars):
r"""Quote a string for use in a cookie header.
If the string does not need to be double-quoted, then just return the
string. Otherwise, surround the string in doublequotes and quote
(with a \) special characters.
"""
if all(c in LegalChars for c in str):
return str
else:
return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"'
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
def _unquote(str):
# If there aren't any doublequotes,
# then there can't be any special characters. See RFC 2109.
if len(str) < 2:
return str
if str[0] != '"' or str[-1] != '"':
return str
# We have to assume that we must decode this string.
# Down to work.
# Remove the "s
str = str[1:-1]
# Check for special sequences. Examples:
# \012 --> \n
# \" --> "
#
i = 0
n = len(str)
res = []
while 0 <= i < n:
o_match = _OctalPatt.search(str, i)
q_match = _QuotePatt.search(str, i)
if not o_match and not q_match: # Neither matched
res.append(str[i:])
break
# else:
j = k = -1
if o_match:
j = o_match.start(0)
if q_match:
k = q_match.start(0)
if q_match and (not o_match or k < j): # QuotePatt matched
res.append(str[i:k])
res.append(str[k+1])
i = k + 2
else: # OctalPatt matched
res.append(str[i:j])
res.append(chr(int(str[j+1:j+4], 8)))
i = j + 4
return _nulljoin(res)
# The _getdate() routine is used to set the expiration time in the cookie's HTTP
# header. By default, _getdate() returns the current time in the appropriate
# "expires" format for a Set-Cookie header. The one optional argument is an
# offset from now, in seconds. For example, an offset of -3600 means "one hour
# ago". The offset may be a floating point number.
#
_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
_monthname = [None,
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
from time import gmtime, time
now = time()
year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future)
return "%s, %02d-%3s-%4d %02d:%02d:%02d GMT" % \
(weekdayname[wd], day, monthname[month], year, hh, mm, ss)
class Morsel(dict):
"""A class to hold ONE (key, value) pair.
In a cookie, each such pair may have several attributes, so this class is
used to keep the attributes associated with the appropriate key,value pair.
This class also includes a coded_value attribute, which is used to hold
the network representation of the value. This is most useful when Python
objects are pickled for network transit.
"""
# RFC 2109 lists these attributes as reserved:
# path comment domain
# max-age secure version
#
# For historical reasons, these attributes are also reserved:
# expires
#
# This is an extension from Microsoft:
# httponly
#
# This dictionary provides a mapping from the lowercase
# variant on the left to the appropriate traditional
# formatting on the right.
_reserved = {
"expires" : "expires",
"path" : "Path",
"comment" : "Comment",
"domain" : "Domain",
"max-age" : "Max-Age",
"secure" : "secure",
"httponly" : "httponly",
"version" : "Version",
}
def __init__(self):
# Set defaults
self.key = self.value = self.coded_value = None
# Set default attributes
for key in self._reserved:
dict.__setitem__(self, key, "")
def __setitem__(self, K, V):
K = K.lower()
if not K in self._reserved:
raise CookieError("Invalid Attribute %s" % K)
dict.__setitem__(self, K, V)
def isReservedKey(self, K):
return K.lower() in self._reserved
def set(self, key, val, coded_val, LegalChars=_LegalChars):
# First we verify that the key isn't a reserved word
# Second we make sure it only contains legal characters
if key.lower() in self._reserved:
raise CookieError("Attempt to set a reserved key: %s" % key)
if any(c not in LegalChars for c in key):
raise CookieError("Illegal key value: %s" % key)
# It's a good key, so save it.
self.key = key
self.value = val
self.coded_value = coded_val
def output(self, attrs=None, header="Set-Cookie:"):
return "%s %s" % (header, self.OutputString(attrs))
__str__ = output
def __repr__(self):
return '<%s: %s=%s>' % (self.__class__.__name__,
self.key, repr(self.value))
def js_output(self, attrs=None):
# Print javascript
return """
<script type="text/javascript">
<!-- begin hiding
document.cookie = \"%s\";
// end hiding -->
</script>
""" % (self.OutputString(attrs).replace('"', r'\"'))
def OutputString(self, attrs=None):
# Build up our result
#
result = []
append = result.append
# First, the key=value pair
append("%s=%s" % (self.key, self.coded_value))
# Now add any defined attributes
if attrs is None:
attrs = self._reserved
items = sorted(self.items())
for key, value in items:
if value == "":
continue
if key not in attrs:
continue
if key == "expires" and isinstance(value, int):
append("%s=%s" % (self._reserved[key], _getdate(value)))
elif key == "max-age" and isinstance(value, int):
append("%s=%d" % (self._reserved[key], value))
elif key == "secure":
append(str(self._reserved[key]))
elif key == "httponly":
append(str(self._reserved[key]))
else:
append("%s=%s" % (self._reserved[key], value))
# Return the result
return _semispacejoin(result)
#
# Pattern for finding cookie
#
# This used to be strict parsing based on the RFC2109 and RFC2068
# specifications. I have since discovered that MSIE 3.0x doesn't
# follow the character rules outlined in those specs. As a
# result, the parsing rules here are less strict.
#
_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
_CookiePattern = re.compile(r"""
(?x) # This is a verbose pattern
(?P<key> # Start of group 'key'
""" + _LegalCharsPatt + r"""+? # Any word of at least one letter
) # End of group 'key'
\s*=\s* # Equal Sign
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
\w{3},\s[\w\d-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
| # or
""" + _LegalCharsPatt + r"""* # Any word or empty string
) # End of group 'val'
\s*;? # Probably ending in a semi-colon
""", re.ASCII) # May be removed if safe.
# At long last, here is the cookie class. Using this class is almost just like
# using a dictionary. See this module's docstring for example usage.
#
class BaseCookie(dict):
"""A container class for a set of Morsels."""
def value_decode(self, val):
"""real_value, coded_value = value_decode(STRING)
Called prior to setting a cookie's value from the network
representation. The VALUE is the value read from HTTP
header.
Override this function to modify the behavior of cookies.
"""
return val, val
def value_encode(self, val):
"""real_value, coded_value = value_encode(VALUE)
Called prior to setting a cookie's value from the dictionary
representation. The VALUE is the value being assigned.
Override this function to modify the behavior of cookies.
"""
strval = str(val)
return strval, strval
def __init__(self, input=None):
if input:
self.load(input)
def __set(self, key, real_value, coded_value):
"""Private method for setting a cookie's value"""
M = self.get(key, Morsel())
M.set(key, real_value, coded_value)
dict.__setitem__(self, key, M)
def __setitem__(self, key, value):
"""Dictionary style assignment."""
rval, cval = self.value_encode(value)
self.__set(key, rval, cval)
def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
"""Return a string suitable for HTTP."""
result = []
items = sorted(self.items())
for key, value in items:
result.append(value.output(attrs, header))
return sep.join(result)
__str__ = output
def __repr__(self):
l = []
items = sorted(self.items())
for key, value in items:
l.append('%s=%s' % (key, repr(value.value)))
return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l))
def js_output(self, attrs=None):
"""Return a string suitable for JavaScript."""
result = []
items = sorted(self.items())
for key, value in items:
result.append(value.js_output(attrs))
return _nulljoin(result)
def load(self, rawdata):
"""Load cookies from a string (presumably HTTP_COOKIE) or
from a dictionary. Loading cookies from a dictionary 'd'
is equivalent to calling:
map(Cookie.__setitem__, d.keys(), d.values())
"""
if isinstance(rawdata, str):
self.__parse_string(rawdata)
else:
# self.update() wouldn't call our custom __setitem__
for key, value in rawdata.items():
self[key] = value
return
def __parse_string(self, str, patt=_CookiePattern):
i = 0 # Our starting point
n = len(str) # Length of string
M = None # current morsel
while 0 <= i < n:
# Start looking for a cookie
match = patt.search(str, i)
if not match:
# No more cookies
break
key, value = match.group("key"), match.group("val")
i = match.end(0)
# Parse the key, value in case it's metainfo
if key[0] == "$":
# We ignore attributes which pertain to the cookie
# mechanism as a whole. See RFC 2109.
# (Does anyone care?)
if M:
M[key[1:]] = value
elif key.lower() in Morsel._reserved:
if M:
M[key] = _unquote(value)
else:
rval, cval = self.value_decode(value)
self.__set(key, rval, cval)
M = self[key]
class SimpleCookie(BaseCookie):
"""
SimpleCookie supports strings as cookie values. When setting
the value using the dictionary assignment notation, SimpleCookie
calls the builtin str() to convert the value to a string. Values
received from HTTP are kept as strings.
"""
def value_decode(self, val):
return _unquote(val), val
def value_encode(self, val):
strval = str(val)
return strval, _quote(strval)