cpython/Lib/encodings/utf_16.py
Victor Stinner a92ad7ee2c Merged revisions 81471-81472 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r81471 | victor.stinner | 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) | 7 lines

  Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32

   * Fix seek() method of codecs.open(), don't write the BOM twice after seek(0)
   * Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes
   * test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by
     Solaris or Windows, but does it really exist? I found it the in the issue.
........
  r81472 | victor.stinner | 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) | 4 lines

  Fix my last commit (r81471) about codecs

  Rememder: don't touch the code just before a commit
........
2010-05-22 16:59:09 +00:00

156 lines
5.1 KiB
Python

""" Python 'utf-16' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs, sys
### Codec APIs
encode = codecs.utf_16_encode
def decode(input, errors='strict'):
return codecs.utf_16_decode(input, errors, True)
class IncrementalEncoder(codecs.IncrementalEncoder):
def __init__(self, errors='strict'):
codecs.IncrementalEncoder.__init__(self, errors)
self.encoder = None
def encode(self, input, final=False):
if self.encoder is None:
result = codecs.utf_16_encode(input, self.errors)[0]
if sys.byteorder == 'little':
self.encoder = codecs.utf_16_le_encode
else:
self.encoder = codecs.utf_16_be_encode
return result
return self.encoder(input, self.errors)[0]
def reset(self):
codecs.IncrementalEncoder.reset(self)
self.encoder = None
def getstate(self):
# state info we return to the caller:
# 0: stream is in natural order for this platform
# 2: endianness hasn't been determined yet
# (we're never writing in unnatural order)
return (2 if self.encoder is None else 0)
def setstate(self, state):
if state:
self.encoder = None
else:
if sys.byteorder == 'little':
self.encoder = codecs.utf_16_le_encode
else:
self.encoder = codecs.utf_16_be_encode
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def __init__(self, errors='strict'):
codecs.BufferedIncrementalDecoder.__init__(self, errors)
self.decoder = None
def _buffer_decode(self, input, errors, final):
if self.decoder is None:
(output, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, final)
if byteorder == -1:
self.decoder = codecs.utf_16_le_decode
elif byteorder == 1:
self.decoder = codecs.utf_16_be_decode
elif consumed >= 2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (output, consumed)
return self.decoder(input, self.errors, final)
def reset(self):
codecs.BufferedIncrementalDecoder.reset(self)
self.decoder = None
def getstate(self):
# additonal state info from the base class must be None here,
# as it isn't passed along to the caller
state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
# additional state info we pass to the caller:
# 0: stream is in natural order for this platform
# 1: stream is in unnatural order
# 2: endianness hasn't been determined yet
if self.decoder is None:
return (state, 2)
addstate = int((sys.byteorder == "big") !=
(self.decoder is codecs.utf_16_be_decode))
return (state, addstate)
def setstate(self, state):
# state[1] will be ignored by BufferedIncrementalDecoder.setstate()
codecs.BufferedIncrementalDecoder.setstate(self, state)
state = state[1]
if state == 0:
self.decoder = (codecs.utf_16_be_decode
if sys.byteorder == "big"
else codecs.utf_16_le_decode)
elif state == 1:
self.decoder = (codecs.utf_16_le_decode
if sys.byteorder == "big"
else codecs.utf_16_be_decode)
else:
self.decoder = None
class StreamWriter(codecs.StreamWriter):
def __init__(self, stream, errors='strict'):
codecs.StreamWriter.__init__(self, stream, errors)
self.encoder = None
def reset(self):
codecs.StreamWriter.reset(self)
self.encoder = None
def encode(self, input, errors='strict'):
if self.encoder is None:
result = codecs.utf_16_encode(input, errors)
if sys.byteorder == 'little':
self.encoder = codecs.utf_16_le_encode
else:
self.encoder = codecs.utf_16_be_encode
return result
else:
return self.encoder(input, errors)
class StreamReader(codecs.StreamReader):
def reset(self):
codecs.StreamReader.reset(self)
try:
del self.decode
except AttributeError:
pass
def decode(self, input, errors='strict'):
(object, consumed, byteorder) = \
codecs.utf_16_ex_decode(input, errors, 0, False)
if byteorder == -1:
self.decode = codecs.utf_16_le_decode
elif byteorder == 1:
self.decode = codecs.utf_16_be_decode
elif consumed>=2:
raise UnicodeError("UTF-16 stream does not start with BOM")
return (object, consumed)
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='utf-16',
encode=encode,
decode=decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)