cpython/Lib/encodings/__init__.py
2002-12-24 18:31:27 +00:00

123 lines
3.6 KiB
Python

""" Standard "encodings" Package
Standard Python encoding modules are stored in this package
directory.
Codec modules must have names corresponding to normalized encoding
names as defined in the normalize_encoding() function below, e.g.
'utf-8' must be implemented by the module 'utf_8.py'.
Each codec module must export the following interface:
* getregentry() -> (encoder, decoder, stream_reader, stream_writer)
The getregentry() API must return callable objects which adhere to
the Python Codec Interface Standard.
In addition, a module may optionally also define the following
APIs which are then used by the package's codec search function:
* getaliases() -> sequence of encoding name strings to use as aliases
Alias names returned by getaliases() must be normalized encoding
names as defined by normalize_encoding().
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs, exceptions, re
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
_norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
class CodecRegistryError(exceptions.LookupError,
exceptions.SystemError):
pass
def normalize_encoding(encoding):
""" Normalize an encoding name.
Normalization works as follows: all non-alphanumeric
characters except the dot used for Python package names are
collapsed and replaced with a single underscore, e.g. ' -;#'
becomes '_'.
"""
return '_'.join(_norm_encoding_RE.split(encoding))
def search_function(encoding):
# Cache lookup
entry = _cache.get(encoding, _unknown)
if entry is not _unknown:
return entry
# Import the module:
#
# First look in the encodings package, then try to lookup the
# encoding in the aliases mapping and retry the import using the
# default import module lookup scheme with the alias name.
#
modname = normalize_encoding(encoding)
try:
mod = __import__('encodings.' + modname,
globals(), locals(), _import_tail)
except ImportError:
import aliases
modname = (aliases.aliases.get(modname) or
aliases.aliases.get(modname.replace('.', '_')) or
modname)
try:
mod = __import__(modname, globals(), locals(), _import_tail)
except ImportError:
mod = None
try:
getregentry = mod.getregentry
except AttributeError:
# Not a codec module
mod = None
if mod is None:
# Cache misses
_cache[encoding] = None
return None
# Now ask the module for the registry entry
entry = tuple(getregentry())
if len(entry) != 4:
raise CodecRegistryError,\
'module "%s" (%s) failed to register' % \
(mod.__name__, mod.__file__)
for obj in entry:
if not callable(obj):
raise CodecRegistryError,\
'incompatible codecs in module "%s" (%s)' % \
(mod.__name__, mod.__file__)
# Cache the codec registry entry
_cache[encoding] = entry
# Register its aliases (without overwriting previously registered
# aliases)
try:
codecaliases = mod.getaliases()
except AttributeError:
pass
else:
import aliases
for alias in codecaliases:
if not aliases.aliases.has_key(alias):
aliases.aliases[alias] = modname
# Return the registry entry
return entry
# Register the search_function in the Python codec registry
codecs.register(search_function)