mirror of
https://github.com/python/cpython.git
synced 2024-11-28 04:15:11 +08:00
rename HTMLParser to html.parser and htmlentitydefs to html.entities;
includes merge of trunk revision 63432
This commit is contained in:
parent
9b020c784c
commit
3c50ea4303
@ -75,12 +75,12 @@ The module defines a parser class and an exception:
|
||||
Interface definition for transforming an abstract flow of formatting events into
|
||||
specific output events on writer objects.
|
||||
|
||||
Module :mod:`HTMLParser`
|
||||
Module :mod:`html.parser`
|
||||
Alternate HTML parser that offers a slightly lower-level view of the input, but
|
||||
is designed to work with XHTML, and does not implement some of the SGML syntax
|
||||
not used in "HTML as deployed" and which isn't legal for XHTML.
|
||||
|
||||
Module :mod:`htmlentitydefs`
|
||||
Module :mod:`html.entities`
|
||||
Definition of replacement text for XHTML 1.0 entities.
|
||||
|
||||
Module :mod:`sgmllib`
|
||||
@ -147,10 +147,10 @@ additional methods and instance variables for use within tag methods.
|
||||
:meth:`save_bgn` will raise a :exc:`TypeError` exception.
|
||||
|
||||
|
||||
:mod:`htmlentitydefs` --- Definitions of HTML general entities
|
||||
==============================================================
|
||||
:mod:`html.entities` --- Definitions of HTML general entities
|
||||
=============================================================
|
||||
|
||||
.. module:: htmlentitydefs
|
||||
.. module:: html.entities
|
||||
:synopsis: Definitions of HTML general entities.
|
||||
.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
|
||||
:mod:`HTMLParser` --- Simple HTML and XHTML parser
|
||||
==================================================
|
||||
:mod:`html.parser` --- Simple HTML and XHTML parser
|
||||
===================================================
|
||||
|
||||
.. module:: HTMLParser
|
||||
.. module:: html.parser
|
||||
:synopsis: A simple parser that can handle HTML and XHTML.
|
||||
|
||||
|
||||
@ -18,7 +18,7 @@ in :mod:`sgmllib`.
|
||||
|
||||
The :class:`HTMLParser` class is instantiated without arguments.
|
||||
|
||||
An HTMLParser instance is fed HTML data and calls handler functions when tags
|
||||
An :class:`HTMLParser` instance is fed HTML data and calls handler functions when tags
|
||||
begin and end. The :class:`HTMLParser` class is meant to be overridden by the
|
||||
user to provide a desired behavior.
|
||||
|
||||
@ -87,8 +87,8 @@ An exception is defined as well:
|
||||
HREF="http://www.cwi.nl/">``, this method would be called as
|
||||
``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
|
||||
|
||||
All entity references from htmlentitydefs are replaced in the attribute
|
||||
values.
|
||||
All entity references from :mod:`html.entities` are replaced in the
|
||||
attribute values.
|
||||
|
||||
|
||||
.. method:: HTMLParser.handle_startendtag(tag, attrs)
|
||||
@ -166,7 +166,7 @@ Example HTML Parser Application
|
||||
As a basic example, below is a simple HTML parser that uses the
|
||||
:class:`HTMLParser` class to print out tags as they are encountered::
|
||||
|
||||
from HTMLParser import HTMLParser
|
||||
from html.parser import HTMLParser
|
||||
|
||||
class MyHTMLParser(HTMLParser):
|
||||
|
||||
|
1
Lib/html/__init__.py
Normal file
1
Lib/html/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# This directory is a Python package.
|
@ -372,16 +372,17 @@ class HTMLParser(_markupbase.ParserBase):
|
||||
c = int(s)
|
||||
return chr(c)
|
||||
else:
|
||||
# Cannot use name2codepoint directly, because HTMLParser supports apos,
|
||||
# which is not part of HTML 4
|
||||
import htmlentitydefs
|
||||
# Cannot use name2codepoint directly, because HTMLParser
|
||||
# supports apos, which is not part of HTML 4
|
||||
import html.entities
|
||||
if HTMLParser.entitydefs is None:
|
||||
entitydefs = HTMLParser.entitydefs = {'apos':"'"}
|
||||
for k, v in htmlentitydefs.name2codepoint.items():
|
||||
entitydefs[k] = chr(v)
|
||||
for k, v in html.entities.name2codepoint.items():
|
||||
entitydefs[k] = unichr(v)
|
||||
try:
|
||||
return self.entitydefs[s]
|
||||
except KeyError:
|
||||
return '&'+s+';'
|
||||
|
||||
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)
|
||||
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
|
||||
replaceEntities, s)
|
@ -24,7 +24,7 @@ class HTMLParser(sgmllib.SGMLParser):
|
||||
|
||||
"""
|
||||
|
||||
from htmlentitydefs import entitydefs
|
||||
from html.entities import entitydefs
|
||||
|
||||
def __init__(self, formatter, verbose=0):
|
||||
"""Creates an instance of the HTMLParser class.
|
||||
|
@ -1,5 +1,5 @@
|
||||
import test.test_support, unittest
|
||||
import sys, codecs, htmlentitydefs, unicodedata
|
||||
import sys, codecs, html.entities, unicodedata
|
||||
|
||||
class PosReturn:
|
||||
# this can be used for configurable callbacks
|
||||
@ -86,7 +86,7 @@ class CodecCallbackTest(unittest.TestCase):
|
||||
l = []
|
||||
for c in exc.object[exc.start:exc.end]:
|
||||
try:
|
||||
l.append("&%s;" % htmlentitydefs.codepoint2name[ord(c)])
|
||||
l.append("&%s;" % html.entities.codepoint2name[ord(c)])
|
||||
except KeyError:
|
||||
l.append("&#%d;" % ord(c))
|
||||
return ("".join(l), exc.end)
|
||||
|
@ -74,7 +74,7 @@ class TestBase:
|
||||
if self.has_iso10646:
|
||||
return
|
||||
|
||||
from htmlentitydefs import codepoint2name
|
||||
from html.entities import codepoint2name
|
||||
|
||||
def xmlcharnamereplace(exc):
|
||||
if not isinstance(exc, UnicodeEncodeError):
|
||||
|
@ -48,7 +48,7 @@ class TestUntestedModules(unittest.TestCase):
|
||||
import encodings
|
||||
import formatter
|
||||
import getpass
|
||||
import htmlentitydefs
|
||||
import html.entities
|
||||
import imghdr
|
||||
import keyword
|
||||
import linecache
|
||||
|
Loading…
Reference in New Issue
Block a user