mirror of
https://github.com/python/cpython.git
synced 2024-12-12 03:04:15 +08:00
e7ba495627
svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r55636 | neal.norwitz | 2007-05-29 00:06:39 -0700 (Tue, 29 May 2007) | 149 lines Merged revisions 55506-55635 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r55507 | georg.brandl | 2007-05-22 07:28:17 -0700 (Tue, 22 May 2007) | 2 lines Remove the "panel" module doc file which has been ignored since 1994. ........ r55522 | mark.hammond | 2007-05-22 19:04:28 -0700 (Tue, 22 May 2007) | 4 lines Remove definition of PY_UNICODE_TYPE from pyconfig.h, allowing the definition in unicodeobject.h to be used, giving us the desired wchar_t in place of 'unsigned short'. As discussed on python-dev. ........ r55525 | neal.norwitz | 2007-05-22 23:35:32 -0700 (Tue, 22 May 2007) | 6 lines Add -3 option to the interpreter to warn about features that are deprecated and will be changed/removed in Python 3.0. This patch is mostly from Anthony. I tweaked some format and added a little doc. ........ r55527 | neal.norwitz | 2007-05-22 23:57:35 -0700 (Tue, 22 May 2007) | 1 line Whitespace cleanup ........ r55528 | neal.norwitz | 2007-05-22 23:58:36 -0700 (Tue, 22 May 2007) | 1 line Add a bunch more deprecation warnings for builtins that are going away in 3.0 ........ r55549 | georg.brandl | 2007-05-24 09:49:29 -0700 (Thu, 24 May 2007) | 2 lines shlex.split() now has an optional "posix" parameter. ........ r55550 | georg.brandl | 2007-05-24 10:33:33 -0700 (Thu, 24 May 2007) | 2 lines Fix parameter passing. ........ r55555 | facundo.batista | 2007-05-24 10:50:54 -0700 (Thu, 24 May 2007) | 6 lines Added an optional timeout parameter to urllib.ftpwrapper, with tests (for this and a basic one, because there weren't any). Changed also NEWS, but didn't find documentation for this function, assumed it wasn't public... ........ 
r55563 | facundo.batista | 2007-05-24 13:01:59 -0700 (Thu, 24 May 2007) | 4 lines Removed the .recv() in the test, is not necessary, and was causing problems that didn't have anything to do with was actually being tested... ........ r55564 | facundo.batista | 2007-05-24 13:51:19 -0700 (Thu, 24 May 2007) | 5 lines Let's see if reading exactly what is written allow this live test to pass (now I know why there were so few tests in ftp, http, etc, :( ). ........ r55567 | facundo.batista | 2007-05-24 20:10:28 -0700 (Thu, 24 May 2007) | 4 lines Trying to make the tests work in Windows and Solaris, everywhere else just works ........ r55568 | facundo.batista | 2007-05-24 20:47:19 -0700 (Thu, 24 May 2007) | 4 lines Fixing stupid error, and introducing a sleep, to see if the other thread is awakened and finish sending data. ........ r55569 | facundo.batista | 2007-05-24 21:20:22 -0700 (Thu, 24 May 2007) | 4 lines Commenting out the tests until find out who can test them in one of the problematic enviroments. ........ r55570 | neal.norwitz | 2007-05-24 22:13:40 -0700 (Thu, 24 May 2007) | 2 lines Get test passing again by commenting out the reference to the test class. ........ r55575 | vinay.sajip | 2007-05-25 00:05:59 -0700 (Fri, 25 May 2007) | 1 line Updated docstring for SysLogHandler (#1720726). ........ r55576 | vinay.sajip | 2007-05-25 00:06:55 -0700 (Fri, 25 May 2007) | 1 line Updated documentation for SysLogHandler (#1720726). ........ r55592 | brett.cannon | 2007-05-25 13:17:15 -0700 (Fri, 25 May 2007) | 3 lines Remove direct call's to file's constructor and replace them with calls to open() as ths is considered best practice. ........ r55601 | kristjan.jonsson | 2007-05-26 12:19:50 -0700 (Sat, 26 May 2007) | 1 line Remove the rgbimgmodule from PCBuild8 ........ r55602 | kristjan.jonsson | 2007-05-26 12:31:39 -0700 (Sat, 26 May 2007) | 1 line Include <windows.h> after python.h, so that WINNT is properly set before windows.h is included. Fixes warnings in PC builds. 
........ r55603 | walter.doerwald | 2007-05-26 14:04:13 -0700 (Sat, 26 May 2007) | 2 lines Fix typo. ........ r55604 | peter.astrand | 2007-05-26 15:18:20 -0700 (Sat, 26 May 2007) | 1 line Applied patch 1669481, slightly modified: Support close_fds on Win32 ........ r55606 | neal.norwitz | 2007-05-26 21:08:54 -0700 (Sat, 26 May 2007) | 2 lines Add the new function object attribute names from py3k. ........ r55617 | lars.gustaebel | 2007-05-27 12:49:30 -0700 (Sun, 27 May 2007) | 20 lines Added errors argument to TarFile class that allows the user to specify an error handling scheme for character conversion. Additional scheme "utf-8" in read mode. Unicode input filenames are now supported by design. The values of the pax_headers dictionary are now limited to unicode objects. Fixed: The prefix field is no longer used in PAX_FORMAT (in conformance with POSIX). Fixed: In read mode use a possible pax header size field. Fixed: Strip trailing slashes from pax header name values. Fixed: Give values in user-specified pax_headers precedence when writing. Added unicode tests. Added pax/regtype4 member to testtar.tar all possible number fields in a pax header. Added two chapters to the documentation about the different formats tarfile.py supports and how unicode issues are handled. ........ r55618 | raymond.hettinger | 2007-05-27 22:23:22 -0700 (Sun, 27 May 2007) | 1 line Explain when groupby() issues a new group. ........ r55634 | martin.v.loewis | 2007-05-28 21:01:29 -0700 (Mon, 28 May 2007) | 2 lines Test pre-commit hook for a link to a .py file. ........ r55635 | martin.v.loewis | 2007-05-28 21:02:03 -0700 (Mon, 28 May 2007) | 2 lines Revert 55634. ........ ................ r55639 | neal.norwitz | 2007-05-29 00:58:11 -0700 (Tue, 29 May 2007) | 1 line Remove sys.exc_{type,exc_value,exc_traceback} ................ r55641 | neal.norwitz | 2007-05-29 01:03:50 -0700 (Tue, 29 May 2007) | 1 line Missed one sys.exc_type. 
I wonder why exc_{value,traceback} were already gone ................ r55642 | neal.norwitz | 2007-05-29 01:08:33 -0700 (Tue, 29 May 2007) | 1 line Missed more doc for sys.exc_* attrs. ................ r55643 | neal.norwitz | 2007-05-29 01:18:19 -0700 (Tue, 29 May 2007) | 1 line Remove sys.exc_clear() ................ r55665 | guido.van.rossum | 2007-05-29 19:45:43 -0700 (Tue, 29 May 2007) | 4 lines Make None, True, False keywords. We can now also delete all the other places that explicitly forbid assignment to None, but I'm not going to bother right now. ................ r55666 | guido.van.rossum | 2007-05-29 20:01:51 -0700 (Tue, 29 May 2007) | 3 lines Found another place that needs check for forbidden names. Fixed test_syntax.py accordingly (it helped me find that one). ................ r55668 | guido.van.rossum | 2007-05-29 20:41:48 -0700 (Tue, 29 May 2007) | 2 lines Mark None, True, False as keywords. ................ r55673 | neal.norwitz | 2007-05-29 23:28:25 -0700 (Tue, 29 May 2007) | 3 lines Get the dis module working on modules again after changing dicts to not return lists and also new-style classes. Add a test. ................ r55674 | neal.norwitz | 2007-05-29 23:35:45 -0700 (Tue, 29 May 2007) | 1 line Umm, it helps to add the module that the test uses ................ r55675 | neal.norwitz | 2007-05-29 23:53:05 -0700 (Tue, 29 May 2007) | 4 lines Try to fix up all the other places that were assigning to True/False. There's at least one more problem in test.test_xmlrpc. I have other changes in that file and that should be fixed soon (I hope). ................ r55679 | neal.norwitz | 2007-05-30 00:31:55 -0700 (Wed, 30 May 2007) | 1 line Fix up another place that was assigning to True/False. ................ r55688 | brett.cannon | 2007-05-30 14:19:47 -0700 (Wed, 30 May 2007) | 2 lines Ditch MimeWriter. ................ r55692 | brett.cannon | 2007-05-30 14:52:00 -0700 (Wed, 30 May 2007) | 2 lines Remove the mimify module. ................ 
r55707 | guido.van.rossum | 2007-05-31 05:08:45 -0700 (Thu, 31 May 2007) | 2 lines Backport the addition of show_code() to dis.py -- it's too handy. ................ r55708 | guido.van.rossum | 2007-05-31 06:22:57 -0700 (Thu, 31 May 2007) | 7 lines Fix a fairly long-standing bug in the check for assignment to None (and other keywords, these days). In 2.5, you could write foo(None=1) without getting a SyntaxError (although foo()'s definition would have to use **kwds to avoid getting a runtime error complaining about an unknown keyword of course). This ought to be backported to 2.5.2 or at least 2.6. ................ r55724 | brett.cannon | 2007-05-31 19:32:41 -0700 (Thu, 31 May 2007) | 2 lines Remove the cfmfile. ................ r55727 | neal.norwitz | 2007-05-31 22:19:44 -0700 (Thu, 31 May 2007) | 1 line Remove reload() builtin. ................ r55729 | neal.norwitz | 2007-05-31 22:51:30 -0700 (Thu, 31 May 2007) | 59 lines Merged revisions 55636-55728 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r55637 | georg.brandl | 2007-05-29 00:16:47 -0700 (Tue, 29 May 2007) | 2 lines Fix rst markup. ........ r55638 | neal.norwitz | 2007-05-29 00:51:39 -0700 (Tue, 29 May 2007) | 1 line Fix typo in doc ........ r55671 | neal.norwitz | 2007-05-29 21:53:41 -0700 (Tue, 29 May 2007) | 1 line Fix indentation (whitespace only). ........ r55676 | thomas.heller | 2007-05-29 23:58:30 -0700 (Tue, 29 May 2007) | 1 line Fix compiler warnings. ........ r55677 | thomas.heller | 2007-05-30 00:01:25 -0700 (Wed, 30 May 2007) | 2 lines Correct the name of a field in the WIN32_FIND_DATAA and WIN32_FIND_DATAW structures. Closes bug #1726026. ........ r55686 | brett.cannon | 2007-05-30 13:46:26 -0700 (Wed, 30 May 2007) | 2 lines Have MimeWriter raise a DeprecationWarning as per PEP 4 and its documentation. ........ r55690 | brett.cannon | 2007-05-30 14:48:58 -0700 (Wed, 30 May 2007) | 3 lines Have mimify raise a DeprecationWarning. 
The docs and PEP 4 have listed the module as deprecated for a while. ........ r55696 | brett.cannon | 2007-05-30 15:24:28 -0700 (Wed, 30 May 2007) | 2 lines Have md5 raise a DeprecationWarning as per PEP 4. ........ r55705 | neal.norwitz | 2007-05-30 21:14:22 -0700 (Wed, 30 May 2007) | 1 line Add some spaces in the example code. ........ r55716 | brett.cannon | 2007-05-31 12:20:00 -0700 (Thu, 31 May 2007) | 2 lines Have the sha module raise a DeprecationWarning as specified in PEP 4. ........ r55719 | brett.cannon | 2007-05-31 12:40:42 -0700 (Thu, 31 May 2007) | 2 lines Cause buildtools to raise a DeprecationWarning. ........ r55721 | brett.cannon | 2007-05-31 13:01:11 -0700 (Thu, 31 May 2007) | 2 lines Have cfmfile raise a DeprecationWarning as per PEP 4. ........ r55726 | neal.norwitz | 2007-05-31 21:56:47 -0700 (Thu, 31 May 2007) | 1 line Mail if there is an installation failure. ........ ................ r55730 | neal.norwitz | 2007-05-31 23:22:07 -0700 (Thu, 31 May 2007) | 2 lines Remove the code that was missed in rev 55303. ................ r55738 | neal.norwitz | 2007-06-01 19:10:43 -0700 (Fri, 01 Jun 2007) | 1 line Fix doc breakage ................ r55741 | neal.norwitz | 2007-06-02 00:41:58 -0700 (Sat, 02 Jun 2007) | 1 line Remove timing module (plus some remnants of other modules). ................ r55742 | neal.norwitz | 2007-06-02 00:51:44 -0700 (Sat, 02 Jun 2007) | 1 line Remove posixfile module (plus some remnants of other modules). ................ r55744 | neal.norwitz | 2007-06-02 10:18:56 -0700 (Sat, 02 Jun 2007) | 1 line Fix doc breakage. ................ r55745 | neal.norwitz | 2007-06-02 11:32:16 -0700 (Sat, 02 Jun 2007) | 1 line Make a whatsnew 3.0 template. ................ r55754 | neal.norwitz | 2007-06-03 23:24:18 -0700 (Sun, 03 Jun 2007) | 1 line SF #1730441, os._execvpe raises UnboundLocal due to new try/except semantics ................ 
r55755 | neal.norwitz | 2007-06-03 23:26:00 -0700 (Sun, 03 Jun 2007) | 1 line Get rid of extra whitespace ................ r55794 | guido.van.rossum | 2007-06-06 15:29:22 -0700 (Wed, 06 Jun 2007) | 3 lines Make this compile in GCC 2.96, which does not allow interspersing declarations and code. ................
371 lines
14 KiB
Python
371 lines
14 KiB
Python
"""Text wrapping and filling.
|
|
"""
|
|
|
|
# Copyright (C) 1999-2001 Gregory P. Ward.
|
|
# Copyright (C) 2002, 2003 Python Software Foundation.
|
|
# Written by Greg Ward <gward@python.net>
|
|
|
|
__revision__ = "$Id$"
|
|
|
|
import string, re
|
|
|
|
__all__ = ['TextWrapper', 'wrap', 'fill']
|
|
|
|
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
    """

    # Translation tables mapping every character of _whitespace to a
    # plain space.  Text is always str (Unicode), so both historical
    # attribute names now hold an {ordinal: ordinal} mapping suitable for
    # str.translate(); 'whitespace_trans' is kept only so subclasses that
    # reference it keep working.
    whitespace_trans = str.maketrans(_whitespace, ' ' * len(_whitespace))

    uspace = ord(' ')
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), uspace)

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # XXX this is not locale- or charset-aware -- string.ascii_lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 % string.ascii_lowercase)

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True):
        """Store the wrapping options; see the class docstring for details."""
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace and isinstance(text, str):
            text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        """
        # Must be a real list (not a lazy filter object): later stages
        # index it, take its length and reverse it in place.
        return [chunk for chunk in self.wordsep_re.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        pat = self.sentence_end_re
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and pat.search(chunks[i]):
                chunks[i + 1] = "  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  'reversed_chunks' is the stack
        of pending chunks (next chunk last) and 'cur_line' the chunks of
        the line being built; both are modified in place.
        """
        # Always make progress by at least one character, even when the
        # indent has already used up the whole width.
        space_left = max(width - cur_len, 1)

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            chunk = reversed_chunks[-1]
            cur_line.append(chunk[:space_left])
            remainder = chunk[space_left:]
            if remainder:
                reversed_chunks[-1] = remainder
            else:
                # The whole chunk was consumed (only possible when the
                # usable width is <= 0).  Leaving an empty chunk on the
                # stack would make _wrap_chunks() spin forever.
                reversed_chunks.pop()

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Raises ValueError if 'self.width' is not positive.  The 'chunks'
        list is reversed and consumed in place.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with str.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
|
|
|
|
|
|
# -- Convenience interface ---------------------------------------------
|
|
|
|
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    The paragraph in 'text' is reformatted so that every line is at most
    'width' columns wide, and the resulting lines are returned as a list.
    Unless overridden through keyword arguments, tabs are expanded and
    every other whitespace character (newlines included) is turned into
    a space.  Extra keyword arguments are passed straight to the
    TextWrapper constructor; see that class for the available options.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
|
|
|
|
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    The paragraph in 'text' is reformatted so that every line is at most
    'width' columns wide, and the wrapped paragraph is returned as one
    string.  As with wrap(), tabs are expanded and other whitespace
    characters are converted to spaces by default.  Extra keyword
    arguments are passed straight to the TextWrapper constructor; see
    that class for the available options.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
|
|
|
|
|
|
# -- Loosely related functionality -------------------------------------
|
|
|
|
# Lines consisting solely of spaces/tabs, and the leading run of
# spaces/tabs on every line that has some other content.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Strip the leading whitespace shared by every line of `text`.

    Handy for triple-quoted strings: they can stay indented in the
    source code yet be displayed flush with the left edge.

    Lines made up only of spaces and tabs are blanked first and do not
    take part in the computation.  Spaces and tabs both count as
    whitespace but are never considered equal to each other, so a line
    indented with spaces and a line indented with a tab share no common
    margin.  (This behaviour is new in Python 2.5; older versions of
    this module incorrectly expanded tabs before searching for common
    leading whitespace.)
    """
    cleaned = _whitespace_only_re.sub('', text)

    # Walk over the indentation of every non-blank line, keeping the
    # longest prefix that all of them have in common.
    margin = None
    for candidate in _leading_whitespace_re.findall(cleaned):
        if margin is None or margin.startswith(candidate):
            # First line seen, or a line no deeper than the current
            # winner and consistent with it: it becomes the new winner.
            margin = candidate
        elif not candidate.startswith(margin):
            # Neither indentation is a prefix of the other: the lines
            # have no common margin at all.
            margin = ""
            break
        # Otherwise the line is simply indented more deeply than the
        # current winner, which therefore stays on top.

    if not margin:
        return cleaned
    return re.sub(r'(?m)^' + margin, '', cleaned)
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: run the module directly to eyeball dedent().
    # Other inputs worth trying by hand are a tab-indented pair of lines
    # and lines mixing spaces and tabs in their indentation.
    sample = "Hello there.\n This is indented."
    print(dedent(sample))
|