mirror of
https://github.com/python/cpython.git
synced 2024-11-26 03:14:27 +08:00
21896a330a
svn+ssh://pythondev@svn.python.org/python/trunk ........ r77952 | mark.dickinson | 2010-02-03 10:50:14 -0600 (Wed, 03 Feb 2010) | 1 line Fix test_inspect.py data to match recent change to inspect_fodder.py (r77942). ........ r78030 | benjamin.peterson | 2010-02-06 14:14:10 -0600 (Sat, 06 Feb 2010) | 1 line check type_getattro for correctness in a descriptor corner case ........ r78102 | andrew.kuchling | 2010-02-07 19:35:35 -0600 (Sun, 07 Feb 2010) | 1 line Move distutils into its own subsection; add various items ........ r78104 | andrew.kuchling | 2010-02-08 07:22:24 -0600 (Mon, 08 Feb 2010) | 1 line Add two items; move a subsection ........ r78107 | antoine.pitrou | 2010-02-08 14:25:47 -0600 (Mon, 08 Feb 2010) | 3 lines Clarify and correct description for ccbench and iobench. ........ r78206 | r.david.murray | 2010-02-16 11:55:26 -0600 (Tue, 16 Feb 2010) | 3 lines Make the references to Popen in the description of Call and check_call into links. ........ r78216 | andrew.kuchling | 2010-02-18 08:16:48 -0600 (Thu, 18 Feb 2010) | 1 line Add various items ........ r78296 | andrew.kuchling | 2010-02-21 20:08:45 -0600 (Sun, 21 Feb 2010) | 1 line Re-word ........ r78297 | andrew.kuchling | 2010-02-21 20:29:10 -0600 (Sun, 21 Feb 2010) | 1 line #7076: mention SystemRandom class near start of the module docs; reword change description for clarity. Noted by Shawn Ligocki. ........ r78328 | jack.diederich | 2010-02-22 12:17:16 -0600 (Mon, 22 Feb 2010) | 1 line fixes issue #7530, serve_forever() ........ r78331 | andrew.kuchling | 2010-02-22 12:38:23 -0600 (Mon, 22 Feb 2010) | 1 line Fix comment typo ........ r78332 | andrew.kuchling | 2010-02-22 12:42:07 -0600 (Mon, 22 Feb 2010) | 2 lines #7627: MH.remove() would fail if the MH mailbox was locked; it would call _unlock_file() and pass it a closed file object. Noted by Rob Austein. ........ 
r78336 | jack.diederich | 2010-02-22 13:55:22 -0600 (Mon, 22 Feb 2010) | 1 line fixes issue #1522237, bad init check in _threading_local ........ r78339 | jack.diederich | 2010-02-22 15:27:38 -0600 (Mon, 22 Feb 2010) | 1 line * fix issue#7476 ........ r78343 | andrew.kuchling | 2010-02-22 16:48:41 -0600 (Mon, 22 Feb 2010) | 10 lines #2560: remove an unnecessary 'for' loop from my_fgets() in Parser/myreadline.c. Noted by Joseph Armbruster; patch by Jessica McKellar. The original code was 'for (;;) {...}', where ... ended with a 'return -2' statement and did not contain a 'break' or 'continue' statement. Therefore, the body of the loop is always executed once. Once upon a time there was a 'continue' in the loop, but it was removed in rev36346, committed by mwh on Wed Jul 7 17:44:12 2004. ........ r78378 | jack.diederich | 2010-02-23 11:23:30 -0600 (Tue, 23 Feb 2010) | 1 line fixup markup error ........ r78379 | jack.diederich | 2010-02-23 13:34:06 -0600 (Tue, 23 Feb 2010) | 1 line issue#6442 use in operator instead of has_key ........ r78415 | dirkjan.ochtman | 2010-02-23 22:00:52 -0600 (Tue, 23 Feb 2010) | 1 line Issue #7733: add explicit reference in asyncore docs. ........ r78559 | andrew.kuchling | 2010-03-01 13:45:21 -0600 (Mon, 01 Mar 2010) | 1 line #7637: update discussion of minidom.unlink() and garbage collection ........ r78717 | benjamin.peterson | 2010-03-05 21:13:33 -0600 (Fri, 05 Mar 2010) | 1 line settscdump is definitely an implementation detail ........ r78791 | andrew.kuchling | 2010-03-08 06:00:39 -0600 (Mon, 08 Mar 2010) | 1 line Add various items ........
272 lines
9.3 KiB
Python
272 lines
9.3 KiB
Python
"""Conversion pipeline templates.
|
|
|
|
The problem:
|
|
------------
|
|
|
|
Suppose you have some data that you want to convert to another format,
|
|
such as from GIF image format to PPM image format. Maybe the
|
|
conversion involves several steps (e.g. piping it through compress or
|
|
uuencode). Some of the conversion steps may require that their input
|
|
is a disk file, others may be able to read standard input; similar for
|
|
their output. The input to the entire conversion may also be read
|
|
from a disk file or from an open file, and similar for its output.
|
|
|
|
The module lets you construct a pipeline template by sticking one or
|
|
more conversion steps together. It will take care of creating and
|
|
removing temporary files if they are necessary to hold intermediate
|
|
data. You can then use the template to do conversions from many
|
|
different sources to many different destinations. The temporary
|
|
file names used are different each time the template is used.
|
|
|
|
The templates are objects so you can create templates for many
|
|
different conversion steps and store them in a dictionary, for
|
|
instance.
|
|
|
|
|
|
Directions:
|
|
-----------
|
|
|
|
To create a template:
|
|
t = Template()
|
|
|
|
To add a conversion step to a template:
|
|
t.append(command, kind)
|
|
where kind is a string of two characters: the first is '-' if the
|
|
command reads its standard input or 'f' if it requires a file; the
|
|
second likewise for the output. The command must be valid /bin/sh
|
|
syntax. If input or output files are required, they are passed as
|
|
$IN and $OUT; otherwise, it must be possible to use the command in
|
|
a pipeline.
|
|
|
|
To add a conversion step at the beginning:
|
|
t.prepend(command, kind)
|
|
|
|
To convert a file to another file using a template:
|
|
sts = t.copy(infile, outfile)
|
|
If infile or outfile are the empty string, standard input is read or
|
|
standard output is written, respectively. The return value is the
|
|
exit status of the conversion pipeline.
|
|
|
|
To open a file for reading or writing through a conversion pipeline:
|
|
fp = t.open(file, mode)
|
|
where mode is 'r' to read the file, or 'w' to write it -- just like
|
|
for the built-in function open() or for os.popen().
|
|
|
|
To create a new template object initialized to a given one:
|
|
t2 = t.clone()
|
|
|
|
For an example, see the function test() at the end of the file.
|
|
""" # '
|
|
|
|
|
|
import re
|
|
import os
|
|
import tempfile
|
|
import string
|
|
|
|
__all__ = ["Template"]
|
|
|
|
# Conversion step kinds.
#
# Each kind is a two-character code: the first character describes how
# the step takes its input, the second how it delivers its output.
# 'f' means a real file is required, '-' means standard input/output.

FILEIN_FILEOUT = 'ff'   # Must read & write real files
STDIN_FILEOUT = '-f'    # Must write a real file
FILEIN_STDOUT = 'f-'    # Must read a real file
STDIN_STDOUT = '--'     # Normal pipeline element
SOURCE = '.-'           # Must be first, writes stdout
SINK = '-.'             # Must be last, reads stdin

# Every kind accepted by Template.append() and Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
|
|
|
|
|
|
class Template:
    """Class representing a pipeline template.

    A template is an ordered list of conversion steps, kept in
    self.steps as (cmd, kind) tuples.  cmd is a /bin/sh command string
    and kind is one of the two-character codes listed in the
    module-level stepkinds.  The template itself never runs anything
    until open(), open_r(), open_w() or copy() is called.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later append()/prepend() calls on either
        # template do not affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SOURCE, if the template
        already ends with a SINK step, or if a file-based kind is used
        without the matching $IN / $OUT reference in cmd.
        """
        # isinstance (rather than an exact type comparison) so that
        # subclasses of str are accepted as commands too.
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a known step kind, if kind is SINK, if the template
        already begins with a SOURCE step, or if a file-based kind is
        used without the matching $IN / $OUT reference in cmd.
        """
        # isinstance (rather than an exact type comparison) so that
        # subclasses of str are accepted as commands too.
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError("Template.open: rw must be 'r' or 'w', not %r"
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is simply opened for reading.  Otherwise
        the pipeline is started with file as its input and the returned
        pipe object reads the pipeline's output.  Raises ValueError if
        the last step is a SINK, since there is then nothing to read.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is simply opened for writing.  Otherwise
        the pipeline is started with file as its output and the returned
        pipe object feeds the pipeline's input.  Raises ValueError if
        the first step is a SOURCE, since it does not read any input.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for it."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        string for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so that the shell traces what it executes.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
|
|
|
|
|
|
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps as one pipeline.

    infile and outfile name the files at the two ends of the pipeline;
    an empty value means standard input / standard output.  steps is a
    sequence of (cmd, kind) pairs.  Steps that need real files are
    connected through temporary files created here with
    tempfile.mkstemp(); the generated text removes them again with
    'rm -f', both at the end and from a signal trap.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', command, kind, ''] for command, kind in steps]

    # Make sure there is at least one step
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a file-based first/last
    # step cannot use stdin/stdout directly, so put a 'cat' next to it.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][3] = outfile

    # Invent temporary files to connect stages that need files
    tempnames = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            handle, path = tempfile.mkstemp()
            os.close(handle)
            tempnames.append(path)
            left[3] = right[0] = path

    # Attach $IN/$OUT assignments and redirections to each command
    for stage in stages:
        source, command, kind, target = stage
        if kind[1] == 'f':
            command = 'OUT=' + quote(target) + '; ' + command
        if kind[0] == 'f':
            command = 'IN=' + quote(source) + '; ' + command
        if kind[0] == '-' and source:
            command = command + ' <' + quote(source)
        if kind[1] == '-' and target:
            command = command + ' >' + quote(target)
        stage[1] = command

    # Join the stages: a stage without an input file is fed through a
    # pipe; a stage reading a (temporary) file starts a new shell line.
    script = stages[0][1]
    for stage in stages[1:]:
        source, command, kind = stage[0], stage[1], stage[2]
        if source != '':
            script = script + '\n' + command
            continue
        if 'f' in kind:
            # Group the variable assignment with its command so the
            # whole group is the pipe's right-hand side.
            command = '{ ' + command + '; }'
        script = script + ' |\n' + command

    # Clean up the temporary files, also when interrupted by a signal
    if tempnames:
        remove = ' '.join(['rm -f'] + [quote(path) for path in tempnames])
        trap = 'trap ' + quote(remove + '; exit') + ' 1 2 3 13 14 15'
        script = trap + '\n' + script + '\n' + remove

    return script
|
|
|
|
|
|
# Reliably quote a string as a single argument for /bin/sh
|
|
|
|
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./' # Safe unquoted
|
|
_funnychars = '"`$\\' # Unsafe inside "double quotes"
|
|
|
|
def quote(file):
|
|
''' return a shell-escaped version of the file string '''
|
|
for c in file:
|
|
if c not in _safechars:
|
|
break
|
|
else:
|
|
if not file:
|
|
return "''"
|
|
return file
|
|
if '\'' not in file:
|
|
return '\'' + file + '\''
|
|
res = ''
|
|
for c in file:
|
|
if c in _funnychars:
|
|
c = '\\' + c
|
|
res = res + c
|
|
return '"' + res + '"'
|