mirror of
https://github.com/python/cpython.git
synced 2024-11-26 19:34:19 +08:00
2130 lines
59 KiB
Python
2130 lines
59 KiB
Python
#
|
|
# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
|
|
# and generate texinfo source.
|
|
#
|
|
# This is *not* a good example of good programming practices. In fact, this
|
|
# file could use a complete rewrite, in order to become faster, more
|
|
# easy extensible and maintainable.
|
|
#
|
|
# However, I added some comments in a few places for the pitiful person who
|
|
# would ever need to take a look into this file.
|
|
#
|
|
# Have I been clear enough??
|
|
#
|
|
# -jh
|
|
|
|
|
|
import sys, string, regex, getopt, os
|
|
|
|
# Different parse modes for phase 1
|
|
MODE_REGULAR = 0
MODE_VERBATIM = 1
MODE_CS_SCAN = 2
MODE_COMMENT = 3
MODE_MATH = 4
MODE_DMATH = 5
MODE_GOBBLEWHITE = 6

# Tuple of every valid parse mode value.
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
             MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
|
|
|
|
# Show the neighbourhood of the scanned buffer
|
|
def epsilon(buf, where):
    """Return a string showing up to 10 characters on each side of buf[where].

    The two slices (before and from `where`) are shown as reprs, clipped to
    the bounds of `buf`.
    """
    wmt = max(where - 10, 0)
    wpt = min(where + 10, len(buf))
    return ' Context ' + repr(buf[wmt:where]) + '.' + repr(buf[where:wpt]) + '.'
|
|
|
|
# Should return the line number. never worked
|
|
def lin():
    """Return a ' Line N.' string built from the module global `lineno`."""
    global lineno
    return ' Line ' + repr(lineno) + '.'
|
|
|
|
# Displays the recursion level.
|
|
def lv(lvl):
    """Return a ' Level N.' string for the given recursion level."""
    return ' Level ' + repr(lvl) + '.'
|
|
|
|
# Combine the three previous functions. Used often.
|
|
def lle(lvl, buf, where):
    """Return level, line and context descriptions concatenated.

    Combines lv(lvl), lin() and epsilon(buf, where); used to decorate
    error messages.
    """
    return lv(lvl) + lin() + epsilon(buf, where)
|
|
|
|
|
|
# This class is only needed for _symbolic_ representation of the parse mode.
|
|
class Mode:
    """Symbolic wrapper around one of the MODE_* parse-mode values."""

    # Printable name for every valid mode value.
    _names = {
        MODE_REGULAR: 'MODE_REGULAR',
        MODE_VERBATIM: 'MODE_VERBATIM',
        MODE_CS_SCAN: 'MODE_CS_SCAN',
        MODE_COMMENT: 'MODE_COMMENT',
        MODE_MATH: 'MODE_MATH',
        MODE_DMATH: 'MODE_DMATH',
        MODE_GOBBLEWHITE: 'MODE_GOBBLEWHITE',
    }

    def init(self, arg):
        """Store mode value `arg` and return self.

        Raises ValueError when `arg` is not in the_modes.
        """
        if arg not in the_modes:
            raise ValueError('mode not in the_modes')
        self.mode = arg
        return self

    def __cmp__(self, other):
        """Compare by mode value; a non-instance `other` is wrapped first."""
        if type(self) != type(other):
            other = mode(other)
        return cmp(self.mode, other.mode)

    def __repr__(self):
        """Return the symbolic name; ValueError for an unknown value."""
        name = self._names.get(self.mode)
        if name is None:
            raise ValueError('mode not in the_modes')
        return name
|
|
|
|
# just a wrapper around a class initialisation
|
|
def mode(arg):
    """Return a Mode instance initialised with mode value `arg`."""
    return Mode().init(arg)
|
|
|
|
|
|
# After phase 1, the text consists of chunks, with a certain type
|
|
# this type will be assigned to the chtype member of the chunk
|
|
# the where-field contains the file position where this is found
|
|
# and the data field contains (1): a tuple describing start and end
|
|
# positions of the substring (can be used as slice for the buf-variable),
|
|
# (2) just a string, mostly generated by the changeit routine,
|
|
# or (3) a list, describing a (recursive) subgroup of chunks
|
|
PLAIN = 0               # ASSUME PLAINTEXT, data = the text
GROUP = 1               # GROUP ({}), data = [chunk, chunk,..]
CSNAME = 2              # CONTROL SEQ TOKEN, data = the command
COMMENT = 3             # data is the actual comment
DMATH = 4               # DISPLAYMATH, data = [chunk, chunk,..]
MATH = 5                # MATH, see DISPLAYMATH
OTHER = 6               # CHAR WITH CATCODE OTHER, data = char
ACTIVE = 7              # ACTIVE CHAR
GOBBLEDWHITE = 8        # Gobbled LWSP, after CSNAME
ENDLINE = 9             # END-OF-LINE, data = '\n'
DENDLINE = 10           # DOUBLE EOL, data='\n', indicates \par
ENV = 11                # LaTeX-environment
                        # data =(envname,[ch,ch,ch,.])
CSLINE = 12             # for texi: next chunk will be one group
                        # of args. Will be set all on 1 line
IGNORE = 13             # IGNORE this data
ENDENV = 14             # TEMP END OF GROUP INDICATOR
IF = 15                 # IF-directive
                        # data = (flag,negate,[ch, ch, ch,...])

# Tuple of every valid chunk type value.
the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
             GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF)
|
|
|
|
# class, just to display symbolic name
|
|
class ChunkType:
    """Symbolic wrapper around one of the chunk type values in the_types."""

    # Printable name for every valid chunk type value.
    _names = {
        PLAIN: 'PLAIN',
        GROUP: 'GROUP',
        CSNAME: 'CSNAME',
        COMMENT: 'COMMENT',
        DMATH: 'DMATH',
        MATH: 'MATH',
        OTHER: 'OTHER',
        ACTIVE: 'ACTIVE',
        GOBBLEDWHITE: 'GOBBLEDWHITE',
        DENDLINE: 'DENDLINE',
        ENDLINE: 'ENDLINE',
        ENV: 'ENV',
        CSLINE: 'CSLINE',
        IGNORE: 'IGNORE',
        ENDENV: 'ENDENV',
        IF: 'IF',
    }

    def init(self, chunk_type):
        """Store `chunk_type` and return self.

        Raises ValueError when `chunk_type` is not in the_types.  (This used
        to raise the *string* 'ValueError', unlike __repr__ below.)
        """
        if chunk_type not in the_types:
            raise ValueError('chunk_type not in the_types')
        self.chunk_type = chunk_type
        return self

    def __cmp__(self, other):
        """Compare by type value; a non-instance `other` is wrapped first."""
        if type(self) != type(other):
            other = chunk_type(other)
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        """Return the symbolic name; ValueError for an unknown value."""
        name = self._names.get(self.chunk_type)
        if name is None:
            raise ValueError('chunk_type not in the_types')
        return name
|
|
|
|
# ...and the wrapper
|
|
def chunk_type(type):
    """Return a ChunkType instance initialised with type value `type`."""
    return ChunkType().init(type)

# store a type object of the ChunkType-class-instance...
chunk_type_type = type(chunk_type(0))
|
|
|
|
# this class contains a part of the parsed buffer
|
|
class Chunk:
    """One piece of the parsed buffer: a type, a position and its data."""

    def init(self, chtype, where, data):
        """Fill in the chunk and return self.

        chtype -- chunk type; wrapped with chunk_type() when its type()
                  differs from chunk_type_type
        where  -- integer position; TypeError is raised otherwise
        data   -- stored as-is
        """
        if type(chtype) != chunk_type_type:
            chtype = chunk_type(chtype)
        self.chtype = chtype
        if type(where) != type(0):
            raise TypeError('\'where\' is not a number')
        self.where = where
        self.data = data
        return self

    def __repr__(self):
        """Return 'chunk' followed by the repr of (chtype, where, data)."""
        return 'chunk' + repr((self.chtype, self.where, self.data))
|
|
|
|
# and the wrapper
|
|
def chunk(chtype, where, data):
    """Return a Chunk instance initialised with the given fields."""
    return Chunk().init(chtype, where, data)
|
|
|
|
|
|
|
|
# Exception object raised by this module for parse/conversion problems.
# It is a plain string (a string exception), raised as ``raise error, msg``.
error = 'partparse.error'
|
|
|
|
#
|
|
# TeX's catcodes...
|
|
#
|
|
CC_ESCAPE = 0
CC_LBRACE = 1
CC_RBRACE = 2
CC_MATHSHIFT = 3
CC_ALIGNMENT = 4
CC_ENDLINE = 5
CC_PARAMETER = 6
CC_SUPERSCRIPT = 7
CC_SUBSCRIPT = 8
CC_IGNORE = 9
CC_WHITE = 10
CC_LETTER = 11
CC_OTHER = 12
CC_ACTIVE = 13
CC_COMMENT = 14
CC_INVALID = 15

# and the names; cc_names[code] is the symbolic name of catcode `code`
cc_names = [
    'CC_ESCAPE',
    'CC_LBRACE',
    'CC_RBRACE',
    'CC_MATHSHIFT',
    'CC_ALIGNMENT',
    'CC_ENDLINE',
    'CC_PARAMETER',
    'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT',
    'CC_IGNORE',
    'CC_WHITE',
    'CC_LETTER',
    'CC_OTHER',
    'CC_ACTIVE',
    'CC_COMMENT',
    'CC_INVALID',
]
|
|
|
|
# Show a list of catcode-name-symbols
|
|
def pcl(codelist):
    """Return '[NAME, NAME, ...]' listing the catcode names in `codelist`."""
    return '[' + ', '.join([cc_names[code] for code in codelist]) + ']'
|
|
|
|
# the name of the catcode (ACTIVE, OTHER, etc.)
|
|
def pc(code):
    """Return the symbolic name of catcode `code` (looked up in cc_names)."""
    return cc_names[code]
|
|
|
|
|
|
# Which catcodes make the parser stop parsing regular plaintext
|
|
regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
                     CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
                     CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]

# same for scanning a control sequence name
csname_scancodes = [CC_LETTER]

# same for gobbling LWSP
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]

# make a list of all catcode id's, except for catcode ``other''
all_but_other_codes = list(range(16))
del all_but_other_codes[CC_OTHER]

# when does a comment end
comment_stopcodes = [CC_ENDLINE]
|
|
|
|
# gather all characters together, specified by a list of catcodes
|
|
def code2string(cc, codelist):
    """Concatenate the character strings of the given catcodes.

    cc       -- catcode table: sequence indexed by catcode, each entry a
                string of characters or a false value (skipped)
    codelist -- catcodes to collect, in output order
    """
    return ''.join([cc[category] for category in codelist if cc[category]])
|
|
|
|
# automatically generate all characters of catcode other, being the
|
|
# complement set in the ASCII range (128 characters)
|
|
def make_other_codes(cc):
    """Return the characters of catcode ``other'' for catcode table `cc`.

    These are all characters with ordinals 0..255 that do not appear in any
    entry of `cc` indexed by all_but_other_codes, in ascending order.
    """
    otherchars = list(range(256))
    for category in all_but_other_codes:
        if cc[category]:
            for c in cc[category]:
                # mark as taken by another category
                otherchars[ord(c)] = None
    return ''.join([chr(i) for i in otherchars if i is not None])
|
|
|
|
# catcode dump (which characters have which catcodes).
|
|
def dump_cc(name, cc):
    """Check that catcode table `cc` has 16 entries.

    The actual dump is commented out, so this only validates the length;
    raises TypeError when it is not 16.
    """
    ##print '\t' + name
    ##print '=' * (8+len(name))
    if len(cc) != 16:
        raise TypeError('cc not good cat class')
    ##for i in range(16):
    ##    print pc(i) + '\t' + `cc[i]`
|
|
|
|
|
|
# In the beginning,....
|
|
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)


# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
    '\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:]   # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'         # add it to OTHER list
dump_cc('my_cc', my_cc)
|
|
|
|
|
|
|
|
# needed for un_re, my equivalent for regexp-quote in Emacs
|
|
re_meaning = '\\[]^$'

def un_re(str):
    """Return `str` with a backslash put before every char in re_meaning.

    NOTE(review): only backslash, '[', ']', '^' and '$' are quoted; other
    characters are passed through unchanged -- confirm that is sufficient
    for the regex syntax in use.
    """
    quoted = []
    for ch in str:
        if ch in re_meaning:
            quoted.append('\\')
        quoted.append(ch)
    return ''.join(quoted)
|
|
|
|
# NOTE the negate ('^') operator in *some* of the regexps below
|
|
def make_rc_regular(cc):
    """Compile a regex matching any char that stops plain-text scanning.

    The character class is built from the regular_stopcodes entries of `cc`.
    """
    # problems here if '[]' are included!!
    return regex.compile('[' + code2string(cc, regular_stopcodes) + ']')
|
|
|
|
def make_rc_cs_scan(cc):
    """Compile a regex matching any char NOT in the csname_scancodes of `cc`."""
    return regex.compile('[^' + code2string(cc, csname_scancodes) + ']')
|
|
|
|
def make_rc_comment(cc):
    """Compile a regex matching any char in the comment_stopcodes of `cc`."""
    return regex.compile('[' + code2string(cc, comment_stopcodes) + ']')
|
|
|
|
def make_rc_endwhite(cc):
    """Compile a regex matching any char NOT in the white_scancodes of `cc`."""
    return regex.compile('[^' + code2string(cc, white_scancodes) + ']')
|
|
|
|
|
|
|
|
# regular: normal mode:
|
|
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the end of a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the first non-white character
rc_endwhite = make_rc_endwhite(my_cc)
|
|
|
|
|
|
# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
|
|
# RECURSION-LEVEL is incremented on entry.
|
|
# result contains the list of chunks returned
|
|
# together with this list, the buffer position is returned
|
|
|
|
# RECURSION-LEVEL will be set to zero *again*, when recursively a
|
|
# {,D}MATH-mode scan has been entered.
|
|
# This has been done in order to better check for environment-mismatches
|
|
|
|
def parseit(buf, *rest):
|
|
global lineno
|
|
|
|
if len(rest) == 3:
|
|
parsemode, start, lvl = rest
|
|
elif len(rest) == 2:
|
|
parsemode, start, lvl = rest + (0, )
|
|
elif len(rest) == 1:
|
|
parsemode, start, lvl = rest + (0, 0)
|
|
elif len(rest) == 0:
|
|
parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
|
|
else:
|
|
raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
|
|
result = []
|
|
end = len(buf)
|
|
if lvl == 0 and parsemode == mode(MODE_REGULAR):
|
|
lineno = 1
|
|
lvl = lvl + 1
|
|
|
|
##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
|
|
|
|
#
|
|
# some of the more regular modes...
|
|
#
|
|
|
|
if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
|
|
cstate = []
|
|
newpos = start
|
|
curpmode = parsemode
|
|
while 1:
|
|
where = newpos
|
|
#print '\tnew round: ' + epsilon(buf, where)
|
|
if where == end:
|
|
if lvl > 1 or curpmode != mode(MODE_REGULAR):
|
|
# not the way we started...
|
|
raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
|
|
# the real ending of lvl-1 parse
|
|
return end, result
|
|
|
|
pos = rc_regular.search(buf, where)
|
|
|
|
if pos < 0:
|
|
pos = end
|
|
|
|
if pos != where:
|
|
newpos, c = pos, chunk(PLAIN, where, (where, pos))
|
|
result.append(c)
|
|
continue
|
|
|
|
|
|
#
|
|
# ok, pos == where and pos != end
|
|
#
|
|
foundchar = buf[where]
|
|
if foundchar in my_cc[CC_LBRACE]:
|
|
# recursive subgroup parse...
|
|
newpos, data = parseit(buf, curpmode, where+1, lvl)
|
|
result.append(chunk(GROUP, where, data))
|
|
|
|
elif foundchar in my_cc[CC_RBRACE]:
|
|
if lvl <= 1:
|
|
raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
|
|
if lvl == 1 and mode != mode(MODE_REGULAR):
|
|
raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
|
|
return where + 1, result
|
|
|
|
elif foundchar in my_cc[CC_ESCAPE]:
|
|
#
|
|
# call the routine that actually deals with
|
|
# this problem. If do_ret is None, than
|
|
# return the value of do_ret
|
|
#
|
|
# Note that handle_cs might call this routine
|
|
# recursively again...
|
|
#
|
|
do_ret, newpos = handlecs(buf, where, \
|
|
curpmode, lvl, result, end)
|
|
if do_ret != None:
|
|
return do_ret
|
|
|
|
elif foundchar in my_cc[CC_COMMENT]:
|
|
newpos, data = parseit(buf, \
|
|
mode(MODE_COMMENT), where+1, lvl)
|
|
result.append(chunk(COMMENT, where, data))
|
|
|
|
elif foundchar in my_cc[CC_MATHSHIFT]:
|
|
# note that recursive calls to math-mode
|
|
# scanning are called with recursion-level 0
|
|
# again, in order to check for bad mathend
|
|
#
|
|
if where + 1 != end and \
|
|
buf[where + 1] in \
|
|
my_cc[CC_MATHSHIFT]:
|
|
#
|
|
# double mathshift, e.g. '$$'
|
|
#
|
|
if curpmode == mode(MODE_REGULAR):
|
|
newpos, data = parseit(buf, \
|
|
mode(MODE_DMATH), \
|
|
where+2, 0)
|
|
result.append(chunk(DMATH, \
|
|
where, data))
|
|
elif curpmode == mode(MODE_MATH):
|
|
raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
|
|
elif lvl != 1:
|
|
raise error, 'bad mathend.' + \
|
|
lle(lvl, buf, where)
|
|
else:
|
|
return where + 2, result
|
|
else:
|
|
#
|
|
# single math shift, e.g. '$'
|
|
#
|
|
if curpmode == mode(MODE_REGULAR):
|
|
newpos, data = parseit(buf, \
|
|
mode(MODE_MATH), \
|
|
where+1, 0)
|
|
result.append(chunk(MATH, \
|
|
where, data))
|
|
elif curpmode == mode(MODE_DMATH):
|
|
raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
|
|
elif lvl != 1:
|
|
raise error, 'bad mathend.' + \
|
|
lv(lvl, buf, where)
|
|
else:
|
|
return where + 1, result
|
|
|
|
elif foundchar in my_cc[CC_IGNORE]:
|
|
print 'warning: ignored char', `foundchar`
|
|
newpos = where + 1
|
|
|
|
elif foundchar in my_cc[CC_ACTIVE]:
|
|
result.append(chunk(ACTIVE, where, foundchar))
|
|
newpos = where + 1
|
|
|
|
elif foundchar in my_cc[CC_INVALID]:
|
|
raise error, 'invalid char ' + `foundchar`
|
|
newpos = where + 1
|
|
|
|
elif foundchar in my_cc[CC_ENDLINE]:
|
|
#
|
|
# after an end of line, eat the rest of
|
|
# whitespace on the beginning of the next line
|
|
# this is what LaTeX more or less does
|
|
#
|
|
# also, try to indicate double newlines (\par)
|
|
#
|
|
lineno = lineno + 1
|
|
savedwhere = where
|
|
newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
|
|
if newpos != end and buf[newpos] in \
|
|
my_cc[CC_ENDLINE]:
|
|
result.append(chunk(DENDLINE, \
|
|
savedwhere, foundchar))
|
|
else:
|
|
result.append(chunk(ENDLINE, \
|
|
savedwhere, foundchar))
|
|
else:
|
|
result.append(chunk(OTHER, where, foundchar))
|
|
newpos = where + 1
|
|
|
|
elif parsemode == mode(MODE_CS_SCAN):
|
|
#
|
|
# scan for a control sequence token. `\ape', `\nut' or `\%'
|
|
#
|
|
if start == end:
|
|
raise EOFError, 'can\'t find end of csname'
|
|
pos = rc_cs_scan.search(buf, start)
|
|
if pos < 0:
|
|
pos = end
|
|
if pos == start:
|
|
# first non-letter right where we started the search
|
|
# ---> the control sequence name consists of one single
|
|
# character. Also: don't eat white space...
|
|
if buf[pos] in my_cc[CC_ENDLINE]:
|
|
lineno = lineno + 1
|
|
pos = pos + 1
|
|
return pos, (start, pos)
|
|
else:
|
|
spos = pos
|
|
if buf[pos] == '\n':
|
|
lineno = lineno + 1
|
|
spos = pos + 1
|
|
pos2, dummy = parseit(buf, \
|
|
mode(MODE_GOBBLEWHITE), spos, lvl)
|
|
return pos2, (start, pos)
|
|
|
|
elif parsemode == mode(MODE_GOBBLEWHITE):
|
|
if start == end:
|
|
return start, ''
|
|
pos = rc_endwhite.search(buf, start)
|
|
if pos < 0:
|
|
pos = start
|
|
return pos, (start, pos)
|
|
|
|
elif parsemode == mode(MODE_COMMENT):
|
|
pos = rc_comment.search(buf, start)
|
|
lineno = lineno + 1
|
|
if pos < 0:
|
|
print 'no newline perhaps?'
|
|
raise EOFError, 'can\'t find end of comment'
|
|
pos = pos + 1
|
|
pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
|
|
return pos2, (start, pos)
|
|
|
|
|
|
else:
|
|
raise error, 'Unknown mode (' + `parsemode` + ')'
|
|
|
|
|
|
#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
|
|
|
|
#boxcommands = 'mbox', 'fbox'
|
|
#defcommands = 'def', 'newcommand'
|
|
|
|
# literal text that terminates a verbatim environment
endverbstr = '\\end{verbatim}'

# compiled pattern matching endverbstr literally
re_endverb = regex.compile(un_re(endverbstr))
|
|
|
|
#
|
|
# handlecs: helper function for parseit, for the special thing we might
|
|
# wanna do after certain command control sequences
|
|
# returns: None or return_data, newpos
|
|
#
|
|
# in the latter case, the calling function is instructed to immediately
|
|
# return with the data in return_data
|
|
#
|
|
def handlecs(buf, where, curpmode, lvl, result, end):
|
|
global lineno
|
|
|
|
# get the control sequence name...
|
|
newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
|
|
saveddata = data
|
|
|
|
if s(buf, data) in ('begin', 'end'):
|
|
# skip the expected '{' and get the LaTeX-envname '}'
|
|
newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
|
|
if len(data) != 1:
|
|
raise error, 'expected 1 chunk of data.' + \
|
|
lle(lvl, buf, where)
|
|
|
|
# yucky, we've got an environment
|
|
envname = s(buf, data[0].data)
|
|
##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
|
|
if s(buf, saveddata) == 'begin' and envname == 'verbatim':
|
|
# verbatim deserves special treatment
|
|
pos = re_endverb.search(buf, newpos)
|
|
if pos < 0:
|
|
raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
|
|
result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
|
|
newpos = pos + len(endverbstr)
|
|
|
|
elif s(buf, saveddata) == 'begin':
|
|
# start parsing recursively... If that parse returns
|
|
# from an '\end{...}', then should the last item of
|
|
# the returned data be a string containing the ended
|
|
# environment
|
|
newpos, data = parseit(buf, curpmode, newpos, lvl)
|
|
if not data or type(data[-1]) != type(''):
|
|
raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
|
|
retenv = data[-1]
|
|
del data[-1]
|
|
if retenv != envname:
|
|
#[`retenv`, `envname`]
|
|
raise error, 'environments do not match.' + \
|
|
lle(lvl, buf, where) + \
|
|
epsilon(buf, newpos)
|
|
result.append(chunk(ENV, where, (retenv, data)))
|
|
else:
|
|
# 'end'... append the environment name, as just
|
|
# pointed out, and order parsit to return...
|
|
result.append(envname)
|
|
##print 'POINT of return: ' + epsilon(buf, newpos)
|
|
# the tuple will be returned by parseit
|
|
return (newpos, result), newpos
|
|
|
|
# end of \begin ... \end handling
|
|
|
|
elif s(buf, data)[0:2] == 'if':
|
|
# another scary monster: the 'if' directive
|
|
flag = s(buf, data)[2:]
|
|
|
|
# recursively call parseit, just like environment above..
|
|
# the last item of data should contain the if-termination
|
|
# e.g., 'else' of 'fi'
|
|
newpos, data = parseit(buf, curpmode, newpos, lvl)
|
|
if not data or data[-1] not in ('else', 'fi'):
|
|
raise error, 'wrong if... termination' + \
|
|
lle(lvl, buf, where) + epsilon(buf, newpos)
|
|
|
|
ifterm = data[-1]
|
|
del data[-1]
|
|
# 0 means dont_negate flag
|
|
result.append(chunk(IF, where, (flag, 0, data)))
|
|
if ifterm == 'else':
|
|
# do the whole thing again, there is only one way
|
|
# to end this one, by 'fi'
|
|
newpos, data = parseit(buf, curpmode, newpos, lvl)
|
|
if not data or data[-1] not in ('fi', ):
|
|
raise error, 'wrong if...else... termination' \
|
|
+ lle(lvl, buf, where) \
|
|
+ epsilon(buf, newpos)
|
|
|
|
ifterm = data[-1]
|
|
del data[-1]
|
|
result.append(chunk(IF, where, (flag, 1, data)))
|
|
#done implicitely: return None, newpos
|
|
|
|
elif s(buf, data) in ('else', 'fi'):
|
|
result.append(s(buf, data))
|
|
# order calling party to return tuple
|
|
return (newpos, result), newpos
|
|
|
|
# end of \if, \else, ... \fi handling
|
|
|
|
elif s(buf, saveddata) == 'verb':
|
|
x2 = saveddata[1]
|
|
result.append(chunk(CSNAME, where, data))
|
|
if x2 == end:
|
|
raise error, 'premature end of command.' + lle(lvl, buf, where)
|
|
delimchar = buf[x2]
|
|
##print 'VERB: delimchar ' + `delimchar`
|
|
pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
|
|
if pos < 0:
|
|
raise error, 'end of \'verb\' argument (' + \
|
|
`delimchar` + ') not found.' + \
|
|
lle(lvl, buf, where)
|
|
result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
|
|
newpos = pos + 1
|
|
else:
|
|
result.append(chunk(CSNAME, where, data))
|
|
return None, newpos
|
|
|
|
# this is just a function to get the string value if the possible data-tuple
|
|
def s(buf, data):
    """Return the string value of chunk data.

    `data` is either a string (returned unchanged) or a sequence of two
    integers (x1, x2), for which buf[x1:x2] is returned.  Raises TypeError
    for a sequence of any other shape.
    """
    if type(data) == type(''):
        return data
    if len(data) != 2 or not (type(data[0]) == type(data[1]) == type(0)):
        raise TypeError('expected tuple of 2 integers')
    x1, x2 = data
    return buf[x1:x2]
|
|
|
|
|
|
##length, data1, i = getnextarg(length, buf, pp, i + 1)
|
|
|
|
# make a deep-copy of some chunks
|
|
def crcopy(r):
    """Return a new list holding chunkcopy() of every chunk in `r`."""
    return [chunkcopy(x) for x in r]
|
|
|
|
|
|
|
|
# copy a chunk, would better be a method of class Chunk...
|
|
def chunkcopy(ch):
    """Return a copy of chunk `ch`.

    For a GROUP chunk the contained chunks are copied recursively; for any
    other type the new chunk shares the original's data object.
    """
    if ch.chtype == chunk_type(GROUP):
        return chunk(GROUP, ch.where, [chunkcopy(sub) for sub in ch.data])
    return chunk(ch.chtype, ch.where, ch.data)
|
|
|
|
|
|
# get next argument for TeX-macro, flatten a group (insert between)
|
|
# or return Command Sequence token, or give back one character
|
|
def getnextarg(length, buf, pp, item):
|
|
|
|
##wobj = Wobj().init()
|
|
##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
|
|
##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
|
|
|
|
while item < length and pp[item].chtype == chunk_type(ENDLINE):
|
|
del pp[item]
|
|
length = length - 1
|
|
if item >= length:
|
|
raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
|
|
if pp[item].chtype == chunk_type(GROUP):
|
|
newpp = pp[item].data
|
|
del pp[item]
|
|
length = length - 1
|
|
changeit(buf, newpp)
|
|
length = length + len(newpp)
|
|
pp[item:item] = newpp
|
|
item = item + len(newpp)
|
|
if len(newpp) < 10:
|
|
wobj = Wobj().init()
|
|
dumpit(buf, wobj.write, newpp)
|
|
##print 'GETNEXTARG: inserted ' + `wobj.data`
|
|
return length, item
|
|
elif pp[item].chtype == chunk_type(PLAIN):
|
|
#grab one char
|
|
print 'WARNING: grabbing one char'
|
|
if len(s(buf, pp[item].data)) > 1:
|
|
pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
|
|
item, length = item+1, length+1
|
|
pp[item].data = s(buf, pp[item].data)[1:]
|
|
else:
|
|
item = item+1
|
|
return length, item
|
|
else:
|
|
try:
|
|
str = `s(buf, ch.data)`
|
|
except TypeError:
|
|
str = `ch.data`
|
|
if len(str) > 400:
|
|
str = str[:400] + '...'
|
|
print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
|
|
return length, item
|
|
|
|
|
|
# this one is needed to find the end of LaTeX's optional argument, like
|
|
# item[...]
|
|
re_endopt = regex.compile(']')
|
|
|
|
# get a LaTeX-optional argument, you know, the square braces '[' and ']'
|
|
def getoptarg(length, buf, pp, item):
|
|
|
|
wobj = Wobj().init()
|
|
dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
|
|
##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
|
|
|
|
if item >= length or \
|
|
pp[item].chtype != chunk_type(PLAIN) or \
|
|
s(buf, pp[item].data)[0] != '[':
|
|
return length, item
|
|
|
|
pp[item].data = s(buf, pp[item].data)[1:]
|
|
if len(pp[item].data) == 0:
|
|
del pp[item]
|
|
length = length-1
|
|
|
|
while 1:
|
|
if item == length:
|
|
raise error, 'No end of optional arg found'
|
|
if pp[item].chtype == chunk_type(PLAIN):
|
|
text = s(buf, pp[item].data)
|
|
pos = re_endopt.search(text)
|
|
if pos >= 0:
|
|
pp[item].data = text[:pos]
|
|
if pos == 0:
|
|
del pp[item]
|
|
length = length-1
|
|
else:
|
|
item=item+1
|
|
text = text[pos+1:]
|
|
|
|
while text and text[0] in ' \t':
|
|
text = text[1:]
|
|
|
|
if text:
|
|
pp.insert(item, chunk(PLAIN, 0, text))
|
|
length = length + 1
|
|
return length, item
|
|
|
|
item = item+1
|
|
|
|
|
|
# Wobj just add write-requests to the ``data'' attribute
|
|
class Wobj:
    """Write-collector: write() appends its argument to the `data` string."""

    def init(self):
        """Reset `data` to the empty string and return self."""
        self.data = ''
        return self

    def write(self, data):
        """Append string `data` to self.data."""
        self.data = self.data + data
|
|
|
|
# ignore these commands
|
|
ignoredcommands = ('bcode', 'ecode', 'optional')
# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF')
# \{ --> {,  \} --> }, etc
themselves = ('{', '}', '.', '@') + wordsselves
# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')
# this is how *I* would show the difference between emph and strong
#  code 1 means: fold to uppercase
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'),
            'strong': ('*', '*')}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}

# transparent for these commands
for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp',
            'r', 'i', 't')
|
|
|
|
|
|
# try to remove macros and return flat text
|
|
def flattext(buf, pp):
|
|
pp = crcopy(pp)
|
|
##print '---> FLATTEXT ' + `pp`
|
|
wobj = Wobj().init()
|
|
|
|
i, length = 0, len(pp)
|
|
while 1:
|
|
if len(pp) != length:
|
|
raise 'FATAL', 'inconsistent length'
|
|
if i >= length:
|
|
break
|
|
ch = pp[i]
|
|
i = i+1
|
|
if ch.chtype == chunk_type(PLAIN):
|
|
pass
|
|
elif ch.chtype == chunk_type(CSNAME):
|
|
if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
|
|
ch.chtype = chunk_type(PLAIN)
|
|
elif s(buf, ch.data) == 'e':
|
|
ch.chtype = chunk_type(PLAIN)
|
|
ch.data = '\\'
|
|
elif len(s(buf, ch.data)) == 1 \
|
|
and s(buf, ch.data) in onlylatexspecial:
|
|
ch.chtype = chunk_type(PLAIN)
|
|
# if it is followed by an empty group,
|
|
# remove that group, it was needed for
|
|
# a true space
|
|
if i < length \
|
|
and pp[i].chtype==chunk_type(GROUP) \
|
|
and len(pp[i].data) == 0:
|
|
del pp[i]
|
|
length = length-1
|
|
|
|
elif s(buf, ch.data) in markcmds.keys():
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
str = flattext(buf, pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi - i)
|
|
ch.chtype = chunk_type(PLAIN)
|
|
markcmd = s(buf, ch.data)
|
|
x = markcmds[markcmd]
|
|
if type(x) == type(()):
|
|
pre, after = x
|
|
str = pre+str+after
|
|
elif x == 1:
|
|
str = string.upper(str)
|
|
else:
|
|
raise 'FATAL', 'corrupt markcmds'
|
|
ch.data = str
|
|
else:
|
|
if s(buf, ch.data) not in ignoredcommands:
|
|
print 'WARNING: deleting command ' + `s(buf, ch.data)`
|
|
print 'PP' + `pp[i-1]`
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
elif ch.chtype == chunk_type(GROUP):
|
|
length, newi = getnextarg(length, buf, pp, i-1)
|
|
i = i-1
|
|
## str = flattext(buf, crcopy(pp[i-1:newi]))
|
|
## del pp[i:newi]
|
|
## length = length - (newi - i)
|
|
## ch.chtype = chunk_type(PLAIN)
|
|
## ch.data = str
|
|
else:
|
|
pass
|
|
|
|
dumpit(buf, wobj.write, pp)
|
|
##print 'FLATTEXT: RETURNING ' + `wobj.data`
|
|
return wobj.data
|
|
|
|
# try to generate node names (a bit shorter than the chapter title)
|
|
# note that the \nodename command (see elsewhere) overrules these efforts
|
|
def invent_node_names(text):
    """Return a node name for section title `text`, shortened when possible.

    Recognised title shapes (case-insensitive on the keyword words):
      'Built-in X'   (X not 'modules'/'functions')  -> 'X'
      'A Module X'                                   -> 'X'
      'A Object X'                                   -> 'A Object'
      '... Methods and Data Attributes' (> 4 words)  -> first two words
    Any other title is returned unchanged.
    """
    words = text.split()
    lowered = [word.lower() for word in words]

    if len(words) == 2 \
       and lowered[0] == 'built-in' \
       and lowered[1] not in ('modules', 'functions'):
        return words[1]
    if len(words) == 3 and lowered[1] == 'module':
        return words[2]
    if len(words) == 3 and lowered[1] == 'object':
        return ' '.join(words[0:2])
    if len(words) > 4 and \
       ' '.join(lowered[-4:]) == 'methods and data attributes':
        return ' '.join(words[:2])
    return text
|
|
|
|
# matches one of the characters , ` ' @ { }
re_commas_etc = regex.compile('[,`\'@{}]')

# matches a (possibly empty) run of blanks and tabs
re_whitespace = regex.compile('[ \t]*')
|
|
|
|
|
|
##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
|
|
|
|
# look if the next non-white stuff is also a command, resulting in skipping
|
|
# double endlines (DENDLINE) too, and thus omitting \par's
|
|
# Sometimes this is too much, maybe consider DENDLINE's as stop
|
|
def next_command_p(length, buf, pp, i, cmdname):
    """Tell whether the next non-white chunk from pp[i] on is \\cmdname.

    ENDLINE and DENDLINE chunks and PLAIN chunks consisting only of blanks
    and tabs are skipped.  Returns the index just after the command chunk
    when it is found, -1 otherwise.  `length` is not used.

    NOTE(review): the end-of-list case returns -1 here; confirm no caller
    distinguishes it from a mismatch.
    """
    while i < len(pp):
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type(ENDLINE):
            continue
        if ch.chtype == chunk_type(DENDLINE):
            continue
        if ch.chtype == chunk_type(PLAIN):
            text = s(buf, ch.data)
            # whitespace-only text: the match at position 0 covers it all
            if re_whitespace.search(text) == 0 and \
               re_whitespace.match(text) == len(text):
                continue
            return -1
        if ch.chtype == chunk_type(CSNAME):
            if s(buf, ch.data) == cmdname:
                return i                # _after_ the command
            return -1
        return -1
    return -1
|
|
|
|
|
|
# things that are special to LaTeX, but not to texi..
|
|
onlylatexspecial = '_~^$#&%'

class Struct:
    """Empty class; instances are used as plain attribute containers."""
    pass

# conversion state; attributes are (re)initialised by startchange()
hist = Struct()
out = Struct()
|
|
|
|
def startchange():
    """Reset the conversion state kept in the globals `hist` and `out`."""
    global hist, out

    hist.inenv = []
    hist.nodenames = []
    hist.cindex = []
    hist.inargs = 0
    hist.enumeratenesting, hist.itemizenesting = 0, 0

    out.doublenodes = []
    out.doublecindeces = []
|
|
|
|
|
|
# canned one-chunk lists
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
enumeratesymbols = ['1', 'A', 'a']
|
|
|
|
##
|
|
## \begin{ {func,data,exc}desc }{name}...
|
|
## the resulting texi-code is dependent on the contents of indexsubitem
|
|
##
|
|
|
|
# indexsubitem: `['XXX', 'function']
|
|
# funcdesc:
|
|
# deffn {`idxsi`} NAME (FUNCARGS)
|
|
|
|
# indexsubitem: `['XXX', 'method']`
|
|
# funcdesc:
|
|
# defmethod {`idxsi[0]`} NAME (FUNCARGS)
|
|
|
|
# indexsubitem: `['in', 'module', 'MODNAME']'
|
|
# datadesc:
|
|
# defcv data {`idxsi[1:]`} NAME
|
|
# excdesc:
|
|
# defcv exception {`idxsi[1:]`} NAME
|
|
# funcdesc:
|
|
# deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
|
|
|
|
# indexsubitem: `['OBJECT', 'attribute']'
|
|
# datadesc
|
|
# defcv attribute {`OBJECT`} NAME
|
|
|
|
|
|
## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
|
|
## or \funcline{NAME}{ARGS}
|
|
##
|
|
def do_funcdesc(length, buf, pp, i):
|
|
startpoint = i-1
|
|
ch = pp[startpoint]
|
|
wh = ch.where
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
funcname = chunk(GROUP, wh, pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
save = hist.inargs
|
|
hist.inargs = 1
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
hist.inargs = save
|
|
del save
|
|
the_args = [chunk(PLAIN, wh, '()'[0])] + \
|
|
pp[i:newi] + \
|
|
[chunk(PLAIN, wh, '()'[1])]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
idxsi = hist.indexsubitem # words
|
|
command = ''
|
|
cat_class = ''
|
|
if idxsi and idxsi[-1] in ('method', 'attribute'):
|
|
command = 'defmethod'
|
|
cat_class = string.join(idxsi[:-1])
|
|
elif len(idxsi) == 2 and idxsi[1] == 'function':
|
|
command = 'deffn'
|
|
cat_class = string.join(idxsi)
|
|
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
|
|
command = 'deffn'
|
|
cat_class = 'function of ' + string.join(idxsi[1:])
|
|
|
|
if not command:
|
|
raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
|
|
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = command
|
|
|
|
cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
|
|
cslinearg.append(chunk(PLAIN, wh, ' '))
|
|
cslinearg.append(funcname)
|
|
cslinearg.append(chunk(PLAIN, wh, ' '))
|
|
l = len(cslinearg)
|
|
cslinearg[l:l] = the_args
|
|
|
|
pp.insert(i, chunk(GROUP, wh, cslinearg))
|
|
i, length = i+1, length+1
|
|
hist.command = command
|
|
return length, i
|
|
|
|
|
|
## this routine will be called on \begin{excdesc}{NAME}
|
|
## or \excline{NAME}
|
|
##
|
|
def do_excdesc(length, buf, pp, i):
|
|
startpoint = i-1
|
|
ch = pp[startpoint]
|
|
wh = ch.where
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
excname = chunk(GROUP, wh, pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
idxsi = hist.indexsubitem # words
|
|
command = ''
|
|
cat_class = ''
|
|
class_class = ''
|
|
if len(idxsi) == 2 and idxsi[1] == 'exception':
|
|
command = 'defvr'
|
|
cat_class = string.join(idxsi)
|
|
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
|
|
command = 'defcv'
|
|
cat_class = 'exception'
|
|
class_class = string.join(idxsi[1:])
|
|
elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
|
|
command = 'defcv'
|
|
cat_class = 'exception'
|
|
class_class = string.join(idxsi[2:])
|
|
|
|
|
|
if not command:
|
|
raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
|
|
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = command
|
|
|
|
cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
|
|
cslinearg.append(chunk(PLAIN, wh, ' '))
|
|
if class_class:
|
|
cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
|
|
cslinearg.append(chunk(PLAIN, wh, ' '))
|
|
cslinearg.append(excname)
|
|
|
|
pp.insert(i, chunk(GROUP, wh, cslinearg))
|
|
i, length = i+1, length+1
|
|
hist.command = command
|
|
return length, i
|
|
|
|
## same for datadesc or dataline...
|
|
def do_datadesc(length, buf, pp, i):
|
|
startpoint = i-1
|
|
ch = pp[startpoint]
|
|
wh = ch.where
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
dataname = chunk(GROUP, wh, pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
idxsi = hist.indexsubitem # words
|
|
command = ''
|
|
cat_class = ''
|
|
class_class = ''
|
|
if idxsi[-1] == 'attribute':
|
|
command = 'defcv'
|
|
cat_class = 'attribute'
|
|
class_class = string.join(idxsi[:-1])
|
|
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
|
|
command = 'defcv'
|
|
cat_class = 'data'
|
|
class_class = string.join(idxsi[1:])
|
|
elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
|
|
command = 'defcv'
|
|
cat_class = 'data'
|
|
class_class = string.join(idxsi[2:])
|
|
|
|
|
|
if not command:
|
|
raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
|
|
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = command
|
|
|
|
cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
|
|
cslinearg.append(chunk(PLAIN, wh, ' '))
|
|
if class_class:
|
|
cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
|
|
cslinearg.append(chunk(PLAIN, wh, ' '))
|
|
cslinearg.append(dataname)
|
|
|
|
pp.insert(i, chunk(GROUP, wh, cslinearg))
|
|
i, length = i+1, length+1
|
|
hist.command = command
|
|
return length, i
|
|
|
|
|
|
# regular indices: those that are not set in tt font by default....
# (for these the \ttindex code in changeit() wraps the entry in @t{...}
# itself; for all other indices it wraps the remark in @r{...} instead)
regindices = ('cindex', )
|
|
|
|
# remove illegal characters from node names
|
|
def rm_commas_etc(text):
|
|
result = ''
|
|
changed = 0
|
|
while 1:
|
|
pos = re_commas_etc.search(text)
|
|
if pos >= 0:
|
|
changed = 1
|
|
result = result + text[:pos]
|
|
text = text[pos+1:]
|
|
else:
|
|
result = result + text
|
|
break
|
|
if changed:
|
|
print 'Warning: nodename changhed to ' + `result`
|
|
|
|
return result
|
|
|
|
# boolean flags
# looked up by changeit() when it meets an IF chunk; a flag that is not
# listed here is an error
flags = {'texi': 1}
|
|
|
|
|
|
##
|
|
## changeit: the actual routine, that changes the contents of the parsed
|
|
## chunks
|
|
##
|
|
|
|
def changeit(buf, pp):
|
|
global onlylatexspecial, hist, out
|
|
|
|
i, length = 0, len(pp)
|
|
while 1:
|
|
# sanity check: length should always equal len(pp)
|
|
if len(pp) != length:
|
|
raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
|
|
if i >= length:
|
|
break
|
|
ch = pp[i]
|
|
i = i + 1
|
|
|
|
if type(ch) == type(''):
|
|
#normally, only chunks are present in pp,
|
|
# but in some cases, some extra info
|
|
# has been inserted, e.g., the \end{...} clauses
|
|
raise 'FATAL', 'got string, probably too many ' + `end`
|
|
|
|
if ch.chtype == chunk_type(GROUP):
|
|
# check for {\em ...} constructs
|
|
if ch.data and \
|
|
ch.data[0].chtype == chunk_type(CSNAME) and \
|
|
s(buf, ch.data[0].data) in fontchanges.keys():
|
|
k = s(buf, ch.data[0].data)
|
|
del ch.data[0]
|
|
pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
|
|
length, i = length+1, i+1
|
|
|
|
# recursively parse the contents of the group
|
|
changeit(buf, ch.data)
|
|
|
|
elif ch.chtype == chunk_type(IF):
|
|
# \if...
|
|
flag, negate, data = ch.data
|
|
##print 'IF: flag, negate = ' + `flag, negate`
|
|
if flag not in flags.keys():
|
|
raise error, 'unknown flag ' + `flag`
|
|
|
|
value = flags[flag]
|
|
if negate:
|
|
value = (not value)
|
|
del pp[i-1]
|
|
length, i = length-1, i-1
|
|
if value:
|
|
pp[i:i] = data
|
|
length = length + len(data)
|
|
|
|
|
|
elif ch.chtype == chunk_type(ENV):
|
|
# \begin{...} ....
|
|
envname, data = ch.data
|
|
|
|
#push this environment name on stack
|
|
hist.inenv.insert(0, envname)
|
|
|
|
#append an endenv chunk after grouped data
|
|
data.append(chunk(ENDENV, ch.where, envname))
|
|
##[`data`]
|
|
|
|
#delete this object
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
|
|
#insert found data
|
|
pp[i:i] = data
|
|
length = length + len(data)
|
|
|
|
if envname == 'verbatim':
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
|
|
chunk(GROUP, ch.where, [])]
|
|
length, i = length+2, i+2
|
|
|
|
elif envname == 'itemize':
|
|
if hist.itemizenesting > len(itemizesymbols):
|
|
raise error, 'too deep itemize nesting'
|
|
ingroupch = [chunk(CSNAME, ch.where,\
|
|
itemizesymbols[hist.itemizenesting])]
|
|
hist.itemizenesting = hist.itemizenesting + 1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
length, i = length+2, i+2
|
|
|
|
elif envname == 'enumerate':
|
|
if hist.enumeratenesting > len(enumeratesymbols):
|
|
raise error, 'too deep enumerate nesting'
|
|
ingroupch = [chunk(PLAIN, ch.where,\
|
|
enumeratesymbols[hist.enumeratenesting])]
|
|
hist.enumeratenesting = hist.enumeratenesting + 1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
length, i = length+2, i+2
|
|
|
|
elif envname == 'description':
|
|
ingroupch = [chunk(CSNAME, ch.where, 'b')]
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
length, i = length+2, i+2
|
|
|
|
elif envname == 'tableiii':
|
|
wh = ch.where
|
|
newcode = []
|
|
|
|
#delete tabular format description
|
|
# e.g., {|l|c|l|}
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
newcode.append(chunk(CSLINE, wh, 'table'))
|
|
ingroupch = [chunk(CSNAME, wh, 'asis')]
|
|
newcode.append(chunk(GROUP, wh, ingroupch))
|
|
newcode.append(chunk(CSLINE, wh, 'item'))
|
|
|
|
#get the name of macro for @item
|
|
# e.g., {code}
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
|
|
if newi-i != 1:
|
|
raise error, 'Sorry, expected 1 chunk argument'
|
|
if pp[i].chtype != chunk_type(PLAIN):
|
|
raise error, 'Sorry, expected plain text argument'
|
|
hist.itemargmacro = s(buf, pp[i].data)
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
for count in range(3):
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
emphgroup = [\
|
|
chunk(CSNAME, wh, 'emph'), \
|
|
chunk(GROUP, 0, pp[i:newi])]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
if count == 0:
|
|
itemarg = emphgroup
|
|
elif count == 2:
|
|
itembody = itembody + \
|
|
[chunk(PLAIN, wh, ' --- ')] + \
|
|
emphgroup
|
|
else:
|
|
itembody = emphgroup
|
|
newcode.append(chunk(GROUP, wh, itemarg))
|
|
newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
|
|
pp[i:i] = newcode
|
|
l = len(newcode)
|
|
length, i = length+l, i+l
|
|
del newcode, l
|
|
|
|
if length != len(pp):
|
|
raise 'STILL, SOMETHING wrong', `i`
|
|
|
|
|
|
elif envname == 'funcdesc':
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_funcdesc(length, buf, pp, i)
|
|
|
|
elif envname == 'excdesc':
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_excdesc(length, buf, pp, i)
|
|
|
|
elif envname == 'datadesc':
|
|
pp.insert(i, chunk(PLAIN, ch.where, ''))
|
|
i, length = i+1, length+1
|
|
length, i = do_datadesc(length, buf, pp, i)
|
|
|
|
else:
|
|
print 'WARNING: don\'t know what to do with env ' + `envname`
|
|
|
|
elif ch.chtype == chunk_type(ENDENV):
|
|
envname = ch.data
|
|
if envname != hist.inenv[0]:
|
|
raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
|
|
del hist.inenv[0]
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
|
|
if envname == 'verbatim':
|
|
pp[i:i] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, 'example')])]
|
|
i, length = i+2, length+2
|
|
elif envname == 'itemize':
|
|
hist.itemizenesting = hist.itemizenesting - 1
|
|
pp[i:i] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, 'itemize')])]
|
|
i, length = i+2, length+2
|
|
elif envname == 'enumerate':
|
|
hist.enumeratenesting = hist.enumeratenesting-1
|
|
pp[i:i] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, 'enumerate')])]
|
|
i, length = i+2, length+2
|
|
elif envname == 'description':
|
|
pp[i:i] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, 'table')])]
|
|
i, length = i+2, length+2
|
|
elif envname == 'tableiii':
|
|
pp[i:i] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, 'table')])]
|
|
i, length = i+2, length + 2
|
|
pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
|
|
i, length = i+1, length+1
|
|
|
|
elif envname in ('funcdesc', 'excdesc', 'datadesc'):
|
|
pp[i:i] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
else:
|
|
print 'WARNING: ending env ' + `envname` + 'has no actions'
|
|
|
|
elif ch.chtype == chunk_type(CSNAME):
|
|
# control name transformations
|
|
if s(buf, ch.data) in ignoredcommands:
|
|
del pp[i-1]
|
|
i, length = i-1, length-1
|
|
elif s(buf, ch.data) == '@' and \
|
|
i != length and \
|
|
pp[i].chtype == chunk_type(PLAIN) and \
|
|
s(buf, pp[i].data)[0] == '.':
|
|
# \@. --> \. --> @.
|
|
ch.data = '.'
|
|
del pp[i]
|
|
length = length-1
|
|
elif s(buf, ch.data) == '\\':
|
|
# \\ --> \* --> @*
|
|
ch.data = '*'
|
|
elif len(s(buf, ch.data)) == 1 and \
|
|
s(buf, ch.data) in onlylatexspecial:
|
|
ch.chtype = chunk_type(PLAIN)
|
|
# check if such a command is followed by
|
|
# an empty group: e.g., `\%{}'. If so, remove
|
|
# this empty group too
|
|
if i < length and \
|
|
pp[i].chtype == chunk_type(GROUP) \
|
|
and len(pp[i].data) == 0:
|
|
del pp[i]
|
|
length = length-1
|
|
|
|
elif hist.inargs and s(buf, ch.data) in inargsselves:
|
|
# This is the special processing of the
|
|
# arguments of the \begin{funcdesc}... or
|
|
# \funcline... arguments
|
|
# \, --> , \[ --> [, \] --> ]
|
|
ch.chtype = chunk_type(PLAIN)
|
|
|
|
elif s(buf, ch.data) == 'renewcommand':
|
|
# \renewcommand{\indexsubitem}....
|
|
i, length = i-1, length-1
|
|
del pp[i]
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
if newi-i == 1 \
|
|
and i < length \
|
|
and pp[i].chtype == chunk_type(CSNAME) \
|
|
and s(buf, pp[i].data) == 'indexsubitem':
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
text = flattext(buf, pp[i:newi])
|
|
if text[0] != '(' or text[-1] != ')':
|
|
raise error, 'expected indexsubitme enclosed in braces'
|
|
words = string.split(text[1:-1])
|
|
hist.indexsubitem = words
|
|
del text, words
|
|
else:
|
|
print 'WARNING: renewcommand with unsupported arg removed'
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
elif s(buf, ch.data) == 'item':
|
|
ch.chtype = chunk_type(CSLINE)
|
|
length, newi = getoptarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
i, length = i+1, length+1
|
|
|
|
elif s(buf, ch.data) == 'ttindex':
|
|
idxsi = hist.indexsubitem
|
|
|
|
cat_class = ''
|
|
if len(idxsi) >= 2 and idxsi[1] in \
|
|
('method', 'function'):
|
|
command = 'findex'
|
|
elif len(idxsi) >= 2 and idxsi[1] in \
|
|
('exception', 'object'):
|
|
command = 'vindex'
|
|
else:
|
|
print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
|
|
command = 'cindex'
|
|
|
|
if not cat_class:
|
|
cat_class = '('+string.join(idxsi)+')'
|
|
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = command
|
|
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
arg = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
cat_arg = [chunk(PLAIN, ch.where, cat_class)]
|
|
|
|
# determine what should be set in roman, and
|
|
# what in tt-font
|
|
if command in regindices:
|
|
|
|
arg = [chunk(CSNAME, ch.where, 't'), \
|
|
chunk(GROUP, ch.where, arg)]
|
|
else:
|
|
cat_arg = [chunk(CSNAME, ch.where, 'r'), \
|
|
chunk(GROUP, ch.where, cat_arg)]
|
|
|
|
ingroupch = arg + \
|
|
[chunk(PLAIN, ch.where, ' ')] + \
|
|
cat_arg
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
|
|
elif s(buf, ch.data) == 'ldots':
|
|
# \ldots --> \dots{} --> @dots{}
|
|
ch.data = 'dots'
|
|
if i == length \
|
|
or pp[i].chtype != chunk_type(GROUP) \
|
|
or pp[i].data != []:
|
|
pp.insert(i, chunk(GROUP, ch.where, []))
|
|
i, length = i+1, length+1
|
|
elif s(buf, ch.data) in wordsselves:
|
|
# \UNIX --> UNIX
|
|
ch.chtype = chunk_type(PLAIN)
|
|
if i != length \
|
|
and pp[i].chtype == chunk_type(GROUP) \
|
|
and pp[i].data == []:
|
|
del pp[i]
|
|
length = length-1
|
|
elif s(buf, ch.data) in for_texi:
|
|
pass
|
|
|
|
elif s(buf, ch.data) == 'e':
|
|
# \e --> \
|
|
ch.data = '\\'
|
|
ch.chtype = chunk_type(PLAIN)
|
|
elif s(buf, ch.data) == 'lineiii':
|
|
# This is the most tricky one
|
|
# \lineiii{a1}{a2}[{a3}] -->
|
|
# @item @<cts. of itemargmacro>{a1}
|
|
# a2 [ -- a3]
|
|
#
|
|
##print 'LINEIIIIII!!!!!!!'
|
|
## wobj = Wobj().init()
|
|
## dumpit(buf, wobj.write, pp[i-1:i+5])
|
|
## print '--->' + wobj.data + '<----'
|
|
if not hist.inenv:
|
|
raise error, \
|
|
'no environment for lineiii'
|
|
if hist.inenv[0] != 'tableiii':
|
|
raise error, \
|
|
'wrong command (' + \
|
|
`'lineiii'` + \
|
|
') in wrong environment (' \
|
|
+ `hist.inenv[0]` + ')'
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'item'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = [chunk(CSNAME, 0, \
|
|
hist.itemargmacro), \
|
|
chunk(GROUP, 0, pp[i:newi])]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
## print 'ITEM ARG: --->',
|
|
## wobj = Wobj().init()
|
|
## dumpit(buf, wobj.write, ingroupch)
|
|
## print wobj.data, '<---'
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
grouppos = i
|
|
i, length = i+1, length+1
|
|
length, i = getnextarg(length, buf, pp, i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
if newi > i:
|
|
# we have a 3rd arg
|
|
pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
|
|
i = newi + 1
|
|
length = length + 1
|
|
## pp[grouppos].data = pp[grouppos].data \
|
|
## + [chunk(PLAIN, ch.where, ' ')] \
|
|
## + pp[i:newi]
|
|
## del pp[i:newi]
|
|
## length = length - (newi-i)
|
|
if length != len(pp):
|
|
raise 'IN LINEIII IS THE ERR', `i`
|
|
|
|
elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
|
|
#\xxxsection{A} ---->
|
|
# @node A, , ,
|
|
# @xxxsection A
|
|
## also: remove commas and quotes
|
|
ch.chtype = chunk_type(CSLINE)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
|
|
if afternodenamecmd < 0:
|
|
cp1 = crcopy(pp[i:newi])
|
|
pp[i:newi] = [\
|
|
chunk(GROUP, ch.where, \
|
|
pp[i:newi])]
|
|
length, newi = length - (newi-i) + 1, \
|
|
i+1
|
|
text = flattext(buf, cp1)
|
|
text = invent_node_names(text)
|
|
else:
|
|
length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
|
|
cp1 = crcopy(pp[afternodenamecmd:endarg])
|
|
del pp[newi:endarg]
|
|
length = length - (endarg-newi)
|
|
|
|
pp[i:newi] = [\
|
|
chunk(GROUP, ch.where, \
|
|
pp[i:newi])]
|
|
length, newi = length - (newi-i) + 1, \
|
|
i + 1
|
|
text = flattext(buf, cp1)
|
|
if text[-1] == '.':
|
|
text = text[:-1]
|
|
## print 'FLATTEXT:', `text`
|
|
if text in hist.nodenames:
|
|
print 'WARNING: node name ' + `text` + ' already used'
|
|
out.doublenodes.append(text)
|
|
else:
|
|
hist.nodenames.append(text)
|
|
text = rm_commas_etc(text)
|
|
pp[i-1:i-1] = [\
|
|
chunk(CSLINE, ch.where, 'node'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, text+', , ,')\
|
|
])]
|
|
i, length = newi+2, length+2
|
|
|
|
elif s(buf,ch.data) == 'funcline':
|
|
# fold it to a very short environment
|
|
pp[i-1:i-1] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
length, i = do_funcdesc(length, buf, pp, i)
|
|
|
|
elif s(buf,ch.data) == 'dataline':
|
|
pp[i-1:i-1] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
length, i = do_datadesc(length, buf, pp, i)
|
|
|
|
elif s(buf,ch.data) == 'excline':
|
|
pp[i-1:i-1] = [\
|
|
chunk(CSLINE, ch.where, 'end'), \
|
|
chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, hist.command)])]
|
|
i, length = i+2, length+2
|
|
length, i = do_excdesc(length, buf, pp, i)
|
|
|
|
|
|
elif s(buf, ch.data) == 'index':
|
|
#\index{A} --->
|
|
# @cindex A
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'cindex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
elif s(buf, ch.data) == 'bifuncindex':
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'findex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, \
|
|
'(built-in function)')]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
|
|
elif s(buf, ch.data) == 'opindex':
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'findex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, \
|
|
'(operator)')]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
|
|
elif s(buf, ch.data) == 'bimodindex':
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'pindex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, \
|
|
'(built-in)')]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
elif s(buf, ch.data) == 'sectcode':
|
|
ch.data = 'code'
|
|
|
|
|
|
elif s(buf, ch.data) == 'stmodindex':
|
|
ch.chtype = chunk_type(CSLINE)
|
|
# use the program index as module index
|
|
ch.data = 'pindex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = pp[i:newi]
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ingroupch.append(chunk(PLAIN, ch.where, ' '))
|
|
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
|
|
ingroupch.append(chunk(GROUP, ch.where, [\
|
|
chunk(PLAIN, ch.where, \
|
|
'(standard)')]))
|
|
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
length, i = length+1, i+1
|
|
|
|
|
|
elif s(buf, ch.data) == 'stindex':
|
|
# XXX must actually go to newindex st
|
|
wh = ch.where
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'cindex'
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
ingroupch = [chunk(CSNAME, wh, 'code'), \
|
|
chunk(GROUP, wh, pp[i:newi])]
|
|
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
t = ingroupch[:]
|
|
t.append(chunk(PLAIN, wh, ' statement'))
|
|
|
|
pp.insert(i, chunk(GROUP, wh, t))
|
|
i, length = i+1, length+1
|
|
|
|
pp.insert(i, chunk(CSLINE, wh, 'cindex'))
|
|
i, length = i+1, length+1
|
|
|
|
t = ingroupch[:]
|
|
t.insert(0, chunk(PLAIN, wh, 'statement, '))
|
|
|
|
pp.insert(i, chunk(GROUP, wh, t))
|
|
i, length = i+1, length+1
|
|
|
|
|
|
elif s(buf, ch.data) == 'indexii':
|
|
#\indexii{A}{B} --->
|
|
# @cindex A B
|
|
# @cindex B, A
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp11 = pp[i:newi]
|
|
cp21 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp12 = pp[i:newi]
|
|
cp22 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'cindex'
|
|
pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
|
|
chunk(PLAIN, ch.where, ' ')] + cp12))
|
|
i, length = i+1, length+1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
|
|
chunk(GROUP, ch.where, cp22 + [\
|
|
chunk(PLAIN, ch.where, ', ')]+ cp21)]
|
|
i, length = i+2, length+2
|
|
|
|
elif s(buf, ch.data) == 'indexiii':
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp11 = pp[i:newi]
|
|
cp21 = crcopy(pp[i:newi])
|
|
cp31 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp12 = pp[i:newi]
|
|
cp22 = crcopy(pp[i:newi])
|
|
cp32 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp13 = pp[i:newi]
|
|
cp23 = crcopy(pp[i:newi])
|
|
cp33 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'cindex'
|
|
pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
|
|
chunk(PLAIN, ch.where, ' ')] + cp12 \
|
|
+ [chunk(PLAIN, ch.where, ' ')] \
|
|
+ cp13))
|
|
i, length = i+1, length+1
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
|
|
chunk(GROUP, ch.where, cp22 + [\
|
|
chunk(PLAIN, ch.where, ' ')]+ cp23\
|
|
+ [chunk(PLAIN, ch.where, ', ')] +\
|
|
cp21)]
|
|
i, length = i+2, length+2
|
|
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
|
|
chunk(GROUP, ch.where, cp33 + [\
|
|
chunk(PLAIN, ch.where, ', ')]+ cp31\
|
|
+ [chunk(PLAIN, ch.where, ' ')] +\
|
|
cp32)]
|
|
i, length = i+2, length+2
|
|
|
|
|
|
elif s(buf, ch.data) == 'indexiv':
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp11 = pp[i:newi]
|
|
cp21 = crcopy(pp[i:newi])
|
|
cp31 = crcopy(pp[i:newi])
|
|
cp41 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp12 = pp[i:newi]
|
|
cp22 = crcopy(pp[i:newi])
|
|
cp32 = crcopy(pp[i:newi])
|
|
cp42 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp13 = pp[i:newi]
|
|
cp23 = crcopy(pp[i:newi])
|
|
cp33 = crcopy(pp[i:newi])
|
|
cp43 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
length, newi = getnextarg(length, buf, pp, i)
|
|
cp14 = pp[i:newi]
|
|
cp24 = crcopy(pp[i:newi])
|
|
cp34 = crcopy(pp[i:newi])
|
|
cp44 = crcopy(pp[i:newi])
|
|
del pp[i:newi]
|
|
length = length - (newi-i)
|
|
|
|
ch.chtype = chunk_type(CSLINE)
|
|
ch.data = 'cindex'
|
|
ingroupch = cp11 + \
|
|
spacech + cp12 + \
|
|
spacech + cp13 + \
|
|
spacech + cp14
|
|
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
|
|
i, length = i+1, length+1
|
|
ingroupch = cp22 + \
|
|
spacech + cp23 + \
|
|
spacech + cp24 + \
|
|
commach + cp21
|
|
pp[i:i] = cindexch + [\
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
i, length = i+2, length+2
|
|
ingroupch = cp33 + \
|
|
spacech + cp34 + \
|
|
commach + cp31 + \
|
|
spacech + cp32
|
|
pp[i:i] = cindexch + [\
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
i, length = i+2, length+2
|
|
ingroupch = cp44 + \
|
|
commach + cp41 + \
|
|
spacech + cp42 + \
|
|
spacech + cp43
|
|
pp[i:i] = cindexch + [\
|
|
chunk(GROUP, ch.where, ingroupch)]
|
|
i, length = i+2, length+2
|
|
|
|
|
|
|
|
else:
|
|
print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
|
|
|
|
|
|
|
|
# used by dumpit(): the characters that get a `@' in front of them when
# plain text is written
re_atsign = regex.compile('[@{}]')
# used by dumpit() to find newlines in the argument of a CSLINE command
re_newline = regex.compile('\n')
|
|
|
|
def dumpit(buf, wm, pp):
|
|
|
|
global out
|
|
|
|
i, length = 0, len(pp)
|
|
|
|
addspace = 0
|
|
|
|
while 1:
|
|
if len(pp) != length:
|
|
raise 'FATAL', 'inconsistent length'
|
|
if i == length:
|
|
break
|
|
ch = pp[i]
|
|
i = i + 1
|
|
|
|
if addspace:
|
|
dospace = 1
|
|
addspace = 0
|
|
else:
|
|
dospace = 0
|
|
|
|
if ch.chtype == chunk_type(CSNAME):
|
|
wm('@' + s(buf, ch.data))
|
|
if s(buf, ch.data) == 'node' and \
|
|
pp[i].chtype == chunk_type(PLAIN) and \
|
|
s(buf, pp[i].data) in out.doublenodes:
|
|
##XXX doesnt work yet??
|
|
wm(' ZZZ-' + zfill(`i`, 4))
|
|
if s(buf, ch.data)[0] in string.letters:
|
|
addspace = 1
|
|
elif ch.chtype == chunk_type(PLAIN):
|
|
if dospace and s(buf, ch.data) not in (' ', '\t'):
|
|
wm(' ')
|
|
text = s(buf, ch.data)
|
|
while 1:
|
|
pos = re_atsign.search(text)
|
|
if pos < 0:
|
|
break
|
|
wm(text[:pos] + '@' + text[pos])
|
|
text = text[pos+1:]
|
|
wm(text)
|
|
elif ch.chtype == chunk_type(GROUP):
|
|
wm('{')
|
|
dumpit(buf, wm, ch.data)
|
|
wm('}')
|
|
elif ch.chtype == chunk_type(DENDLINE):
|
|
wm('\n\n')
|
|
while i != length and pp[i].chtype in \
|
|
(chunk_type(DENDLINE), chunk_type(ENDLINE)):
|
|
i = i + 1
|
|
elif ch.chtype == chunk_type(OTHER):
|
|
wm(s(buf, ch.data))
|
|
elif ch.chtype == chunk_type(ACTIVE):
|
|
wm(s(buf, ch.data))
|
|
elif ch.chtype == chunk_type(ENDLINE):
|
|
wm('\n')
|
|
elif ch.chtype == chunk_type(CSLINE):
|
|
if i >= 2 and pp[i-2].chtype not in \
|
|
(chunk_type(ENDLINE), chunk_type(DENDLINE)) \
|
|
and (pp[i-2].chtype != chunk_type(PLAIN) \
|
|
or s(buf, pp[i-2].data)[-1] != '\n'):
|
|
|
|
wm('\n')
|
|
wm('@' + s(buf, ch.data))
|
|
if i == length:
|
|
raise error, 'CSLINE expected another chunk'
|
|
if pp[i].chtype != chunk_type(GROUP):
|
|
raise error, 'CSLINE expected GROUP'
|
|
if type(pp[i].data) != type([]):
|
|
raise error, 'GROUP chould contain []-data'
|
|
|
|
wobj = Wobj().init()
|
|
dumpit(buf, wobj.write, pp[i].data)
|
|
i = i + 1
|
|
text = wobj.data
|
|
del wobj
|
|
if text:
|
|
wm(' ')
|
|
while 1:
|
|
pos = re_newline.search(text)
|
|
if pos < 0:
|
|
break
|
|
print 'WARNING: found newline in csline arg'
|
|
wm(text[:pos] + ' ')
|
|
text = text[pos+1:]
|
|
wm(text)
|
|
if i >= length or \
|
|
pp[i].chtype not in (chunk_type(CSLINE), \
|
|
chunk_type(ENDLINE), chunk_type(DENDLINE)) \
|
|
and (pp[i].chtype != chunk_type(PLAIN) \
|
|
or s(buf, pp[i].data)[0] != '\n'):
|
|
wm('\n')
|
|
|
|
elif ch.chtype == chunk_type(COMMENT):
|
|
## print 'COMMENT: previous chunk =', pp[i-2]
|
|
if pp[i-2].chtype == chunk_type(PLAIN):
|
|
print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
|
|
if s(buf, ch.data) and \
|
|
regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
|
|
if i >= 2 and pp[i-2].chtype not in \
|
|
(chunk_type(ENDLINE), chunk_type(DENDLINE)) \
|
|
and not (pp[i-2].chtype == chunk_type(PLAIN) \
|
|
and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
|
|
print 'ADDING NEWLINE'
|
|
wm('\n')
|
|
wm('@c ' + s(buf, ch.data))
|
|
elif ch.chtype == chunk_type(IGNORE):
|
|
pass
|
|
else:
|
|
try:
|
|
str = `s(buf, ch.data)`
|
|
except TypeError:
|
|
str = `ch.data`
|
|
if len(str) > 400:
|
|
str = str[:400] + '...'
|
|
print 'warning:', ch.chtype, 'not handled, data ' + str
|
|
|
|
|
|
|
|
def main():
|
|
outfile = None
|
|
headerfile = 'texipre.dat'
|
|
trailerfile = 'texipost.dat'
|
|
|
|
try:
|
|
opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
|
|
except getopt.error:
|
|
args = []
|
|
|
|
if not args:
|
|
print 'usage: partparse [-o outfile] [-h headerfile]',
|
|
print '[-t trailerfile] file ...'
|
|
sys.exit(2)
|
|
|
|
for opt, arg in opts:
|
|
if opt == '-o': outfile = arg
|
|
if opt == '-h': headerfile = arg
|
|
if opt == '-t': trailerfile = arg
|
|
|
|
if not outfile:
|
|
root, ext = os.path.splitext(args[0])
|
|
outfile = root + '.texi'
|
|
|
|
if outfile in args:
|
|
print 'will not overwrite input file', outfile
|
|
sys.exit(2)
|
|
|
|
outf = open(outfile, 'w')
|
|
outf.write(open(headerfile, 'r').read())
|
|
|
|
for file in args:
|
|
if len(args) > 1: print '='*20, file, '='*20
|
|
buf = open(file, 'r').read()
|
|
w, pp = parseit(buf)
|
|
startchange()
|
|
changeit(buf, pp)
|
|
dumpit(buf, outf.write, pp)
|
|
|
|
outf.write(open(trailerfile, 'r').read())
|
|
|
|
outf.close()
|
|
|
|
# Only convert when run as a script, so that importing this module
# (e.g. to use its routines) has no side effects.
if __name__ == '__main__':
    main()
|