mirror of https://github.com/python/cpython.git synced 2024-12-01 11:15:56 +01:00
Fred Drake e8b46132e2 Massive changes. Fewer warnings from the Python Library Reference. Still
messy, but the thing seems to be working without bombing completely today.
Formatting lib.texi with TeX doesn't seem to do too badly, either!

Info formatting isn't quite there; that might just have to disappear this
1998-02-17 05:54:46 +00:00

2305 lines
64 KiB

# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
# and generate texinfo source.
# This is *not* a good example of good programming practices. In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and maintainable.
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
# Have I been clear enough??
# -jh
# Yup. I made some performance improvements and hope this lasts a while;
# I don't want to be the schmuck who ends up re-writing it!
# -fld
# (sometime later...)
# Ok, I've re-worked substantial chunks of this. It's only getting worse.
# It just might be gone before the next source release. (Yeah!)
# -fld
import sys, string, regex, getopt, os
from types import IntType, ListType, StringType, TupleType
release_version = sys.version[:3]
# Different parse modes for phase 1
# Show the neighbourhood of the scanned buffer
def epsilon(buf, where):
    # Return a debugging string that shows up to 10 characters of `buf`
    # on either side of position `where`, clipped to the buffer bounds.
    wmt = max(where - 10, 0)
    wpt = min(where + 10, len(buf))
    return ' Context ' + repr(buf[wmt:where]) + '.' + repr(buf[where:wpt]) + '.'
# Should return the line number. never worked
def lin():
    # Format the module-level `lineno` counter (maintained by parseit)
    # for inclusion in an error message.
    global lineno
    return ' Line ' + repr(lineno) + '.'
# Displays the recursion level.
def lv(lvl):
    # Format the recursion level for inclusion in an error message.
    return ' Level ' + repr(lvl) + '.'
# Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    # Level + line + context: the standard suffix for parser diagnostics.
    return lv(lvl) + lin() + epsilon(buf, where)
# This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
def __init__(self, arg):
if arg not in the_modes:
raise ValueError, 'mode not in the_modes'
self.mode = arg
def __cmp__(self, other):
if type(self) != type(other):
other = mode[other]
return cmp(self.mode, other.mode)
def __repr__(self):
if self.mode == MODE_REGULAR:
elif self.mode == MODE_VERBATIM:
elif self.mode == MODE_CS_SCAN:
return 'MODE_CS_SCAN'
elif self.mode == MODE_COMMENT:
elif self.mode == MODE_MATH:
return 'MODE_MATH'
elif self.mode == MODE_DMATH:
return 'MODE_DMATH'
elif self.mode == MODE_GOBBLEWHITE:
raise ValueError, 'mode not in the_modes'
# just a wrapper around a class initialisation
# Table mapping every raw MODE_* constant to its Mode wrapper.
mode = {}
for t in the_modes:
    mode[t] = Mode(t)
# After phase 1, the text consists of chunks, with a certain type
# this type will be assigned to the chtype member of the chunk
# the where-field contains the file position where this is found
# and the data field contains (1): a tuple describing start and end
# positions of the substring (can be used as slice for the buf-variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks
# Chunk-type constants; each one is wrapped in a ChunkType instance below.
# NOTE(review): ChunkType.__repr__ also refers to MATH, OTHER, ACTIVE,
# GOBBLEDWHITE and ENDENV, and the wrapper loop iterates over `the_types`;
# none of these is defined in the lines visible here (values 5-8 and 14 are
# unassigned) -- their definitions appear to be missing from this copy.
PLAIN = 0 # ASSUME PLAINTEXT, data = the text
GROUP = 1 # GROUP ({}), data = [chunk, chunk,..]
CSNAME = 2 # CONTROL SEQ TOKEN, data = the command
COMMENT = 3 # data is the actual comment
DMATH = 4 # DISPLAYMATH, data = [chunk, chunk,..]
ENDLINE = 9 # END-OF-LINE, data = '\n'
DENDLINE = 10 # DOUBLE EOL, data='\n', indicates \par
ENV = 11 # LaTeX-environment
# data =(envname,[ch,ch,ch,.])
CSLINE = 12 # for texi: next chunk will be one group
# of args. Will be set all on 1 line
IGNORE = 13 # IGNORE this data
IF = 15 # IF-directive
# data = (flag,negate,[ch, ch, ch,...])
# class, just to display symbolic name
class ChunkType:
def __init__(self, chunk_type):
if chunk_type not in the_types:
raise ValueError, 'chunk_type not in the_types'
self.chunk_type = chunk_type
def __cmp__(self, other):
if type(self) != type(other):
other = chunk_type[other]
return cmp(self.chunk_type, other.chunk_type)
def __repr__(self):
if self.chunk_type == PLAIN:
return 'PLAIN'
elif self.chunk_type == GROUP:
return 'GROUP'
elif self.chunk_type == CSNAME:
return 'CSNAME'
elif self.chunk_type == COMMENT:
return 'COMMENT'
elif self.chunk_type == DMATH:
return 'DMATH'
elif self.chunk_type == MATH:
return 'MATH'
elif self.chunk_type == OTHER:
return 'OTHER'
elif self.chunk_type == ACTIVE:
return 'ACTIVE'
elif self.chunk_type == GOBBLEDWHITE:
elif self.chunk_type == DENDLINE:
return 'DENDLINE'
elif self.chunk_type == ENDLINE:
return 'ENDLINE'
elif self.chunk_type == ENV:
return 'ENV'
elif self.chunk_type == CSLINE:
return 'CSLINE'
elif self.chunk_type == IGNORE:
return 'IGNORE'
elif self.chunk_type == ENDENV:
return 'ENDENV'
elif self.chunk_type == IF:
return 'IF'
raise ValueError, 'chunk_type not in the_types'
# ...and the wrapper
# Table mapping every raw chunk-type constant to its ChunkType wrapper.
chunk_type = {}
for t in the_types:
    chunk_type[t] = ChunkType(t)
# store a type object of the ChunkType-class-instance...
chunk_type_type = type(chunk_type[PLAIN])
# this class contains a part of the parsed buffer
class Chunk:
    # One piece of the parsed buffer: a type, a buffer position and data.

    def __init__(self, chtype, where, data):
        # Accept either a ChunkType instance or a raw chunk-type constant;
        # a raw constant is replaced by its wrapper.
        if type(chtype) != chunk_type_type:
            chtype = chunk_type[chtype]
        self.chtype = chtype
        self.where = where
        self.data = data

    def __repr__(self):
        return 'chunk' + repr((self.chtype, self.where, self.data))
# and the wrapper
# `chunk` is the lower-case alias used throughout the file to build Chunks.
chunk = Chunk
# String exception raised for parse and conversion errors.
error = 'partparse.error'
# TeX's catcodes...
# and the names
# NOTE(review): the CC_* constants are not defined in the visible lines and
# the list literal below is never closed -- content appears to be missing.
cc_names = [
# Show a list of catcode-name-symbols
def pcl(codelist):
    # Render a list of catcode ids as '[NAME, NAME, ...]' using cc_names.
    names = []
    for code in codelist:
        names.append(cc_names[code])
    return '[' + string.join(names, ', ') + ']'
# the name of the catcode (ACTIVE, OTHER, etc.)
def pc(code):
    # Look up the symbolic name of a single catcode id.
    return cc_names[code]
# Which catcodes make the parser stop parsing regular plaintext
# same for scanning a control sequence name
# NOTE(review): make_rc_regular() uses `regular_stopcodes`, which is not
# defined in the visible lines; its definition appears to be missing here.
# Catcodes that may appear inside a control sequence name.
csname_scancodes = [CC_LETTER]
# same for gobbling LWSP
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]
# make a list of all catcode id's, except for catcode ``other''
all_but_other_codes = range(16)
del all_but_other_codes[CC_OTHER]
##print all_but_other_codes
# when does a comment end
comment_stopcodes = [CC_ENDLINE]
# gather all characters together, specified by a list of catcodes
def code2string(cc, codelist):
    # Concatenate the character sets that the catcode table `cc` holds for
    # each category in `codelist`, in the order given.  Categories whose
    # entry is empty or None contribute nothing.
    result = ''
    for category in codelist:
        if cc[category]:
            result = result + cc[category]
    return result
# automatically generate all characters of catcode other, being the
# complement set within character codes 0..255 (256 characters)
def make_other_codes(cc):
    # Build the string of all characters (codes 0..255) that are not
    # claimed by any category listed in all_but_other_codes.
    otherchars = range(256)
    for category in all_but_other_codes:
        if cc[category]:
            for c in cc[category]:
                # Knock out every character that already has a catcode.
                otherchars[ord(c)] = None
    result = ''
    for i in otherchars:
        # Identity test: code 0 is a legitimate survivor, None is not.
        if i is not None:
            result = result + chr(i)
    return result
# catcode dump (which characters have which catcodes).
def dump_cc(name, cc):
##print '\t' + name
##print '=' * (8+len(name))
if len(cc) != 16:
raise TypeError, 'cc not good cat class'
## for i in range(16):
## print pc(i) + '\t' + `cc[i]`
# In the beginning,....
# Catcode tables: a list of 16 entries indexed by CC_* id, each entry either
# None or a string holding the characters that have that catcode.
# Each table below starts as a copy of the previous one and is then refined.
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
'\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)
# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B' # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01' # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C' # '~' and C-l
# Everything not claimed above becomes catcode ``other''.
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)
# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist
my_cc = lplain_cc[:]
# The slice drops the first character of the subscript set, which is '_'.
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_' # add it to OTHER list
# With the print lines in dump_cc disabled this only checks the table length.
dump_cc('my_cc', my_cc)
# needed for un_re, my equivalent for regexp-quote in Emacs
# Characters that un_re() treats as special and escapes.
re_meaning = '\\[]^$'


def un_re(str):
    # Return `str` with a backslash inserted before every character listed
    # in re_meaning, so the result matches the input literally.
    result = ''
    for i in str:
        if i in re_meaning:
            result = result + '\\'
        result = result + i
    return result
# NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    # Compile a character class matching any character that stops the
    # scan of regular plain text.
    # problems here if '[]' are included!!
    return regex.compile('[' + code2string(cc, regular_stopcodes) + ']')
def make_rc_cs_scan(cc):
    # Negated class: matches the first character that cannot be part of
    # a control sequence name.
    return regex.compile('[^' + code2string(cc, csname_scancodes) + ']')
def make_rc_comment(cc):
    # Matches the character that terminates a comment.
    return regex.compile('[' + code2string(cc, comment_stopcodes) + ']')
def make_rc_endwhite(cc):
    # Negated class: matches the first character that is not white space.
    return regex.compile('[^' + code2string(cc, white_scancodes) + ']')
# regular: normal mode:
# Compiled scanners used by parseit(), all built from the my_cc table.
rc_regular = make_rc_regular(my_cc)
# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the end of a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the first non-white character
rc_endwhite = make_rc_endwhite(my_cc)
# RECURSION-LEVEL is incremented on entry.
# result contains the list of chunks returned
# together with this list, the buffer position is returned
# RECURSION-LEVEL will be set to zero *again*, when recursively a
# {,D}MATH-mode scan has been entered.
# This has been done in order to better check for environment-mismatches
def parseit(buf, parsemode=mode[MODE_REGULAR], start=0, lvl=0):
# Phase-1 scanner.  Scans `buf` from `start` in the given parse mode and
# returns a 2-tuple (new buffer position, data).  In the regular/math modes
# the data is the list of chunks collected in `result`; in the CS_SCAN,
# GOBBLEWHITE and COMMENT modes it is a (start, end) slice tuple (or '' for
# GOBBLEWHITE at end of buffer).  Raises EOFError on premature end of input
# and `error` on structural problems or an unknown mode.
# NOTE(review): indentation is lost in this copy and some lines (e.g.
# `else:` / `continue`) appear to be missing, so the nesting described in
# the comments below is inferred, not visible.
global lineno
result = []
end = len(buf)
# A top-level regular parse restarts the line counter.
if lvl == 0 and parsemode == mode[MODE_REGULAR]:
lineno = 1
lvl = lvl + 1
##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
# some of the more regular modes...
if parsemode in (mode[MODE_REGULAR], mode[MODE_DMATH], mode[MODE_MATH]):
cstate = []
newpos = start
curpmode = parsemode
while 1:
where = newpos
#print '\tnew round: ' + epsilon(buf, where)
if where == end:
if lvl > 1 or curpmode != mode[MODE_REGULAR]:
# not the way we started...
raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
# the real ending of lvl-1 parse
return end, result
# Find the next character that ends a run of plain text.
pos = rc_regular.search(buf, where)
if pos < 0:
pos = end
if pos != where:
# NOTE(review): `c` is built but no append/continue is visible here;
# presumably lines are missing from this copy.
newpos, c = pos, chunk(PLAIN, where, (where, pos))
# ok, pos == where and pos != end
foundchar = buf[where]
if foundchar in my_cc[CC_LBRACE]:
# recursive subgroup parse...
newpos, data = parseit(buf, curpmode, where+1, lvl)
result.append(chunk(GROUP, where, data))
elif foundchar in my_cc[CC_RBRACE]:
if lvl <= 1:
raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
# NOTE(review): `mode` is the module-level table, not the current parse
# mode (probably `curpmode` was meant), and `lvl == 1` cannot hold after
# the check above -- this test looks like dead code; confirm.
if lvl == 1 and mode != mode[MODE_REGULAR]:
# NOTE(review): the `+lin() + epsilon(buf, where)` part is inside the
# string literal, so it is emitted verbatim instead of being evaluated.
raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
return where + 1, result
elif foundchar in my_cc[CC_ESCAPE]:
# call the routine that actually deals with
# this problem. If do_ret is not None, then
# return the value of do_ret
# Note that handle_cs might call this routine
# recursively again...
do_ret, newpos = handlecs(buf, where,
curpmode, lvl, result, end)
if do_ret != None:
return do_ret
elif foundchar in my_cc[CC_COMMENT]:
newpos, data = parseit(buf,
mode[MODE_COMMENT], where+1, lvl)
result.append(chunk(COMMENT, where, data))
elif foundchar in my_cc[CC_MATHSHIFT]:
# note that recursive calls to math-mode
# scanning are called with recursion-level 0
# again, in order to check for bad mathend
if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]:
# double mathshift, e.g. '$$'
if curpmode == mode[MODE_REGULAR]:
newpos, data = parseit(buf, mode[MODE_DMATH],
where + 2, 0)
result.append(chunk(DMATH, where, data))
elif curpmode == mode[MODE_MATH]:
raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
elif lvl != 1:
raise error, 'bad mathend.' + lle(lvl, buf, where)
return where + 2, result
# single math shift, e.g. '$'
if curpmode == mode[MODE_REGULAR]:
newpos, data = parseit(buf, mode[MODE_MATH],
where + 1, 0)
result.append(chunk(MATH, where, data))
elif curpmode == mode[MODE_DMATH]:
raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
elif lvl != 1:
# NOTE(review): lv() is defined with a single parameter; this call passes
# three and would raise TypeError.  The '$$' branch above uses lle() here.
raise error, 'bad mathend.' + lv(lvl, buf, where)
return where + 1, result
elif foundchar in my_cc[CC_IGNORE]:
print 'warning: ignored char', `foundchar`
newpos = where + 1
elif foundchar in my_cc[CC_ACTIVE]:
result.append(chunk(ACTIVE, where, foundchar))
newpos = where + 1
elif foundchar in my_cc[CC_INVALID]:
raise error, 'invalid char ' + `foundchar`
# NOTE(review): unreachable after the raise above.
newpos = where + 1
elif foundchar in my_cc[CC_ENDLINE]:
# after an end of line, eat the rest of
# whitespace on the beginning of the next line
# this is what LaTeX more or less does
# also, try to indicate double newlines (\par)
lineno = lineno + 1
savedwhere = where
newpos, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], where + 1, lvl)
# NOTE(review): the three appends below presumably belong to separate
# if/else branches whose `else:` lines are missing from this copy.
if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]:
result.append(chunk(DENDLINE, savedwhere, foundchar))
result.append(chunk(ENDLINE, savedwhere, foundchar))
result.append(chunk(OTHER, where, foundchar))
newpos = where + 1
elif parsemode == mode[MODE_CS_SCAN]:
# scan for a control sequence token. `\ape', `\nut' or `\%'
if start == end:
raise EOFError, 'can\'t find end of csname'
pos = rc_cs_scan.search(buf, start)
if pos < 0:
pos = end
if pos == start:
# first non-letter right where we started the search
# ---> the control sequence name consists of one single
# character. Also: don't eat white space...
if buf[pos] in my_cc[CC_ENDLINE]:
lineno = lineno + 1
pos = pos + 1
return pos, (start, pos)
# Multi-letter name: skip the white space that follows it.
spos = pos
if buf[pos] == '\n':
lineno = lineno + 1
spos = pos + 1
pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], spos, lvl)
return pos2, (start, pos)
elif parsemode == mode[MODE_GOBBLEWHITE]:
if start == end:
return start, ''
pos = rc_endwhite.search(buf, start)
# NOTE(review): when no non-white character is found the position is reset
# to `start`, i.e. nothing is consumed; `end` may have been intended.
if pos < 0:
pos = start
return pos, (start, pos)
elif parsemode == mode[MODE_COMMENT]:
pos = rc_comment.search(buf, start)
# The counter is bumped before the search result is checked.
lineno = lineno + 1
if pos < 0:
print 'no newline perhaps?'
raise EOFError, 'can\'t find end of comment'
pos = pos + 1
pos2, dummy = parseit(buf, mode[MODE_GOBBLEWHITE], pos, lvl)
return pos2, (start, pos)
raise error, 'Unknown mode (' + `parsemode` + ')'
#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
#boxcommands = 'mbox', 'fbox'
#defcommands = 'def', 'newcommand'
# Literal terminator of a verbatim environment and a regex that matches it
# (un_re escapes the leading backslash).
endverbstr = '\\end{verbatim}'
re_endverb = regex.compile(un_re(endverbstr))
# handlecs: helper function for parseit, for the special thing we might
# wanna do after certain command control sequences
# returns: None or return_data, newpos
# in the latter case, the calling function is instructed to immediately
# return with the data in return_data
def handlecs(buf, where, curpmode, lvl, result, end):
# Handle the control sequence whose escape character sits at buf[where].
# Appends chunks to `result` and returns a 2-tuple (do_ret, newpos):
# do_ret is None when parseit should simply continue at newpos, or a
# (newpos, result) tuple that parseit must return immediately.
# NOTE(review): indentation is lost and some `else:` lines appear to be
# missing in this copy; the branch structure below is inferred.
global lineno
# get the control sequence name...
newpos, data = parseit(buf, mode[MODE_CS_SCAN], where+1, lvl)
saveddata = data
s_buf_data = s(buf, data)
if s_buf_data in ('begin', 'end'):
# skip the expected '{' and get the LaTeX-envname '}'
newpos, data = parseit(buf, mode[MODE_REGULAR], newpos+1, lvl)
if len(data) != 1:
raise error, 'expected 1 chunk of data.' + lle(lvl, buf, where)
# yucky, we've got an environment
envname = s(buf, data[0].data)
s_buf_saveddata = s(buf, saveddata)
##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
if s_buf_saveddata == 'begin' and envname == 'verbatim':
# verbatim deserves special treatment
# The body is taken as one PLAIN chunk up to the literal \end{verbatim}.
pos = re_endverb.search(buf, newpos)
if pos < 0:
raise error, "%s not found.%s" \
% (`endverbstr`, lle(lvl, buf, where))
result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
newpos = pos + len(endverbstr)
elif s_buf_saveddata == 'begin':
# start parsing recursively... If that parse returns
# from an '\end{...}', then should the last item of
# the returned data be a string containing the ended
# environment
newpos, data = parseit(buf, curpmode, newpos, lvl)
if not data or type(data[-1]) is not StringType:
raise error, "missing 'end'" + lle(lvl, buf, where) \
+ epsilon(buf, newpos)
retenv = data[-1]
del data[-1]
if retenv != envname:
#[`retenv`, `envname`]
raise error, 'environments do not match.%s%s' \
% (lle(lvl, buf, where), epsilon(buf, newpos))
result.append(chunk(ENV, where, (retenv, data)))
# NOTE(review): the code that appends the environment name for the 'end'
# case is not visible here; presumably an `else:` branch is missing.
# 'end'... append the environment name, as just
# pointed out, and order parsit to return...
##print 'POINT of return: ' + epsilon(buf, newpos)
# the tuple will be returned by parseit
return (newpos, result), newpos
# end of \begin ... \end handling
# Any control sequence whose name starts with 'if' takes this branch.
elif s_buf_data[0:2] == 'if':
# another scary monster: the 'if' directive
flag = s_buf_data[2:]
# recursively call parseit, just like environment above..
# the last item of data should contain the if-termination
# e.g., 'else' of 'fi'
newpos, data = parseit(buf, curpmode, newpos, lvl)
if not data or data[-1] not in ('else', 'fi'):
raise error, 'wrong if... termination' + \
lle(lvl, buf, where) + epsilon(buf, newpos)
ifterm = data[-1]
del data[-1]
# 0 means dont_negate flag
result.append(chunk(IF, where, (flag, 0, data)))
if ifterm == 'else':
# do the whole thing again, there is only one way
# to end this one, by 'fi'
newpos, data = parseit(buf, curpmode, newpos, lvl)
if not data or data[-1] not in ('fi', ):
raise error, 'wrong if...else... termination' \
+ lle(lvl, buf, where) \
+ epsilon(buf, newpos)
ifterm = data[-1]
del data[-1]
# 1 means negate: this chunk holds the \else part
result.append(chunk(IF, where, (flag, 1, data)))
#done implicitely: return None, newpos
elif s_buf_data in ('else', 'fi'):
# The terminator is appended as a bare string so the caller can detect it.
result.append(s(buf, data))
# order calling party to return tuple
return (newpos, result), newpos
# end of \if, \else, ... \fi handling
elif s(buf, saveddata) == 'verb':
# \verb: the character right after the name delimits the argument.
x2 = saveddata[1]
result.append(chunk(CSNAME, where, data))
if x2 == end:
raise error, 'premature end of command.' + lle(lvl, buf, where)
delimchar = buf[x2]
##print 'VERB: delimchar ' + `delimchar`
pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
if pos < 0:
raise error, 'end of \'verb\' argument (' + \
`delimchar` + ') not found.' + \
lle(lvl, buf, where)
result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
newpos = pos + 1
# NOTE(review): presumably the default (`else:`) case for every other
# control sequence; as shown it would also run after the 'verb' branch.
result.append(chunk(CSNAME, where, data))
return None, newpos
# this is just a function to get the string value of the possible data-tuple
def s(buf, data):
if type(data) is StringType:
return data
if len(data) != 2 or not (type(data[0]) is type(data[1]) is IntType):
raise TypeError, 'expected tuple of 2 integers'
x1, x2 = data
return buf[x1:x2]
##length, data1, i = getnextarg(length, buf, pp, i + 1)
# make a deep-copy of some chunks
def crcopy(r):
    # Copy a list of chunks, copying each chunk with chunkcopy().
    return map(chunkcopy, r)
# copy a chunk, would better be a method of class Chunk...
def chunkcopy(ch):
    # Return a new chunk equal to `ch`.  GROUP chunks are copied
    # recursively; for every other type the data object is shared.
    if ch.chtype == chunk_type[GROUP]:
        return chunk(GROUP, ch.where, map(chunkcopy, ch.data))
    return chunk(ch.chtype, ch.where, ch.data)
# get next argument for TeX-macro, flatten a group (insert between)
# or return Command Sequence token, or give back one character
def getnextarg(length, buf, pp, item):
# Fetch the next macro argument starting at pp[item], editing `pp` in place.
# Returns (length, item): the updated length of pp and the index just past
# the argument.  Raises `error` when no argument is left.
##wobj = Wobj()
##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
# Leading ENDLINE chunks are dropped.
while item < length and pp[item].chtype == chunk_type[ENDLINE]:
del pp[item]
length = length - 1
if item >= length:
raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
if pp[item].chtype == chunk_type[GROUP]:
# Flatten the group: convert its contents and splice them into pp.
newpp = pp[item].data
del pp[item]
length = length - 1
changeit(buf, newpp)
length = length + len(newpp)
pp[item:item] = newpp
item = item + len(newpp)
if len(newpp) < 10:
wobj = Wobj()
dumpit(buf, wobj.write, newpp)
##print 'GETNEXTARG: inserted ' + `wobj.data`
return length, item
elif pp[item].chtype == chunk_type[PLAIN]:
#grab one char
print 'WARNING: grabbing one char'
if len(s(buf, pp[item].data)) > 1:
# Split off the first character as its own PLAIN chunk.
pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
item, length = item+1, length+1
pp[item].data = s(buf, pp[item].data)[1:]
item = item+1
return length, item
# NOTE(review): a `try:` (and probably an `else:` before `ch = ...`) is
# missing from this copy; the `except TypeError:` below has no visible try.
ch = pp[item]
str = `s(buf, ch.data)`
except TypeError:
str = `ch.data`
# Keep the diagnostic short.
if len(str) > 400:
str = str[:400] + '...'
print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
return length, item
# this one is needed to find the end of LaTeX's optional argument, like
# item[...]
re_endopt = regex.compile(']')
# get a LaTeX-optional argument, you know, the square braces '[' and ']'
def getoptarg(length, buf, pp, item):
# Returns (length, item) after editing `pp` in place.  If pp[item] is not a
# PLAIN chunk starting with '[', nothing is changed.  Raises `error` when
# the closing ']' is not found.
wobj = Wobj()
dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
if item >= length or \
pp[item].chtype != chunk_type[PLAIN] or \
s(buf, pp[item].data)[0] != '[':
return length, item
# Strip the opening '[' and drop the chunk if nothing remains.
pp[item].data = s(buf, pp[item].data)[1:]
if len(pp[item].data) == 0:
del pp[item]
length = length-1
while 1:
if item == length:
raise error, 'No end of optional arg found'
if pp[item].chtype == chunk_type[PLAIN]:
text = s(buf, pp[item].data)
pos = re_endopt.search(text)
if pos >= 0:
# Keep the text before ']' in this chunk.
pp[item].data = text[:pos]
# NOTE(review): only the pos == 0 case is visible; presumably an `else:`
# that advances `item` is missing from this copy.
if pos == 0:
del pp[item]
length = length-1
# Whatever follows ']' (minus leading blanks/tabs) becomes a new chunk.
text = text[pos+1:]
while text and text[0] in ' \t':
text = text[1:]
if text:
pp.insert(item, chunk(PLAIN, 0, text))
length = length + 1
return length, item
item = item+1
# Wobj just add write-requests to the ``data'' attribute
class Wobj:
    # Minimal write-only sink: every write() is appended to `data`.
    # The class attribute is the empty starting value; the first write
    # creates a per-instance `data`.
    data = ''

    def write(self, data):
        self.data = self.data + data
# ignore these commands
# Commands that flattext() removes without printing a warning.
ignoredcommands = ('bcode', 'ecode', 'hline', 'small', '/')
# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX', 'POSIX')
# \{ --> {, \} --> }, etc
themselves = ('{', '}', ',', '.', '@', ' ', '\n') + wordsselves
# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')
# this is how *I* would show the difference between emph and strong
# code 1 means: fold to uppercase
# a tuple value means: (prefix, suffix) to wrap around the text
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'),
'strong': ('*', '*')}
# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}
# transparent for these commands
for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp',
'file', 'r', 'i', 't')
# try to remove macros and return flat text
def flattext(buf, pp):
# Works on a copy of the chunk list `pp`, rewrites or removes command
# chunks, then dumps the result through dumpit() and returns it as a string.
# NOTE(review): indentation is lost and several lines (`break`, `pass`,
# `else:` and similar) appear to be missing in this copy, so some branches
# below have no visible body.
pp = crcopy(pp)
##print '---> FLATTEXT ' + `pp`
wobj = Wobj()
i, length = 0, len(pp)
while 1:
# `length` is tracked by hand and must stay equal to len(pp).
if len(pp) != length:
raise 'FATAL', 'inconsistent length'
if i >= length:
ch = pp[i]
i = i+1
if ch.chtype == chunk_type[PLAIN]:
elif ch.chtype == chunk_type[CSNAME]:
s_buf_data = s(buf, ch.data)
# Commands that stand for themselves become plain text.
if s_buf_data in themselves or hist.inargs and s_buf_data in inargsselves:
ch.chtype = chunk_type[PLAIN]
elif s_buf_data == 'e':
# \e becomes a literal backslash.
ch.chtype = chunk_type[PLAIN]
ch.data = '\\'
elif len(s_buf_data) == 1 \
and s_buf_data in onlylatexspecial:
ch.chtype = chunk_type[PLAIN]
# if it is followed by an empty group,
# remove that group, it was needed for
# a true space
if i < length \
and pp[i].chtype==chunk_type[GROUP] \
and len(pp[i].data) == 0:
del pp[i]
length = length-1
elif s_buf_data in markcmds.keys():
# Flatten the argument and decorate it as markcmds prescribes.
length, newi = getnextarg(length, buf, pp, i)
str = flattext(buf, pp[i:newi])
del pp[i:newi]
length = length - (newi - i)
ch.chtype = chunk_type[PLAIN]
markcmd = s_buf_data
x = markcmds[markcmd]
if type(x) == TupleType:
pre, after = x
str = pre+str+after
elif x == 1:
str = string.upper(str)
# NOTE(review): presumably guarded by an `else:` that is missing here.
raise 'FATAL', 'corrupt markcmds'
ch.data = str
# NOTE(review): presumably the start of a final `else:` branch for
# unrecognised commands; the chunk is deleted either way.
if s_buf_data not in ignoredcommands:
print 'WARNING: deleting command ' + s_buf_data
print 'PP' + `pp[i-1]`
del pp[i-1]
i, length = i-1, length-1
elif ch.chtype == chunk_type[GROUP]:
# Flatten the group in place and rescan from its first element.
length, newi = getnextarg(length, buf, pp, i-1)
i = i-1
## str = flattext(buf, crcopy(pp[i-1:newi]))
## del pp[i:newi]
## length = length - (newi - i)
## ch.chtype = chunk_type[PLAIN]
## ch.data = str
dumpit(buf, wobj.write, pp)
##print 'FLATTEXT: RETURNING ' + `wobj.data`
return wobj.data
# try to generate node names (a bit shorter than the chapter title)
# note that the \nodename command (see elsewhere) overrules these efforts
def invent_node_names(text):
words = string.split(text)
##print 'WORDS ' + `words`
if len(words) == 2 \
and string.lower(words[0]) == 'built-in' \
and string.lower(words[1]) not in ('modules', 'functions'):
return words[1]
if len(words) == 3 and string.lower(words[1]) == 'module':
return words[2]
if len(words) == 3 and string.lower(words[1]) == 'object':
return string.join(words[0:2])
if len(words) > 4 \
and (string.lower(string.join(words[-4:])) \
== 'methods and data attributes'):
return string.join(words[:2])
return text
# Characters that rm_commas_etc() strips from node names.
re_commas_etc = regex.compile('[,`\'@{}]')
# A (possibly empty) run of blanks and tabs; used by next_command_p().
re_whitespace = regex.compile('[ \t]*')
##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
# look if the next non-white stuff is also a command, resulting in skipping
# double endlines (DENDLINE) too, and thus omitting \par's
# Sometimes this is too much, maybe consider DENDLINE's as stop
def next_command_p(length, buf, pp, i, cmdname):
# Scan pp from index i for a CSNAME chunk named `cmdname`.  Returns the
# index just after that command, or -1.
# NOTE(review): several statement bodies (probably `break` / `continue` /
# `return -1`) are missing from this copy, so the exact skipping rules for
# ENDLINE, DENDLINE and white PLAIN chunks cannot be read off here.
while 1:
if i >= len(pp):
ch = pp[i]
i = i+1
if ch.chtype == chunk_type[ENDLINE]:
if ch.chtype == chunk_type[DENDLINE]:
if ch.chtype == chunk_type[PLAIN]:
# Tests whether the whole text of the chunk is blanks/tabs.
if re_whitespace.search(s(buf, ch.data)) == 0 and \
re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)):
return -1
if ch.chtype == chunk_type[CSNAME]:
if s(buf, ch.data) == cmdname:
return i # _after_ the command
return -1
return -1
# things that are special to LaTeX, but not to texi..
onlylatexspecial = '_~^$#&%'
# Empty attribute container.
class Struct: pass
# Conversion state (`hist`) and collected output data (`out`); both are
# populated by startchange().
hist = Struct()
out = Struct()
def startchange():
    # (Re)initialise the conversion state kept on the module-level
    # `hist` and `out` objects.
    global hist, out
    hist.inenv = []
    hist.nodenames = []
    hist.cindex = []
    hist.inargs = 0
    hist.enumeratenesting, hist.itemizenesting = 0, 0
    out.doublenodes = []
    out.doublecindeces = []
# Ready-made one-element chunk lists.
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]
# the standard variation in symbols for itemize
itemizesymbols = ['bullet', 'minus', 'dots']
# same for enumerate
enumeratesymbols = ['1', 'A', 'a']
## \begin{ {func,data,exc}desc }{name}...
## the resulting texi-code is dependent on the contents of indexsubitem
# indexsubitem: `['XXX', 'function']
# funcdesc:
# deffn {`idxsi`} NAME (FUNCARGS)
# indexsubitem: `['XXX', 'method']`
# funcdesc:
# defmethod {`idxsi[0]`} NAME (FUNCARGS)
# indexsubitem: `['in', 'module', 'MODNAME']'
# datadesc:
# defcv data {`idxsi[1:]`} NAME
# excdesc:
# defcv exception {`idxsi[1:]`} NAME
# funcdesc:
# deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
# indexsubitem: `['OBJECT', 'attribute']'
# datadesc
# defcv attribute {`OBJECT`} NAME
## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
## or \funcline{NAME}{ARGS}
def do_funcdesc(length, buf, pp, i, index=1):
# Rewrite the funcdesc/funcline command at pp[i-1] into a texinfo
# definition line.  Consumes two arguments (name, args) from pp, turns
# pp[i-1] into a CSLINE chunk and inserts one GROUP chunk holding the line's
# arguments.  Returns (length, i) updated for the edits made to pp.
# Raises `error` when hist.indexsubitem matches none of the known shapes.
# `index` is not used in the visible code.
startpoint = i-1
ch = pp[startpoint]
wh = ch.where
# First argument: the function name.
length, newi = getnextarg(length, buf, pp, i)
funcname = chunk(GROUP, wh, pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
# Second argument: the parameter list, fetched with hist.inargs set.
save = hist.inargs
hist.inargs = 1
length, newi = getnextarg(length, buf, pp, i)
hist.inargs = save
del save
the_args = [chunk(PLAIN, wh, '()'[0])] + pp[i:newi] + \
[chunk(PLAIN, wh, '()'[1])]
del pp[i:newi]
length = length - (newi-i)
idxsi = hist.indexsubitem # words
command = ''
cat_class = ''
# Choose the texinfo command and category from the current indexsubitem.
if idxsi and idxsi[-1] in ('method', 'protocol', 'attribute'):
command = 'defmethod'
cat_class = string.join(idxsi[:-1])
elif len(idxsi) == 2 and idxsi[1] == 'function':
command = 'deffn'
cat_class = string.join(idxsi)
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
command = 'deffn'
cat_class = 'function of ' + string.join(idxsi[1:])
elif len(idxsi) > 3 and idxsi[:2] == ['in', 'modules']:
command = 'deffn'
cat_class = 'function of ' + string.join(idxsi[1:])
if not command:
raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
ch.chtype = chunk_type[CSLINE]
ch.data = command
cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
# NOTE(review): `funcname` is never used in the visible code and the two
# identical appends below are adjacent; presumably a line appending
# `funcname` between them is missing from this copy.
cslinearg.append(chunk(PLAIN, wh, ' '))
cslinearg.append(chunk(PLAIN, wh, ' '))
l = len(cslinearg)
cslinearg[l:l] = the_args
pp.insert(i, chunk(GROUP, wh, cslinearg))
i, length = i+1, length+1
hist.command = command
return length, i
## this routine will be called on \begin{excdesc}{NAME}
## or \excline{NAME}
def do_excdesc(length, buf, pp, i):
# Rewrite the excdesc/excline command at pp[i-1] into a texinfo definition
# line.  Consumes one argument (the exception name) from pp, turns pp[i-1]
# into a CSLINE chunk and inserts one GROUP chunk with the line's arguments.
# Returns (length, i) updated for the edits made to pp.
startpoint = i-1
ch = pp[startpoint]
wh = ch.where
length, newi = getnextarg(length, buf, pp, i)
excname = chunk(GROUP, wh, pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
idxsi = hist.indexsubitem # words
command = ''
cat_class = ''
class_class = ''
# Choose command, category and class from the current indexsubitem.
if len(idxsi) == 2 and idxsi[1] == 'exception':
command = 'defvr'
cat_class = string.join(idxsi)
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
command = 'defcv'
cat_class = 'exception'
class_class = string.join(idxsi[1:])
elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
command = 'defcv'
cat_class = 'exception'
class_class = string.join(idxsi[2:])
elif idxsi == ['built-in', 'exception', 'base', 'class']:
command = 'defvr'
cat_class = 'exception base class'
# NOTE(review): presumably guarded by an `else:` that is missing from this
# copy; as shown the raise would be unconditional.
raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
ch.chtype = chunk_type[CSLINE]
ch.data = command
cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
cslinearg.append(chunk(PLAIN, wh, ' '))
if class_class:
cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
cslinearg.append(chunk(PLAIN, wh, ' '))
# NOTE(review): `excname` is never used in the visible code; a line
# appending it to cslinearg is presumably missing.
pp.insert(i, chunk(GROUP, wh, cslinearg))
i, length = i+1, length+1
hist.command = command
return length, i
## same for datadesc or dataline...
def do_datadesc(length, buf, pp, i, index=1):
# Rewrite the datadesc/dataline command at pp[i-1] into a texinfo `defcv`
# line.  Consumes one argument (the data name) from pp, turns pp[i-1] into a
# CSLINE chunk and inserts one GROUP chunk with the line's arguments.
# Returns (length, i) updated for the edits made to pp.
# `index` is not used in the visible code.
startpoint = i-1
ch = pp[startpoint]
wh = ch.where
length, newi = getnextarg(length, buf, pp, i)
dataname = chunk(GROUP, wh, pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
idxsi = hist.indexsubitem # words
command = 'defcv'
cat_class = 'data'
class_class = ''
# Derive category and class from the current indexsubitem.
if idxsi[-1] in ('attribute', 'option'):
cat_class = idxsi[-1]
class_class = string.join(idxsi[:-1])
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
class_class = string.join(idxsi[1:])
elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
class_class = string.join(idxsi[2:])
# NOTE(review): presumably the body of a missing `else:`; as shown it would
# overwrite class_class in every case.
class_class = string.join(idxsi)
ch.chtype = chunk_type[CSLINE]
ch.data = command
cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
cslinearg.append(chunk(PLAIN, wh, ' '))
if class_class:
cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
cslinearg.append(chunk(PLAIN, wh, ' '))
# NOTE(review): `dataname` is never used in the visible code; a line
# appending it to cslinearg is presumably missing.
pp.insert(i, chunk(GROUP, wh, cslinearg))
i, length = i+1, length+1
hist.command = command
return length, i
def do_opcodedesc(length, buf, pp, i):
# Rewrite the opcodedesc command at pp[i-1] into a texinfo `deffn` line.
# Consumes one argument from pp and returns (length, i).
startpoint = i-1
ch = pp[startpoint]
wh = ch.where
length, newi = getnextarg(length, buf, pp, i)
dataname = chunk(GROUP, wh, pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
# NOTE(review): the other do_* routines assign chunk_type[CSLINE] here; this
# one stores the raw constant instead -- confirm that is intended.
ch.chtype = CSLINE
ch.data = "deffn"
# NOTE(review): the list literal below is never closed, `dataname` is not
# used, and the first two elements look like alternative spellings of the
# same category -- lines appear to be missing from this copy.
cslinearg = [chunk(PLAIN, wh, 'byte\ code\ instruction'),
chunk(GROUP, wh, [chunk(PLAIN, wh, "byte code instruction")]),
chunk(PLAIN, wh, ' '),
chunk(PLAIN, wh, ' '),
pp[i] = chunk(GROUP, wh, cslinearg)
hist.command = ch.data
return length, i
# regular indices: those that are not set in tt font by default....
# One-element tuple; the trailing comma keeps it a tuple rather than a string.
regindices = ('cindex', )
# remove illegal characters from node names
def rm_commas_etc(text):
# Return `text` with every character matched by re_commas_etc removed, and
# print a warning if anything was removed.
# NOTE(review): the `else:` / `break` that end the loop are missing from
# this copy; as shown the `while 1` never terminates.
result = ''
changed = 0
while 1:
pos = re_commas_etc.search(text)
if pos >= 0:
changed = 1
# Keep the text before the offending character and continue after it.
result = result + text[:pos]
text = text[pos+1:]
result = result + text
if changed:
print 'Warning: nodename changed to ' + `result`
return result
# boolean flags
# Values of the \if<flag> directives evaluated by changeit(); an unknown
# flag raises `error` there.
flags = {'texi': 1}
# map of \label{} to node names
label_nodes = {}
## changeit: the actual routine, that changes the contents of the parsed
## chunks
#
# Arguments (as used below):
#   buf -- the source text; chunk data is resolved against it via s(buf, ...)
#   pp  -- list of chunk objects, rewritten in place
#
# The loop walks pp with index i and keeps `length` in step with len(pp):
# every insertion or deletion below adjusts both, and the check at the top
# of the loop raises 'FATAL' when the two disagree.  After `ch = pp[i]` the
# index is advanced, so inside the handlers pp[i-1] is the current chunk and
# pp[i] is the one that follows it.
#
# NOTE(review): this listing carries no indentation and several statements
# look incomplete (e.g. `if i >= length:` is followed directly by
# `ch = pp[i]`, and some multi-line chunk(...) calls are never closed).
# Confirm against the original file before changing any logic here.
def changeit(buf, pp):
global onlylatexspecial, hist, out
i, length = 0, len(pp)
while 1:
# sanity check: length should always equal len(pp)
if len(pp) != length:
raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
if i >= length:
ch = pp[i]
i = i + 1
if type(ch) is StringType:
#normally, only chunks are present in pp,
# but in some cases, some extra info
# has been inserted, e.g., the \end{...} clauses
raise 'FATAL', 'got string, probably too many ' + `end`
if ch.chtype == chunk_type[GROUP]:
# check for {\em ...} constructs
data = ch.data
if data and \
data[0].chtype == chunk_type[CSNAME] and \
fontchanges.has_key(s(buf, data[0].data)):
k = s(buf, data[0].data)
del data[0]
pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
length, i = length+1, i+1
elif data:
if len(data) \
and data[0].chtype == chunk_type[GROUP] \
and len(data[0].data) \
and data[0].data[0].chtype == chunk_type[CSNAME] \
and s(buf, data[0].data[0].data) == 'e':
data[0] = data[0].data[0]
print "invoking \\e magic group transform..."
## print "GROUP -- ch.data[0].data =", ch.data[0].data
k = s(buf, data[0].data)
if k == "fulllineitems":
del data[0]
pp[i-1:i] = data
i = i - 1
length = length + len(data) - 1
# recursively parse the contents of the group
changeit(buf, data)
elif ch.chtype == chunk_type[IF]:
# \if...
flag, negate, data = ch.data
##print 'IF: flag, negate = ' + `flag, negate`
if flag not in flags.keys():
raise error, 'unknown flag ' + `flag`
value = flags[flag]
if negate:
value = (not value)
# the IF chunk itself is removed; its data is spliced back in at the same
# position only when the (possibly negated) flag value is true
del pp[i-1]
length, i = length-1, i-1
if value:
pp[i:i] = data
length = length + len(data)
elif ch.chtype == chunk_type[ENV]:
# \begin{...} ....
envname, data = ch.data
#push this environment name on stack
hist.inenv.insert(0, envname)
#append an endenv chunk after grouped data
data.append(chunk(ENDENV, ch.where, envname))
#delete this object
del pp[i-1]
i, length = i-1, length-1
#insert found data
pp[i:i] = data
length = length + len(data)
if envname == 'verbatim':
pp[i:i] = [chunk(CSLINE, ch.where, 'example'),
chunk(GROUP, ch.where, [])]
length, i = length+2, i+2
elif envname in ('itemize', 'list'):
if hist.itemizenesting > len(itemizesymbols):
raise error, 'too deep itemize nesting'
if envname == 'list':
del pp[i:i+2]
length = length - 2
ingroupch = [chunk(CSNAME, ch.where,
hist.itemizenesting = hist.itemizenesting + 1
pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),
chunk(GROUP, ch.where, ingroupch)]
length, i = length+2, i+2
elif envname == 'enumerate':
if hist.enumeratenesting > len(enumeratesymbols):
raise error, 'too deep enumerate nesting'
ingroupch = [chunk(PLAIN, ch.where,
hist.enumeratenesting = hist.enumeratenesting + 1
pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),
chunk(GROUP, ch.where, ingroupch)]
length, i = length+2, i+2
elif envname == 'description':
ingroupch = [chunk(CSNAME, ch.where, 'b')]
pp[i:i] = [chunk(CSLINE, ch.where, 'table'),
chunk(GROUP, ch.where, ingroupch)]
length, i = length+2, i+2
elif (envname == 'tableiii') or (envname == 'tableii'):
if (envname == 'tableii'):
ltable = 2
ltable = 3
wh = ch.where
newcode = []
#delete tabular format description
# e.g., {|l|c|l|}
length, newi = getnextarg(length, buf, pp, i)
del pp[i:newi]
length = length - (newi-i)
newcode.append(chunk(CSLINE, wh, 'table'))
ingroupch = [chunk(CSNAME, wh, 'asis')]
newcode.append(chunk(GROUP, wh, ingroupch))
newcode.append(chunk(CSLINE, wh, 'item'))
#get the name of macro for @item
# e.g., {code}
length, newi = getnextarg(length, buf, pp, i)
if newi-i != 1:
raise error, 'Sorry, expected 1 chunk argument'
if pp[i].chtype != chunk_type[PLAIN]:
raise error, 'Sorry, expected plain text argument'
hist.itemargmacro = s(buf, pp[i].data)
del pp[i:newi]
length = length - (newi-i)
itembody = []
for count in range(ltable):
length, newi = getnextarg(length, buf, pp, i)
emphgroup = [
chunk(CSNAME, wh, 'emph'),
chunk(GROUP, 0, pp[i:newi])]
del pp[i:newi]
length = length - (newi-i)
if count == 0:
itemarg = emphgroup
elif count == ltable-1:
itembody = itembody + \
[chunk(PLAIN, wh, ' --- ')] + emphgroup
itembody = emphgroup
newcode.append(chunk(GROUP, wh, itemarg))
newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
pp[i:i] = newcode
l = len(newcode)
length, i = length+l, i+l
del newcode, l
if length != len(pp):
raise 'STILL, SOMETHING wrong', `i`
elif envname in ('funcdesc', 'funcdescni'):
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_funcdesc(length, buf, pp, i,
elif envname == 'excdesc':
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_excdesc(length, buf, pp, i)
elif envname in ('datadesc', 'datadescni'):
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_datadesc(length, buf, pp, i,
elif envname == 'opcodedesc':
pp.insert(i, chunk(PLAIN, ch.where, ''))
i, length = i+1, length+1
length, i = do_opcodedesc(length, buf, pp, i)
elif envname == 'seealso':
chunks = [chunk(ENDLINE, ch.where, "\n"),
chunk(CSNAME, ch.where, "b"),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, "See also: ")]),
chunk(ENDLINE, ch.where, "\n"),
chunk(ENDLINE, ch.where, "\n")]
pp[i-1:i] = chunks
length = length + len(chunks) - 1
i = i + len(chunks) - 1
elif envname in ('sloppypar', 'flushleft'):
print 'WARNING: don\'t know what to do with env ' + `envname`
# end of an environment: the name must equal the innermost open one
# (hist.inenv[0]), which is then popped; the ENDENV chunk is removed and,
# for the environments listed below, replaced by an 'end' CSLINE whose
# GROUP names the construct being closed
elif ch.chtype == chunk_type[ENDENV]:
envname = ch.data
if envname != hist.inenv[0]:
raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
del hist.inenv[0]
del pp[i-1]
i, length = i-1, length-1
if envname == 'verbatim':
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'example')])]
i, length = i+2, length+2
elif envname in ('itemize', 'list'):
hist.itemizenesting = hist.itemizenesting - 1
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'itemize')])]
i, length = i+2, length+2
elif envname == 'enumerate':
hist.enumeratenesting = hist.enumeratenesting-1
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'enumerate')])]
i, length = i+2, length+2
elif envname == 'description':
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'table')])]
i, length = i+2, length+2
elif (envname == 'tableiii') or (envname == 'tableii'):
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, 'table')])]
i, length = i+2, length + 2
pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
i, length = i+1, length+1
elif envname in ('funcdesc', 'excdesc', 'datadesc',
'funcdescni', 'datadescni'):
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
elif envname == 'opcodedesc':
pp[i:i] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, "deffn")])]
i, length = i+2, length+2
elif envname in ('seealso', 'sloppypar', 'flushleft'):
print 'WARNING: ending env %s has no actions' % `envname`
elif ch.chtype == chunk_type[CSNAME]:
# control name transformations
s_buf_data = s(buf, ch.data)
if s_buf_data == 'optional':
pp[i-1].chtype = chunk_type[PLAIN]
pp[i-1].data = '['
if (i < length) and \
(pp[i].chtype == chunk_type[GROUP]):
pp[i:i+1]=cp + [
chunk(PLAIN, ch.where, ']')]
length = length+len(cp)
elif s_buf_data in ignoredcommands:
del pp[i-1]
i, length = i-1, length-1
elif s_buf_data == '@' and \
i != length and \
pp[i].chtype == chunk_type[PLAIN] and \
s(buf, pp[i].data)[0] == '.':
# \@. --> \. --> @.
ch.data = '.'
del pp[i]
length = length-1
elif s_buf_data == '\\':
# \\ --> \* --> @*
ch.data = '*'
elif len(s_buf_data) == 1 and \
s_buf_data in onlylatexspecial:
ch.chtype = chunk_type[PLAIN]
# check if such a command is followed by
# an empty group: e.g., `\%{}'. If so, remove
# this empty group too
if i < length and \
pp[i].chtype == chunk_type[GROUP] \
and len(pp[i].data) == 0:
del pp[i]
length = length-1
elif hist.inargs and s_buf_data in inargsselves:
# This is the special processing of the
# arguments of the \begin{funcdesc}... or
# \funcline... arguments
# \, --> , \[ --> [, \] --> ]
ch.chtype = chunk_type[PLAIN]
# \setindexsubitem{(...)}: the words between the parentheses are stored in
# hist.indexsubitem; the command and its argument chunk are then removed
elif s_buf_data == 'setindexsubitem':
stuff = pp[i].data
if len(stuff) != 1:
raise error, "parameter to \\setindexsubitem{} too long"
if pp[i].chtype != chunk_type[GROUP]:
raise error, "bad chunk type following \\setindexsubitem" \
"\nexpected GROUP, got " + str(ch.chtype)
text = s(buf, stuff[0].data)
if text[:1] != '(' or text[-1:] != ')':
raise error, \
'expected indexsubitem enclosed in parenteses'
hist.indexsubitem = string.split(text[1:-1])
del stuff, text
del pp[i-1:i+1]
i = i - 1
length = length - 2
# \newcommand: reported, then removed together with the two chunks after it
elif s_buf_data == 'newcommand':
print "ignoring definition of \\" + s(buf, pp[i].data[0].data)
del pp[i-1:i+2]
i = i - 1
length = length - 3
# \mbox{...}: the command and its group are replaced by the group's contents
elif s_buf_data == 'mbox':
stuff = pp[i].data
pp[i-1:i+1] = stuff
i = i - 1
length = length + len(stuff) - 2
elif s_buf_data == 'version':
ch.chtype = chunk_type[PLAIN]
ch.data = release_version
elif s_buf_data == 'program':
ch.data = "strong"
elif s_buf_data == "fulllineitems":
del pp[i-1]
i, length = i-1, length-1
elif s_buf_data == 'item':
ch.chtype = chunk_type[CSLINE]
length, newi = getoptarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
changeit(buf, ingroupch) # catch stuff inside the optional arg
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
i, length = i+1, length+1
# \ttindex: the index command (findex/vindex/cindex) is picked from the
# words currently stored in hist.indexsubitem
elif s_buf_data == 'ttindex':
idxsi = hist.indexsubitem
cat_class = ''
if len(idxsi) >= 2 and idxsi[1] in \
('method', 'function', 'protocol'):
command = 'findex'
elif len(idxsi) >= 2 and idxsi[1] in \
('exception', 'object'):
command = 'vindex'
elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
command = 'cindex'
print 'WARNING: can\'t categorize ' + `idxsi` \
+ ' for \'ttindex\' command'
command = 'cindex'
if not cat_class:
cat_class = '('+string.join(idxsi)+')'
ch.chtype = chunk_type[CSLINE]
ch.data = command
length, newi = getnextarg(length, buf, pp, i)
arg = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
cat_arg = [chunk(PLAIN, ch.where, cat_class)]
# determine what should be set in roman, and
# what in tt-font
if command in regindices:
arg = [chunk(CSNAME, ch.where, 't'),
chunk(GROUP, ch.where, arg)]
cat_arg = [chunk(CSNAME, ch.where, 'r'),
chunk(GROUP, ch.where, cat_arg)]
ingroupch = arg + \
[chunk(PLAIN, ch.where, ' ')] + \
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'ldots':
# \ldots --> \dots{} --> @dots{}
ch.data = 'dots'
if i == length \
or pp[i].chtype != chunk_type[GROUP] \
or pp[i].data != []:
pp.insert(i, chunk(GROUP, ch.where, []))
i, length = i+1, length+1
elif s_buf_data in themselves:
# \UNIX --> &UNIX;
ch.chtype = chunk_type[PLAIN]
if i != length \
and pp[i].chtype == chunk_type[GROUP] \
and pp[i].data == []:
del pp[i]
length = length-1
elif s_buf_data in for_texi:
elif s_buf_data == 'manpage':
ch.data = 'emph'
sect = s(buf, pp[i+1].data[0].data)
pp[i+1].data = "(%s)" % sect
pp[i+1].chtype = chunk_type[PLAIN]
elif s_buf_data == 'e':
# "\e" --> "\"
ch.data = '\\'
ch.chtype = chunk_type[PLAIN]
elif s_buf_data in ('lineiii', 'lineii'):
# This is the most tricky one
# \lineiii{a1}{a2}[{a3}] -->
# @item @<cts. of itemargmacro>{a1}
# a2 [ -- a3]
if not hist.inenv:
raise error, 'no environment for lineiii'
if (hist.inenv[0] != 'tableiii') and \
(hist.inenv[0] != 'tableii'):
raise error, \
'wrong command (%s) in wrong environment (%s)' \
% (s_buf_data, `hist.inenv[0]`)
ch.chtype = chunk_type[CSLINE]
ch.data = 'item'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = [chunk(CSNAME, 0, hist.itemargmacro),
chunk(GROUP, 0, pp[i:newi])]
del pp[i:newi]
length = length - (newi-i)
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
grouppos = i
i, length = i+1, length+1
length, i = getnextarg(length, buf, pp, i)
length, newi = getnextarg(length, buf, pp, i)
if newi > i:
# we have a 3rd arg
pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
i = newi + 1
length = length + 1
if length != len(pp):
raise 'IN LINEIII IS THE ERR', `i`
elif s_buf_data in ('chapter', 'section', 'subsection', 'subsubsection'):
#\xxxsection{A} ---->
# @node A, , ,
# @xxxsection A
## also: remove commas and quotes
ch.chtype = chunk_type[CSLINE]
length, newi = getnextarg(length, buf, pp, i)
afternodenamecmd = next_command_p(length, buf,
pp, newi, 'nodename')
if afternodenamecmd < 0:
cp1 = crcopy(pp[i:newi])
pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
length, newi = length - (newi-i) + 1, i+1
text = flattext(buf, cp1)
text = invent_node_names(text)
length, endarg = getnextarg(length, buf,
pp, afternodenamecmd)
cp1 = crcopy(pp[afternodenamecmd:endarg])
del pp[newi:endarg]
length = length - (endarg-newi)
pp[i:newi] = [chunk(GROUP, ch.where, pp[i:newi])]
length, newi = length - (newi-i) + 1, i + 1
text = flattext(buf, cp1)
if text[-1] == '.':
text = text[:-1]
if text in hist.nodenames:
print 'WARNING: node name ' + `text` + ' already used'
text = rm_commas_etc(text)
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'node'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, text+', , ,')
i, length = newi+2, length+2
elif s_buf_data == 'funcline':
# fold it to a very short environment
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
length, i = do_funcdesc(length, buf, pp, i)
elif s_buf_data == 'dataline':
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
length, i = do_datadesc(length, buf, pp, i)
elif s_buf_data == 'excline':
pp[i-1:i-1] = [chunk(CSLINE, ch.where, 'end'),
chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where, hist.command)])]
i, length = i+2, length+2
length, i = do_excdesc(length, buf, pp, i)
elif s_buf_data == 'index':
#\index{A} --->
# @cindex A
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'bifuncindex':
ch.chtype = chunk_type[CSLINE]
ch.data = 'findex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
'(built-in function)')]))
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'obindex':
ch.chtype = chunk_type[CSLINE]
ch.data = 'findex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'opindex':
ch.chtype = chunk_type[CSLINE]
ch.data = 'findex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data in ('bimodindex', 'refbimodindex'):
ch.chtype = chunk_type[CSLINE]
ch.data = 'pindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'refmodindex':
ch.chtype = chunk_type[CSLINE]
ch.data = 'pindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
elif s_buf_data == 'sectcode':
ch.data = 'code'
elif s_buf_data in ('stmodindex', 'refstmodindex'):
ch.chtype = chunk_type[CSLINE]
# use the program index as module index
ch.data = 'pindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
# NOTE(review): same test as the ('stmodindex', 'refstmodindex') branch just
# above, so as listed this branch can never be selected
elif s_buf_data in ('stmodindex', 'refstmodindex'):
ch.chtype = chunk_type[CSLINE]
# use the program index as module index
ch.data = 'pindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = pp[i:newi]
del pp[i:newi]
length = length - (newi-i)
ingroupch.append(chunk(PLAIN, ch.where, ' '))
ingroupch.append(chunk(CSNAME, ch.where, 'r'))
ingroupch.append(chunk(GROUP, ch.where, [
chunk(PLAIN, ch.where,
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
length, i = length+1, i+1
# \stindex{A} / \kwindex{A}: two cindex entries are produced, one reading
# "A statement|keyword" and one reading "statement|keyword, A"
elif s_buf_data in ('stindex', 'kwindex'):
# XXX must actually go to newindex st
what = (s_buf_data[:2] == "st") and "statement" or "keyword"
wh = ch.where
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
length, newi = getnextarg(length, buf, pp, i)
ingroupch = [chunk(CSNAME, wh, 'code'),
chunk(GROUP, wh, pp[i:newi])]
del pp[i:newi]
length = length - (newi-i)
t = ingroupch[:]
t.append(chunk(PLAIN, wh, ' ' + what))
pp.insert(i, chunk(GROUP, wh, t))
i, length = i+1, length+1
pp.insert(i, chunk(CSLINE, wh, 'cindex'))
i, length = i+1, length+1
t = ingroupch[:]
t.insert(0, chunk(PLAIN, wh, what + ', '))
pp.insert(i, chunk(GROUP, wh, t))
i, length = i+1, length+1
elif s_buf_data == 'indexii':
#\indexii{A}{B} --->
# @cindex A B
# @cindex B, A
length, newi = getnextarg(length, buf, pp, i)
cp11 = pp[i:newi]
cp21 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp12 = pp[i:newi]
cp22 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
pp.insert(i, chunk(GROUP, ch.where, cp11 + [
chunk(PLAIN, ch.where, ' ')] + cp12))
i, length = i+1, length+1
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
chunk(GROUP, ch.where, cp22 + [
chunk(PLAIN, ch.where, ', ')]+ cp21)]
i, length = i+2, length+2
elif s_buf_data == 'indexiii':
length, newi = getnextarg(length, buf, pp, i)
cp11 = pp[i:newi]
cp21 = crcopy(pp[i:newi])
cp31 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp12 = pp[i:newi]
cp22 = crcopy(pp[i:newi])
cp32 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp13 = pp[i:newi]
cp23 = crcopy(pp[i:newi])
cp33 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
pp.insert(i, chunk(GROUP, ch.where, cp11 + [
chunk(PLAIN, ch.where, ' ')] + cp12
+ [chunk(PLAIN, ch.where, ' ')]
+ cp13))
i, length = i+1, length+1
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
chunk(GROUP, ch.where, cp22 + [
chunk(PLAIN, ch.where, ' ')]+ cp23
+ [chunk(PLAIN, ch.where, ', ')] +
i, length = i+2, length+2
pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
chunk(GROUP, ch.where, cp33 + [
chunk(PLAIN, ch.where, ', ')]+ cp31
+ [chunk(PLAIN, ch.where, ' ')] +
i, length = i+2, length+2
# \indexiv{A}{B}{C}{D}: four cindex entries, each starting at a different
# argument (A B C D / B C D, A / C D, A B / D, A B C).
# NOTE(review): spacech, commach and cindexch are not assigned in the
# visible lines of this branch -- confirm where they are defined.
elif s_buf_data == 'indexiv':
length, newi = getnextarg(length, buf, pp, i)
cp11 = pp[i:newi]
cp21 = crcopy(pp[i:newi])
cp31 = crcopy(pp[i:newi])
cp41 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp12 = pp[i:newi]
cp22 = crcopy(pp[i:newi])
cp32 = crcopy(pp[i:newi])
cp42 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp13 = pp[i:newi]
cp23 = crcopy(pp[i:newi])
cp33 = crcopy(pp[i:newi])
cp43 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
length, newi = getnextarg(length, buf, pp, i)
cp14 = pp[i:newi]
cp24 = crcopy(pp[i:newi])
cp34 = crcopy(pp[i:newi])
cp44 = crcopy(pp[i:newi])
del pp[i:newi]
length = length - (newi-i)
ch.chtype = chunk_type[CSLINE]
ch.data = 'cindex'
ingroupch = cp11 + \
spacech + cp12 + \
spacech + cp13 + \
spacech + cp14
pp.insert(i, chunk(GROUP, ch.where, ingroupch))
i, length = i+1, length+1
ingroupch = cp22 + \
spacech + cp23 + \
spacech + cp24 + \
commach + cp21
pp[i:i] = cindexch + [
chunk(GROUP, ch.where, ingroupch)]
i, length = i+2, length+2
ingroupch = cp33 + \
spacech + cp34 + \
commach + cp31 + \
spacech + cp32
pp[i:i] = cindexch + [
chunk(GROUP, ch.where, ingroupch)]
i, length = i+2, length+2
ingroupch = cp44 + \
commach + cp41 + \
spacech + cp42 + \
spacech + cp43
pp[i:i] = cindexch + [
chunk(GROUP, ch.where, ingroupch)]
i, length = i+2, length+2
elif s_buf_data == 'seemodule':
ch.data = "code"
# this is needed for just one of the input files... -sigh-
while pp[i+1].chtype == chunk_type[COMMENT]:
i = i + 1
data = pp[i+1].data
oparen = chunk(PLAIN, ch.where, " (")
data.insert(0, oparen)
data.append(chunk(PLAIN, ch.where, ")"))
pp[i+1:i+2] = data
length = length + len(data) - 1
elif s_buf_data == 'seetext':
data = pp[i].data
data.insert(0, chunk(ENDLINE, ch.where, "\n"))
pp[i-1:i+1] = data
i = i - 1
length = length + len(data) - 2
# NOTE(review): the next branch stores the bare PLAIN constant, whereas the
# other branches store chunk_type[PLAIN] -- confirm this is intended
elif s_buf_data == "quad":
ch.chtype = PLAIN
ch.data = " "
elif s_buf_data in ('noindent', 'indexsubitem', 'footnote'):
elif s_buf_data in ('url', 'module', 'function', 'cfunction',
'keyword', 'method', 'exception', 'constant',
'email', 'class'):
ch.data = "code"
# \label{name}: the command and its argument are removed and name is mapped
# to the last entry of hist.nodenames
elif s_buf_data == 'label':
name = s(buf, pp[i].data[0].data)
del pp[i-1:i+1]
length = length - 2
i = i - 1
label_nodes[name] = hist.nodenames[-1]
elif s_buf_data == 'rfc':
ch.chtype = chunk_type[PLAIN]
ch.data = "RFC " + s(buf, pp[i].data[0].data)
del pp[i]
length = length - 1
elif s_buf_data == 'Large':
del pp[i-1]
i = i - 1
length = length - 1
# \ref{name}: name is looked up in label_nodes (filled by \label above)
elif s_buf_data == 'ref':
name = s(buf, pp[i].data[0].data)
if label_nodes.has_key(name):
pp[i].data[0].data = label_nodes[name]
pp[i-1:i+1] = [
chunk(PLAIN, ch.where,
"(unknown node reference: %s)" % name)]
length = length - 1
print "WARNING: unknown node label", `name`
print "don't know what to do with keyword " + s_buf_data
# Patterns used by dumpit().  Both are searched with .search(text), whose
# result dumpit() uses as a position into text and tests with `pos < 0`
# (presumably "no match" -- confirm against the regex module documentation).
#
# re_atsign: the characters '@', '{' and '}'; dumpit() writes an '@' in front
# of the matched character when emitting PLAIN text.
re_atsign = regex.compile('[@{}]')
# re_newline: a newline; dumpit() writes a space in its place when emitting
# the argument text of a CSLINE.
re_newline = regex.compile('\n')
# dumpit: write the chunk list pp out as text.
#
#   buf -- the source text; chunk data is resolved against it via s(buf, ...)
#   wm  -- write callable; every piece of output is passed to wm(...)
#   pp  -- list of chunks; the data of a GROUP chunk is dumped recursively
#
# The loop indexes pp with i and, like changeit(), raises 'FATAL' when
# len(pp) no longer equals the length recorded at entry.  After
# `ch = pp[i]` the index is advanced, so pp[i-2] below is the chunk before
# the current one and pp[i] the chunk after it.  addspace/dospace carry a
# "space needed" flag from a CSNAME whose name starts with a letter to the
# PLAIN chunk that follows it.
#
# NOTE(review): this listing carries no indentation and several branches
# have lost their bodies (e.g. `if i == length:`, `if s_buf_data == 'e':`,
# the ENDLINE branch, and an `except TypeError:` with no visible `try:`).
# Confirm against the original file before changing any logic here.
def dumpit(buf, wm, pp):
global out
i, length = 0, len(pp)
addspace = 0
while 1:
if len(pp) != length:
raise 'FATAL', 'inconsistent length'
if i == length:
ch = pp[i]
i = i + 1
dospace = addspace
addspace = 0
if ch.chtype == chunk_type[CSNAME]:
s_buf_data = s(buf, ch.data)
if s_buf_data == 'e':
if s_buf_data == '$':
wm('@' + s_buf_data)
if s_buf_data == 'node' and \
pp[i].chtype == chunk_type[PLAIN] and \
s(buf, pp[i].data) in out.doublenodes:
##XXX doesnt work yet??
wm(' ZZZ-' + zfill(`i`, 4))
if s_buf_data[0] in string.letters:
addspace = 1
elif ch.chtype == chunk_type[PLAIN]:
if dospace and s(buf, ch.data) not in (' ', '\t'):
wm(' ')
text = s(buf, ch.data)
while 1:
pos = re_atsign.search(text)
if pos < 0:
wm(text[:pos] + '@' + text[pos])
text = text[pos+1:]
elif ch.chtype == chunk_type[GROUP]:
dumpit(buf, wm, ch.data)
# DENDLINE: advance i past any DENDLINE/ENDLINE chunks that directly follow
elif ch.chtype == chunk_type[DENDLINE]:
while i != length and pp[i].chtype in \
(chunk_type[DENDLINE], chunk_type[ENDLINE]):
i = i + 1
elif ch.chtype == chunk_type[OTHER]:
wm(s(buf, ch.data))
elif ch.chtype == chunk_type[ACTIVE]:
wm(s(buf, ch.data))
elif ch.chtype == chunk_type[ENDLINE]:
# CSLINE: '@' + command name is written, then the GROUP chunk that must
# follow is rendered into a Wobj buffer and written out with each newline
# replaced by a space (a warning is printed about newlines in the argument)
elif ch.chtype == chunk_type[CSLINE]:
if i >= 2 and pp[i-2].chtype not in \
(chunk_type[ENDLINE], chunk_type[DENDLINE]) \
and (pp[i-2].chtype != chunk_type[PLAIN]
or s(buf, pp[i-2].data)[-1] != '\n'):
wm('@' + s(buf, ch.data))
if i == length:
raise error, 'CSLINE expected another chunk'
if pp[i].chtype != chunk_type[GROUP]:
raise error, 'CSLINE expected GROUP'
if type(pp[i].data) != ListType:
raise error, 'GROUP chould contain []-data'
wobj = Wobj()
dumpit(buf, wobj.write, pp[i].data)
i = i + 1
text = wobj.data
del wobj
if text:
wm(' ')
while 1:
pos = re_newline.search(text)
if pos < 0:
print 'WARNING: found newline in csline arg'
wm(text[:pos] + ' ')
text = text[pos+1:]
if i >= length or \
pp[i].chtype not in (chunk_type[CSLINE],
chunk_type[ENDLINE], chunk_type[DENDLINE]) \
and (pp[i].chtype != chunk_type[PLAIN]
or s(buf, pp[i].data)[0] != '\n'):
# COMMENT: only comments that are non-empty and not just blanks/tabs are
# written, as '@c ' + text
elif ch.chtype == chunk_type[COMMENT]:
## print 'COMMENT: previous chunk =', pp[i-2]
## if pp[i-2].chtype == chunk_type[PLAIN]:
## print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
if s(buf, ch.data) and \
regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
if i >= 2 \
and pp[i-2].chtype not in (chunk_type[ENDLINE], chunk_type[DENDLINE]) \
and not (pp[i-2].chtype == chunk_type[PLAIN]
and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
wm('@c ' + s(buf, ch.data))
elif ch.chtype == chunk_type[IGNORE]:
# unhandled chunk types: a warning is printed with the chunk data, cut to
# 400 characters (note that the local name `str` hides the builtin here)
str = `s(buf, ch.data)`
except TypeError:
str = `ch.data`
if len(str) > 400:
str = str[:400] + '...'
print 'warning:', ch.chtype, 'not handled, data ' + str
# main: command-line entry point.
#
# Options are parsed with getopt using 'o:h:t:v:':
#   -o outfile      output file; when not given it is the first input file's
#                   name with its extension replaced by '.texi'
#   -h headerfile   file copied to the start of the output
#                   (default 'texipre.dat')
#   -t trailerfile  file copied to the end of the output
#                   (default 'texipost.dat')
#   -v version      replaces the module-level release_version
#                   (not mentioned in the usage message below)
#
# Each input file is read completely, parsed with parseit(), transformed in
# place by changeit() and written to the output file by dumpit().
#
# NOTE(review): this listing carries no indentation and `except
# getopt.error:` appears without a visible `try:`; other statements may be
# missing as well (nothing visible stops execution after the usage message
# or after the "will not overwrite" message).  Confirm against the original.
def main():
global release_version
outfile = None
headerfile = 'texipre.dat'
trailerfile = 'texipost.dat'
opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:v:')
except getopt.error:
args = []
if not args:
print 'usage: partparse [-o outfile] [-h headerfile]',
print '[-t trailerfile] file ...'
for opt, arg in opts:
if opt == '-o': outfile = arg
if opt == '-h': headerfile = arg
if opt == '-t': trailerfile = arg
if opt == '-v': release_version = arg
if not outfile:
root, ext = os.path.splitext(args[0])
outfile = root + '.texi'
# refuse to clobber one of the inputs
if outfile in args:
print 'will not overwrite input file', outfile
outf = open(outfile, 'w')
outf.write(open(headerfile, 'r').read())
for file in args:
# banner on stdout only when more than one input file is processed
if len(args) > 1: print '='*20, file, '='*20
buf = open(file, 'r').read()
w, pp = parseit(buf)
changeit(buf, pp)
dumpit(buf, outf.write, pp)
outf.write(open(trailerfile, 'r').read())
if __name__ == "__main__":