# : at the end of a DW* name on input.
if c == ":":
return "y"
+ # Do not allow \ in the middle of a name.
if c == "\\":
- return "y"
+ return "n"
if c == "-":
return "y"
if c == "_":
return "y"
return "n"
+def isShift(c):
# Returns "y" when the ordinal of c is 128 or above (i.e. outside 7-bit
# ASCII) and "n" otherwise. Like the other classifiers here it answers with
# the strings "y"/"n" rather than a boolean, so callers compare against "y".
+ if ord(c) >= 128:
+ return "y"
+ return "n"
def isIndivid(c):
# Returns "y" for characters that are to be tokenized on their own as a
# single-character token ("[" is one of them) and "n" for everything else.
# NOTE(review): the comparisons between the "[" test and the final return
# are not visible in this excerpt.
if c == "[":
return "y"
return "y"
return "n"
+# self._tex DW\-\_ATE and the like
+# self._underbar DW\_ATE and the like
+# self._std the way a DW_ATE and the like looks in the standard
+# self._label With all _ and - removed. Like DWATE
+
class dwtoken:
+ """ Token types:
+ id: identifier
+ ind: a character taken as an individual character.
+ none: No characters seen yet.
+ shift: A character with the high bit of 8 bits set, not something we expect.
+ - In DW4 these high-bit-chars are special 3-character left and right quotes.
+ - charfix.py can replace these with Latex ascii quotes.
+ other: Some other character, but ascii, seemingly. """
def __init__(self):
# Creates an empty token: four empty character lists (one per spelling),
# class "none" and line number 0.
self._tex = []
self._underbar = []
self._std = []
self._label = []
- # Class is "id", "ind","other","none"
+ # Class is "id", "ind","other","shift","none"
self._class = "none"
- def insertid(self,string):
# Input line number of the token; stays 0 until one of the setters that
# takes a line argument records the real value.
+ self._linenum = 0
+ def insertid(self,string,line):
# Turns this token into an "id" token whose characters are those of string,
# and records line as the input line number it belongs to.
self._class = "id"
self._tex = list(string)
# NOTE(review): the three assignments below bind the very same list object
# as _tex (no copy is made), so an in-place change through any one of the
# four attributes is visible through all of them.
self._underbar = self._tex
self._std = self._tex
self._label = self._tex
- def setIndivid(self,c):
+ self._linenum = line
+ def setIndivid(self,c,line):
# Makes this a one-character "ind" token: each of the four spellings becomes
# its own fresh list [c], and line is recorded as the input line number.
self._tex = [c]
self._underbar = [c]
self._std = [c]
self._label = [c]
self._class = "ind"
- def setInitialIdChar(self,c):
+ self._linenum = line
+ def setInitialIdChar(self,c,line):
# Starts an "id" token with c as its first character and records the input
# line number. Only _tex is assigned here; _underbar, _std and _label are
# left as they were.
self._tex = [c]
self._class = "id"
+ self._linenum = line
def setNextIdChar(self,c):
# Appends c to the _tex spelling of the identifier being collected.
# The other spellings and the class are not changed here.
self._tex += [c]
- def setInitialOther(self,c):
+ def setInitialShift(self,c,line):
# Starts a "shift" token (a run of characters with ordinal >= 128, per the
# tokenizer's use of isShift) with c as its first character. All four
# spellings start as separate one-element lists; line is recorded as the
# input line number.
+ self._tex = [c]
+ self._underbar = [c]
+ self._std = [c]
+ self._label = [c]
+ self._class = "shift"
+ self._linenum = line
+ def setNextShift(self,c):
# Appends c to all four spellings of the "shift" token being collected.
# The class is (re)set to "shift"; the recorded line number is left alone.
+ self._tex += [c]
+ self._underbar += [c]
+ self._std += [c]
+ self._label += [c]
+ self._class = "shift"
+ def setInitialOther(self,c,line):
# Starts an "other" token with c as its first character. All four spellings
# start as separate one-element lists; line is recorded as the input line
# number.
self._tex = [c]
self._underbar = [c]
self._std = [c]
self._label = [c]
self._class = "other"
+ self._linenum = line
def setNextOther(self,c):
# Appends c to the "other" token being collected.
# NOTE(review): only the _tex and _underbar updates are visible in this
# excerpt; confirm against the full file whether _std and _label are
# extended as well.
self._tex += [c]
self._underbar += [c]
class dwline:
"""using an input line, create a list of tokens for the line.
Legal class transitions in tokenize() are:
+ none->shift
none->other
none->id
none->ind
+
other->ind
other->id
+ other->shift
+
+ shift->id
+ shift->ind
+ shift->other
+
id->ind
id->other
+ id->shift
"""
def __init__(self):
# Starts with no tokens; tokenize() appends to this list.
# list of dwtoken.
self._toks = []
- def tokenize(self,rec):
+ def tokenize(self,rec,filename,linenum):
"""using an input line, create a list of tokens for the line.
Legal class transitions in tokenize() are:
none->other
other->ind
other->id
id->ind
+ id->id
id->other
"""
# Parameters:
#   rec       the text of one input line, scanned character by character.
#   filename  used only in the high-bit-character warning below.
#   linenum   input line number, stored in every token started here.
# Result: tokens are appended to self._toks; nothing is returned by the
# visible code.
# NOTE(review): the transition list in the docstring above does not mention
# the "shift" class handled below; the list in the dwline class docstring
# is the more complete one.
# NOTE(review): the statements that initialise charnum and combotok are not
# visible in this excerpt.
dwclass = "none"
global keepcomments
for c in rec:
charnum = charnum +1
# Report (but still process) any character outside 7-bit ASCII.
+ if ord(c) >= 128:
+ print " Warning: encountered character ord:",ord(c), "at offset",charnum,"line",linenum,filename
# keepcomments == "d" selects dropping comments; a % counts as a comment
# start unless it is the first character or follows a backslash.
if keepcomments == "d" and c == "%" and ( charnum == 0 or rec[charnum - 1] != "\\" ):
# Not keeping comments. We drop % and following to end of line
# unless preceded by \
# would not be harmful.
continue
# State "none"/"ind": no multi-character token is being collected, so c
# either starts a new token in combotok or is emitted on its own.
elif dwclass == "none" or dwclass == "ind":
+ if isShift(c) == "y":
+ combotok.setInitialShift(c,linenum)
+ dwclass = "shift"
+ continue
if isIndivid(c) == "y":
# Individual characters get a token of their own, appended immediately.
a = dwtoken()
- a.setIndivid(c);
+ a.setIndivid(c,linenum);
self._toks += [a]
continue
if isIdStart(c) == "y":
- combotok.setInitialIdChar(c)
+ combotok.setInitialIdChar(c,linenum)
dwclass = "id"
continue
# is "other"
- combotok.setInitialOther(c)
+ combotok.setInitialOther(c,linenum)
dwclass = "other"
continue
# State "id": extend the identifier while isIdNext() accepts c; any other
# character finishes the identifier (finishUpId), appends it, and starts
# whatever token c belongs to.
elif dwclass == "id":
if isIdNext(c) == "y":
combotok.setNextIdChar(c)
continue
+ if isShift(c) == "y":
+ combotok.finishUpId()
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialShift(c,linenum);
+ dwclass = "shift"
+ continue
if isIndivid(c) == "y":
combotok.finishUpId()
self._toks += [combotok]
combotok = dwtoken()
a = dwtoken()
- a.setIndivid(c);
+ a.setIndivid(c,linenum);
dwclass = "ind"
self._toks += [a]
continue
+ if isIdStart(c) == "y":
+ # It is a valid initial character of an id.
+ # So we have id following id, like \a\a
+ combotok.finishUpId()
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialIdChar(c,linenum)
+ dwclass = "id"
+ continue
# Other class input, other starts here.
combotok.finishUpId()
self._toks += [combotok]
combotok = dwtoken()
- combotok.setInitialOther(c);
+ combotok.setInitialOther(c,linenum);
+ dwclass = "other"
+ continue
# State "shift": extend the run while characters keep ordinal >= 128;
# otherwise append the run as-is (finishUpId is not called for shift tokens)
# and start the token c belongs to.
+ elif dwclass == "shift":
+ if isShift(c) == "y":
+ combotok.setNextShift(c);
+ continue
+ if isIndivid(c) == "y":
+ self._toks += [combotok]
+ combotok = dwtoken()
+ a = dwtoken()
+ a.setIndivid(c,linenum);
+ dwclass = "ind"
+ self._toks += [a]
+ continue
+ if isIdStart(c) == "y":
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialIdChar(c,linenum);
+ dwclass = "id"
+ continue
# Here the shift run has ended and c begins an "other" token.
+ # Shift class input, other starts here.
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialOther(c,linenum);
dwclass = "other"
continue
# State "other": a shift, individual or id-start character ends the current
# token; anything else is appended to it.
elif dwclass == "other":
+ if isShift(c) == "y":
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialShift(c,linenum);
+ dwclass = "shift"
+ continue
if isIndivid(c) == "y":
self._toks += [combotok]
combotok = dwtoken()
a = dwtoken()
- a.setIndivid(c);
+ a.setIndivid(c,linenum);
dwclass = "ind"
self._toks += [a]
continue
if isIdStart(c) == "y":
self._toks += [combotok]
combotok = dwtoken()
- combotok.setInitialIdChar(c);
+ combotok.setInitialIdChar(c,linenum);
dwclass = "id"
continue
combotok.setNextOther(c);
# NOTE(review): the statement guarding the next three lines is not visible
# in this excerpt; they finish and append a pending identifier and reset the
# state, presumably once the characters of rec are exhausted.
combotok.finishUpId()
self._toks += [combotok]
dwclass = "none"
# A shift or "other" token still being collected is appended unchanged.
+ if dwclass == "shift":
+ self._toks += [combotok]
+ dwclass = "none"
if dwclass == "other":
self._toks += [combotok]
dwclass = "none"
except IOError, message:
print >> sys.stderr , "File could not be opened: ", name
sys.exit(1)
+ linenum=0
while 1:
try:
rec = file.readline()
if len(rec) < 1:
# eof
break
-
+ linenum = linenum +1
aline = dwline()
- aline.tokenize(rec)
+ aline.tokenize(rec,name,linenum)
self._lines += [aline]
def dwprint(self):
# Calls dwwrite() on every stored line in order, passing a line number that
# is incremented after each line.
# NOTE(review): where outfile and the starting value of lnum come from is
# not visible in this excerpt.
for l in self._lines:
l.dwwrite(outfile,lnum)
lnum = lnum + 1
+ # transformtoks looks at the file as a token sequence,
+ # not a line sequence.
+ # New view required by recent changes to .tex
+ def dwtransformfiletoks(self,callfunc,myfile):
# NOTE(review): unfinished stub. FIXME below is a bare name, not a comment
# or a string: unless a global called FIXME exists elsewhere, evaluating it
# raises NameError, so every call of this method (dwtransformtoks makes
# one) fails here. callfunc and myfile are not used yet.
+ FIXME
+
+ def dwtransformtoks(self,callfunc,myfile):
# Flattens the tokens of all lines of this file, in line order, into one
# list and then calls dwtransformfiletoks(callfunc, myfile).
# NOTE(review): globaltoklist, toknum and tokmax are computed but neither
# passed to dwtransformfiletoks nor read again in the visible code.
+ globaltoklist = []
+ for l in self._lines:
+ for t in l._toks:
+ globaltoklist += [t]
+ toknum = 0
+ tokmax = len(globaltoklist)
+ self.dwtransformfiletoks(callfunc,myfile)
+
def dwtransformline(self,callfunc,myfile):
# Calls dwtransformline(callfunc, myfile, lnum) on each stored line in
# order; lnum is the 1-based position of the line within self._lines.
- lnum=1
+ lnum = 1
for l in self._lines:
l.dwtransformline(callfunc,myfile,lnum)
lnum = lnum + 1
-
-
class dwfiles:
def __init__(self):
def dwtransformline(self,callfunc):
# Runs the line-by-line transform on every file in self._files; each file
# object is also passed as the myfile argument of its own call.
for f in self._files:
f.dwtransformline(callfunc,f)
+ def dwtransformtoks(self,callfunc):
# Runs the token-sequence transform on every file in self._files; each file
# object is also passed as the myfile argument of its own call.
+ for f in self._files:
+ f.dwtransformtoks(callfunc,f)
def setkeepordeletecomments(val):