--- /dev/null
+# Copyright 2012 DWARF Debugging Information Format Committee
+#
+#
+#
+# Looks for certain multi-byte chars and replaces with
+# appropriate ascii.
+# See "The Comprehensive LaTeX Symbol List"
+#http://www.tex.ac.uk/tex-archive/info/symbols/comprehensive/symbols-a4.pdf
+
+#\newcommand{\doublequote}[1]{\textquotedblleft#1\textquotedblright}
+#\newcommand{\singlequote}[1]{\textquoteleft#1\textquoteright}
+
+#
+# utf latex description
+# e2 80 9c \textquotedblleft (two curly left quote chars)
+# e2 80 9d \textquotedblright (two curly right quote chars)
+# or \doublequote{stringtoquote} (our command)
+# e2 80 9b \textquoteleft (single curly left quote char)
+# e2 80 99 \textquoteright (single curly right quote char)
+# by itself for contractions like don't
+# or \singlequote{stringtoquote} for a left right pair (our command)
+#
+# e2 80 93 \textendash (minus sign)
+# e2 80 94 \textemdash (long dash )
+# \textthreequartersemdash (long dash, from textcomp package)
+# \textendash (long dash )
+# \leftrightline (from MnSymbol package)
+# e2 80 a6 \dots (...)
+# \textellipsis is also usable.
+# e2 84 a2 \texttrademark trademark symbol
+# e2 ?? ?? \copyright (copyright symbol)
+
+import sys
+import fileio
+
+
+def ischar(tok,c):
+    """Return "y" if tok is an "ind" token holding exactly the one
+    character c, otherwise "n"."""
+    is_match = (tok._class == "ind"
+                and len(tok._tex) == 1
+                and tok._tex[0] == c)
+    return "y" if is_match else "n"
+
+
+def append_to_out(out,addthese):
+    """Append every element of addthese, in order, to the list out.
+    The list is modified in place; nothing is returned."""
+    out.extend(addthese)
+
+def isutf80prefix(t):
+    """Return "y" if t starts with the characters chr(226), chr(128)
+    (the UTF-8 lead bytes e2 80), otherwise "n"."""
+    has_prefix = t[0] == chr(226) and t[1] == chr(128)
+    return "y" if has_prefix else "n"
+def isutf84prefix(t):
+    """Return "y" if t starts with the characters chr(226), chr(132)
+    (the UTF-8 lead bytes e2 84), otherwise "n"."""
+    has_prefix = t[0] == chr(226) and t[1] == chr(132)
+    return "y" if has_prefix else "n"
+
+def isutfleftdouble(t):
+    """Return "y" if t is the sequence e2 80 9c (left double quote),
+    otherwise "n"."""
+    matched = isutf80prefix(t) == "y" and t[2] == chr(156)
+    return "y" if matched else "n"
+def isutfrightdouble(t):
+    """Return "y" if t is the sequence e2 80 9d (right double quote),
+    otherwise "n"."""
+    matched = isutf80prefix(t) == "y" and t[2] == chr(157)
+    return "y" if matched else "n"
+def isutfleftsingle(t):
+    """Return "y" if t is a UTF-8 left single quote, otherwise "n".
+
+    Two third bytes are accepted after the e2 80 prefix:
+      0x98 (chr(152)) U+2018 LEFT SINGLE QUOTATION MARK
+      0x9b (chr(155)) U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK
+    Only 0x9b was recognised before, so an ordinary U+2018 left quote
+    was passed through unconverted.
+    """
+    if isutf80prefix(t) != "y":
+        return "n"
+    if t[2] in (chr(152), chr(155)):
+        return "y"
+    return "n"
+def isutfrightsingle(t):
+    """Return "y" if t is the sequence e2 80 99 (right single quote,
+    also used as an apostrophe), otherwise "n"."""
+    matched = isutf80prefix(t) == "y" and t[2] == chr(153)
+    return "y" if matched else "n"
+def isutfdash(t):
+    """Return "y" if t is the sequence e2 80 94 (the long dash that
+    transfunc turns into \\textemdash), otherwise "n"."""
+    matched = isutf80prefix(t) == "y" and t[2] == chr(148)
+    return "y" if matched else "n"
+def isutfminus(t):
+    """Return "y" if t is the sequence e2 80 93 (the short dash that
+    transfunc turns into \\textendash), otherwise "n"."""
+    matched = isutf80prefix(t) == "y" and t[2] == chr(147)
+    return "y" if matched else "n"
+def isutftrademark(t):
+    """Return "y" if t is the sequence e2 84 a2 (trademark symbol),
+    otherwise "n"."""
+    matched = isutf84prefix(t) == "y" and t[2] == chr(162)
+    return "y" if matched else "n"
+
+def isutfdots(t):
+    """Return "y" if t is the sequence e2 80 a6 (ellipsis), otherwise
+    "n"."""
+    matched = isutf80prefix(t) == "y" and t[2] == chr(166)
+    return "y" if matched else "n"
+
+# Following just for a unique single-right-quote case.
+def maybeinsertnonbreakspace(outtoks,linetoks,nexttoknumin,lasttoknum):
+    """Append a backslash-space token to outtoks when the token just
+    before the current one is the word 'variables'.
+
+    A hack for a single case: a right single quote directly after
+    'variables' needs the explicit space to look right in the output.
+    nexttoknumin is the index of the token AFTER the current one, so the
+    token before the current one sits at nexttoknumin-2.  lasttoknum is
+    accepted for symmetry with maybeinsertspace and is not used.
+    """
+    prev_index = nexttoknumin - 2
+    if prev_index < 0:
+        # The current token is first on the line; nothing precedes it.
+        return
+    if ''.join(linetoks[prev_index]._tex) != "variables":
+        return
+    spacer = fileio.dwtoken()
+    spacer.setInitialOther("\\")
+    spacer.setNextOther(" ")
+    append_to_out(outtoks,[spacer])
+
+def maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum):
+    """Append a single-space token to outtoks unless the next input
+    token already begins with a space.
+
+    nexttoknumin is the index of the token after the current one and
+    lasttoknum the index of the last token on the line; when the current
+    token is the last one nothing is appended.
+    """
+    if nexttoknumin <= lasttoknum:
+        following = ''.join(linetoks[nexttoknumin]._tex)
+        if following[0] != " ":
+            spacer = fileio.dwtoken()
+            spacer.setIndivid(" ")
+            append_to_out(outtoks,[spacer])
+    return
+def transfunc(linetoks,myfile,linenum):
+    """Return the tokens of one line with the known 3-character UTF-8
+    sequences replaced by LaTeX command tokens.
+
+    linetoks is the list of tokens for the line.  myfile and linenum are
+    part of the callback signature and are not used here.  An empty
+    linetoks is returned unchanged; otherwise a new list is built.
+    Tokens whose text is not exactly 3 characters, or is not one of the
+    recognised sequences, are passed through untouched.
+    """
+    if len(linetoks) < 1:
+        return linetoks
+    lasttoknum = len(linetoks) - 1
+    outtoks = []
+
+    def emit_id(text):
+        # New "id" token carrying a LaTeX command name.
+        newtok = fileio.dwtoken()
+        newtok.insertid(text)
+        outtoks.append(newtok)
+
+    def emit_individ(char):
+        # New individual-character token (used for the braces).
+        newtok = fileio.dwtoken()
+        newtok.setIndivid(char)
+        outtoks.append(newtok)
+
+    for index, t in enumerate(linetoks):
+        # Index of the token following the current one.
+        nexttoknumin = index + 1
+        rawtok = ''.join(t._tex)
+        if len(rawtok) != 3:
+            outtoks.append(t)
+            continue
+        if isutfleftdouble(rawtok) == "y":
+            # Opens \doublequote{ ; the matching right quote closes it.
+            emit_id("\\doublequote")
+            emit_individ("{")
+        elif isutfrightdouble(rawtok) == "y":
+            emit_individ("}")
+        elif isutfleftsingle(rawtok) == "y":
+            # Single quotes are not paired with \singlequote{} because a
+            # right single quote may be an apostrophe in a contraction.
+            # The trailing space keeps the command name from running
+            # into the following word.
+            emit_id("\\textquoteleft")
+            maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
+        elif isutfrightsingle(rawtok) == "y":
+            emit_id("\\textquoteright")
+            maybeinsertnonbreakspace(outtoks,linetoks,nexttoknumin,lasttoknum)
+            maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
+        elif isutfdash(rawtok) == "y":
+            emit_id("\\textemdash")
+            maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
+        elif isutfminus(rawtok) == "y":
+            emit_id("\\textendash")
+            maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
+        elif isutftrademark(rawtok) == "y":
+            # The explicit backslash-space after the TM symbol makes the
+            # output show a real space.
+            emit_id("\\texttrademark\\ ")
+            maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
+        elif isutfdots(rawtok) == "y":
+            emit_id("\\dots")
+            maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
+        else:
+            outtoks.append(t)
+    return outtoks
+
+def process_files(filelist):
+    """Load the files named in filelist through fileio.readFilelist,
+    apply transfunc to them via dwtransformline, then call dwwrite."""
+    loaded = fileio.readFilelist(filelist)
+    loaded.dwtransformline(transfunc)
+    loaded.dwwrite()
+
+def read_all_args():
+    """Take the file names from the command line and process them.
+
+    Every argument after the program name is treated as a file name;
+    no options are recognised.  When no file name is given, a message
+    is written to stderr and the process exits with status 1.
+    """
+    filelist = sys.argv[1:]
+    if not filelist:
+        # sys.stderr.write() emits the same text as the Python 2 print
+        # statement did, and does not fail if run under Python 3.
+        sys.stderr.write("No files specified.\n")
+        sys.exit(1)
+    process_files(filelist)
+
+# Usage: <this script> file [file ...]   (only file names; no options)
+
+# Script entry point: runs only when this file is executed directly,
+# not when it is imported.  All command-line handling is in
+# read_all_args().
+if __name__ == '__main__':
+ read_all_args()
+
if c == "_":
return "y"
return "n"
+def isShift(c):
+    """Return "y" if ord(c) is 128 or more (a character with the high
+    bit of 8 bits set), otherwise "n"."""
+    return "y" if ord(c) >= 128 else "n"
def isIndivid(c):
if c == "[":
return "y"
return "n"
class dwtoken:
+ """ Token types:
+ id: identifier
+ ind: a character taken as an individual character.
+ none: No characters seen yet.
+ shift: A character with the high bit of 8 bits set, not something we expect.
+ - In DW4 these high-bit-chars are special 3-character left and right quotes.
+ - charfix.py can replace these with Latex ascii quotes.
+ other: Some other character, but ascii, seemingly. """
def __init__(self):
self._tex = []
self._underbar = []
self._std = []
self._label = []
- # Class is "id", "ind","other","none"
+ # Class is "id", "ind","other","shift","none"
self._class = "none"
def insertid(self,string):
self._class = "id"
def setNextIdChar(self,c):
self._tex += [c]
+ # Start this token as a "shift" token whose only character so far is c:
+ # each of the four per-token character lists (_tex, _underbar, _std,
+ # _label) is replaced by [c] and the token class is set to "shift".
+ def setInitialShift(self,c):
+ self._tex = [c]
+ self._underbar = [c]
+ self._std = [c]
+ self._label = [c]
+ self._class = "shift"
+ # Extend a "shift" token with one more character: c is appended to each
+ # of the four per-token character lists (_tex, _underbar, _std, _label)
+ # and the token class is (re)set to "shift".
+ def setNextShift(self,c):
+ self._tex += [c]
+ self._underbar += [c]
+ self._std += [c]
+ self._label += [c]
+ self._class = "shift"
def setInitialOther(self,c):
self._tex = [c]
self._underbar = [c]
class dwline:
"""using an input line, create a list of tokens for the line.
Legal class transitions in tokenize() are:
+ none->shift
none->other
none->id
none->ind
+
other->ind
other->id
+ other->shift
+
+ shift->id
+ shift->ind
+ shift->other
+
id->ind
id->other
+ id->shift
"""
def __init__(self):
# list of dwtoken.
self._toks = []
- def tokenize(self,rec):
+ def tokenize(self,rec,filename,linenum):
"""using an input line, create a list of tokens for the line.
Legal class transitions in tokenize() are:
none->other
global keepcomments
for c in rec:
charnum = charnum +1
+ if ord(c) >= 128:
+ print " Warning: encountered character ord:",ord(c), "at offset",charnum,"line",linenum,filename
if keepcomments == "d" and c == "%" and ( charnum == 0 or rec[charnum - 1] != "\\" ):
# Not keeping comments. We drop % and following to end of line
# unless preceeded by \
# would not be harmful.
continue
elif dwclass == "none" or dwclass == "ind":
+ if isShift(c) == "y":
+ combotok.setInitialShift(c)
+ dwclass = "shift"
+ continue
if isIndivid(c) == "y":
a = dwtoken()
a.setIndivid(c);
if isIdNext(c) == "y":
combotok.setNextIdChar(c)
continue
+ if isShift(c) == "y":
+ combotok.finishUpId()
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialShift(c);
+ dwclass = "shift"
+ continue
if isIndivid(c) == "y":
combotok.finishUpId()
self._toks += [combotok]
combotok.setInitialOther(c);
dwclass = "other"
continue
+ elif dwclass == "shift":
+ if isShift(c) == "y":
+ combotok.setNextShift(c);
+ continue
+ if isIndivid(c) == "y":
+ self._toks += [combotok]
+ combotok = dwtoken()
+ a = dwtoken()
+ a.setIndivid(c);
+ dwclass = "ind"
+ self._toks += [a]
+ continue
+ if isIdStart(c) == "y":
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialIdChar(c);
+ dwclass = "id"
+ continue
+ # Shift class input, other starts here.
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialOther(c);
+ dwclass = "other"
+ continue
elif dwclass == "other":
+ if isShift(c) == "y":
+ self._toks += [combotok]
+ combotok = dwtoken()
+ combotok.setInitialShift(c);
+ dwclass = "shift"
+ continue
if isIndivid(c) == "y":
self._toks += [combotok]
combotok = dwtoken()
combotok.finishUpId()
self._toks += [combotok]
dwclass = "none"
+ if dwclass == "shift":
+ self._toks += [combotok]
+ dwclass = "none"
if dwclass == "other":
self._toks += [combotok]
dwclass = "none"
except IOError, message:
print >> sys.stderr , "File could not be opened: ", name
sys.exit(1)
+ linenum=0
while 1:
try:
rec = file.readline()
if len(rec) < 1:
# eof
break
-
+ linenum = linenum +1
aline = dwline()
- aline.tokenize(rec)
+ aline.tokenize(rec,name,linenum)
self._lines += [aline]
def dwprint(self):