1 # Copyright 2012 DWARF Debugging Information Format Committee
3 # All the little classes used in storing latex source data.
4 # Reads in the tex source and builds internal lists of the
5 # tokenized source. The tokenization is adequate
6 # for our purposes, but just barely adequate.
16 if isIndivid(c) == "y":
18 if ord(c) >= ord('a') and ord(c) <= ord('z'):
20 if ord(c) >= ord('A') and ord(c) <= ord('Z'):
22 # It is tex/latex, so backslash starts a word.
30 if isIndivid(c) == "y":
32 if ord(c) >= ord('a') and ord(c) <= ord('z'):
34 if ord(c) >= ord('A') and ord(c) <= ord('Z'):
36 if ord(c) >= ord('0') and ord(c) <= ord('9'):
38 # This is so we allow the colon in our tags
39 # Unfortunately, this gives trouble if we have a
40 # : at the end of a DW* name on input.
72 ind: a character taken as an individual character.
73 none: No characters seen yet.
74 shift: A character with the high bit of 8 bits set, not something we expect.
75 - In DW4 these high-bit-chars are special 3-character left and right quotes.
76 - charfix.py can replace these with Latex ascii quotes.
77 other: Some other character, but ascii, seemingly. """
83 # Class is "id", "ind","other","shift","none"
85 def insertid(self,string):
87 self._tex = list(string)
88 self._underbar = self._tex
90 self._label = self._tex
91 def setIndivid(self,c):
97 def setInitialIdChar(self,c):
100 def setNextIdChar(self,c):
103 def setInitialShift(self,c):
108 self._class = "shift"
109 def setNextShift(self,c):
111 self._underbar += [c]
114 self._class = "shift"
115 def setInitialOther(self,c):
120 self._class = "other"
121 def setNextOther(self,c):
123 self._underbar += [c]
126 self._class = "other"
127 def finishUpId(self):
128 """ This transforms the strings from the input form into
129 the internal forms we want.
136 while int(n) < len(self._tex):
138 if n < (len (self._tex) - 1) and c == "\\" and self._tex[n+1] == "-":
141 self._underbar += [c]
145 while int(n) < len(self._underbar):
146 c = self._underbar[n]
147 if n < (len (self._underbar) - 1) and c == "\\" and self._underbar[n+1] == "_":
154 while int(n) < len(self._std):
162 def dwprintquotedshortform(self,d):
163 print "'",self.shortform(d),"'",
164 def shortform(self,d):
167 if self._class == "ind":
169 self.dwprintquotedshortform(self._tex)
172 # This prints the token with end-line oddly.
174 self.dwprintquotedshortform(self._tex)
175 self.dwprintquotedshortform(self._underbar)
176 self.dwprintquotedshortform(self._std)
177 self.dwprintquotedshortform(self._label)
179 def dwwrite(self,outfile):
184 """using an input line, create a list of tokens for the line.
185 Legal class transitions in tokenize() are:
208 def tokenize(self,rec,filename,linenum):
209 """using an input line, create a list of tokens for the line.
210 Legal class transitions in tokenize() are:
226 print " Warning: encountered character ord:",ord(c), "at offset",charnum,"line",linenum,filename
227 if keepcomments == "d" and c == "%" and ( charnum == 0 or rec[charnum - 1] != "\\" ):
228 # Not keeping comments. We drop % and following to end of line
229 # unless preceded by \
232 if c == "\n" or c == "\r":
233 # Just drop these for now. Allowing them
234 # would not be harmful.
236 elif dwclass == "none" or dwclass == "ind":
237 if isShift(c) == "y":
238 combotok.setInitialShift(c)
241 if isIndivid(c) == "y":
246 if isIdStart(c) == "y":
247 combotok.setInitialIdChar(c)
251 combotok.setInitialOther(c)
254 elif dwclass == "id":
255 if isIdNext(c) == "y":
256 combotok.setNextIdChar(c)
258 if isShift(c) == "y":
259 combotok.finishUpId()
260 self._toks += [combotok]
262 combotok.setInitialShift(c);
265 if isIndivid(c) == "y":
266 combotok.finishUpId()
267 self._toks += [combotok]
274 # Other class input, other starts here.
275 combotok.finishUpId()
276 self._toks += [combotok]
278 combotok.setInitialOther(c);
281 elif dwclass == "shift":
282 if isShift(c) == "y":
283 combotok.setNextShift(c);
285 if isIndivid(c) == "y":
286 self._toks += [combotok]
293 if isIdStart(c) == "y":
294 self._toks += [combotok]
296 combotok.setInitialIdChar(c);
299 # Shift class input, other starts here.
300 self._toks += [combotok]
302 combotok.setInitialOther(c);
305 elif dwclass == "other":
306 if isShift(c) == "y":
307 self._toks += [combotok]
309 combotok.setInitialShift(c);
312 if isIndivid(c) == "y":
313 self._toks += [combotok]
320 if isIdStart(c) == "y":
321 self._toks += [combotok]
323 combotok.setInitialIdChar(c);
326 combotok.setNextOther(c);
328 # Else case impossible.
330 #Finish up final non-empty other or id token
332 combotok.finishUpId()
333 self._toks += [combotok]
335 if dwclass == "shift":
336 self._toks += [combotok]
338 if dwclass == "other":
339 self._toks += [combotok]
341 def dwprint(self,linenum):
342 print "Number of tokens in line ",linenum," : ",len(self._toks)
343 if len(self._toks) == 0:
344 #Just print an empty line.
349 def dwwrite(self, outfile, linenum):
353 def dwtransformline(self,callfunc,myfile,lnum):
354 toks = callfunc(self._toks,myfile,lnum)
359 def __init__(self,name):
365 file = open(name,"r");
366 except IOError, message:
367 print >> sys.stderr , "File could not be opened: ", name
372 rec = file.readline()
380 aline.tokenize(rec,name,linenum)
381 self._lines += [aline]
384 print "Number of lines in ", self._name, ": ",len(self._lines)
386 for l in self._lines:
390 # The lnum is just for debugging messages.
392 outname = self._name + ".out"
395 outfile = open(outname,"w");
396 except IOError, message:
397 print >> sys.stderr , "Output File could not be opened: ", name
400 for l in self._lines:
401 l.dwwrite(outfile,lnum)
403 def dwtransformline(self,callfunc,myfile):
405 for l in self._lines:
406 l.dwtransformline(callfunc,myfile,lnum)
416 def addFile(self,name):
421 print "Number of files: ",len(self._files);
422 for f in self._files:
425 for f in self._files:
427 def dwtransformline(self,callfunc):
428 for f in self._files:
429 f.dwtransformline(callfunc,f)
432 def setkeepordeletecomments(val):
433 """ Pass in "k" or "d" to keep or delete comments, respectively """
437 def readFilelist(filelist):