1 # Copyright 2012 DWARF Debugging Information Format Committee
3 # All the little classes used in storing latex source data.
4 # Reads in the tex source and builds internal lists of the
5 # tokenized source. The tokenization is adequate
6 # for our purposes, but just barely adequate.
16 if isIndivid(c) == "y":
18 if ord(c) >= ord('a') and ord(c) <= ord('z'):
20 if ord(c) >= ord('A') and ord(c) <= ord('Z'):
22 # It is tex/latex, so backslash starts a word.
30 if isIndivid(c) == "y":
32 if ord(c) >= ord('a') and ord(c) <= ord('z'):
34 if ord(c) >= ord('A') and ord(c) <= ord('Z'):
36 if ord(c) >= ord('0') and ord(c) <= ord('9'):
38 # This is so we allow the colon in our tags
39 # Unfortunately, this gives trouble if we have a
40 # : at the end of a DW* name on input.
43 # Do not allow \ in the middle of a name.
70 # self._tex DW\-\_ATE and the like
71 # self._underbar DW\_ATE and the like
72 # self._std the way a DW_ATE and the like looks in the standard
73 # self._label With all _ and - removed. Like DWATE
78 ind: a character taken as an individual character.
79 none: No characters seen yet.
80 shift: A character with the high bit of 8 bits set, not something we expect.
81 - In DW4 these high-bit-chars are special 3-character left and right quotes.
82 - charfix.py can replace these with Latex ascii quotes.
83 other: Some other character, but ascii, seemingly. """
89 # Class is "id", "ind","other","shift","none"
92 def insertid(self,string,line):
94 self._tex = list(string)
95 self._underbar = self._tex
97 self._label = self._tex
99 def setIndivid(self,c,line):
106 def setInitialIdChar(self,c,line):
110 def setNextIdChar(self,c):
113 def setInitialShift(self,c,line):
118 self._class = "shift"
120 def setNextShift(self,c):
122 self._underbar += [c]
125 self._class = "shift"
126 def setInitialOther(self,c,line):
131 self._class = "other"
133 def setNextOther(self,c):
135 self._underbar += [c]
138 self._class = "other"
139 def finishUpId(self):
140 """ This transforms the strings from the input form into
141 the internal forms we want.
148 while int(n) < len(self._tex):
150 if n < (len (self._tex) - 1) and c == "\\" and self._tex[n+1] == "-":
153 self._underbar += [c]
157 while int(n) < len(self._underbar):
158 c = self._underbar[n]
159 if n < (len (self._underbar) - 1) and c == "\\" and self._underbar[n+1] == "_":
166 while int(n) < len(self._std):
# Print the result of self.shortform(d) between two single-quote characters.
# This is a Python 2 print statement: the listed items are separated by
# spaces, and the trailing comma suppresses the end-of-line so that later
# output continues on the same line.
174 def dwprintquotedshortform(self,d):
175 print "'",self.shortform(d),"'",
176 def shortform(self,d):
179 if self._class == "ind":
181 self.dwprintquotedshortform(self._tex)
184 # This prints the token with end-line oddly.
186 self.dwprintquotedshortform(self._tex)
187 self.dwprintquotedshortform(self._underbar)
188 self.dwprintquotedshortform(self._std)
189 self.dwprintquotedshortform(self._label)
191 def dwwrite(self,outfile):
196 """using an input line, create a list of tokens for the line.
197 Legal class transitions in tokenize() are:
220 def tokenize(self,rec,filename,linenum):
221 """using an input line, create a list of tokens for the line.
222 Legal class transitions in tokenize() are:
238 print " Warning: encountered character ord:",ord(c), "at offset",charnum,"line",linenum,filename
239 if keepcomments == "d" and c == "%" and ( charnum == 0 or rec[charnum - 1] != "\\" ):
240 # Not keeping comments. We drop % and following to end of line
241 # unless preceded by \
244 if c == "\n" or c == "\r":
245 # Just drop these for now. Allowing them
246 # would not be harmful.
248 elif dwclass == "none" or dwclass == "ind":
249 if isShift(c) == "y":
250 combotok.setInitialShift(c,linenum)
253 if isIndivid(c) == "y":
255 a.setIndivid(c,linenum);
258 if isIdStart(c) == "y":
259 combotok.setInitialIdChar(c,linenum)
263 combotok.setInitialOther(c,linenum)
266 elif dwclass == "id":
267 if isIdNext(c) == "y":
268 combotok.setNextIdChar(c)
270 if isShift(c) == "y":
271 combotok.finishUpId()
272 self._toks += [combotok]
274 combotok.setInitialShift(c,linenum);
277 if isIndivid(c) == "y":
278 combotok.finishUpId()
279 self._toks += [combotok]
282 a.setIndivid(c,linenum);
286 # Other class input, other starts here.
287 combotok.finishUpId()
288 self._toks += [combotok]
290 combotok.setInitialOther(c,linenum);
293 elif dwclass == "shift":
294 if isShift(c) == "y":
295 combotok.setNextShift(c);
297 if isIndivid(c) == "y":
298 self._toks += [combotok]
301 a.setIndivid(c,linenum);
305 if isIdStart(c) == "y":
306 self._toks += [combotok]
308 combotok.setInitialIdChar(c,linenum);
311 # Shift class input, other starts here.
312 self._toks += [combotok]
314 combotok.setInitialOther(c,linenum);
317 elif dwclass == "other":
318 if isShift(c) == "y":
319 self._toks += [combotok]
321 combotok.setInitialShift(c,linenum);
324 if isIndivid(c) == "y":
325 self._toks += [combotok]
328 a.setIndivid(c,linenum);
332 if isIdStart(c) == "y":
333 self._toks += [combotok]
335 combotok.setInitialIdChar(c,linenum);
338 combotok.setNextOther(c);
340 # Else case impossible.
342 #Finish up final non-empty other or id token
344 combotok.finishUpId()
345 self._toks += [combotok]
347 if dwclass == "shift":
348 self._toks += [combotok]
350 if dwclass == "other":
351 self._toks += [combotok]
353 def dwprint(self,linenum):
354 print "Number of tokens in line ",linenum," : ",len(self._toks)
355 if len(self._toks) == 0:
356 #Just print an empty line.
361 def dwwrite(self, outfile, linenum):
365 def dwtransformline(self,callfunc,myfile,lnum):
366 toks = callfunc(self._toks,myfile,lnum)
371 def __init__(self,name):
377 file = open(name,"r");
378 except IOError, message:
379 print >> sys.stderr , "File could not be opened: ", name
384 rec = file.readline()
392 aline.tokenize(rec,name,linenum)
393 self._lines += [aline]
396 print "Number of lines in ", self._name, ": ",len(self._lines)
398 for l in self._lines:
402 # The lnum is just for debugging messages.
404 outname = self._name + ".out"
407 outfile = open(outname,"w");
408 except IOError, message:
409 print >> sys.stderr , "Output File could not be opened: ", name
412 for l in self._lines:
413 l.dwwrite(outfile,lnum)
415 # transformtoks looks at the file as a token sequence,
416 # not a line sequence.
417 # New view required by recent changes to .tex
418 def dwtransformfiletoks(self,callfunc,myfile):
421 def dwtransformtoks(self,callfunc,myfile):
423 for l in self._lines:
427 tokmax = len(globaltoklist)
428 self.dwtransformfiletoks(callfunc,myfile)
430 def dwtransformline(self,callfunc,myfile):
432 for l in self._lines:
433 l.dwtransformline(callfunc,myfile,lnum)
441 def addFile(self,name):
446 print "Number of files: ",len(self._files);
447 for f in self._files:
450 for f in self._files:
# Forward callfunc to the dwtransformline method of every entry in
# self._files. Each entry is also passed to its own method as the second
# argument.
452 def dwtransformline(self,callfunc):
453 for f in self._files:
454 f.dwtransformline(callfunc,f)
# Forward callfunc to the dwtransformtoks method of every entry in
# self._files. Each entry is also passed to its own method as the second
# argument.
455 def dwtransformtoks(self,callfunc):
456 for f in self._files:
457 f.dwtransformtoks(callfunc,f)
460 def setkeepordeletecomments(val):
461 """ Pass in "k" or "d" to keep or delete comments, respectively """
465 def readFilelist(filelist):