1 # Copyright 2012 DWARF Debugging Information Format Committee
3 # All the little classes used in storing latex source data.
4 # Reads in the tex source and builds internal lists of the
5 # tokenized source. The tokenization is adequate
6 # for our purposes, but just barely adequate.
16 if isIndivid(c) == "y":
18 if ord(c) >= ord('a') and ord(c) <= ord('z'):
20 if ord(c) >= ord('A') and ord(c) <= ord('Z'):
22 # It is tex/latex, so backslash starts a word.
30 if isIndivid(c) == "y":
32 if ord(c) >= ord('a') and ord(c) <= ord('z'):
34 if ord(c) >= ord('A') and ord(c) <= ord('Z'):
36 if ord(c) >= ord('0') and ord(c) <= ord('9'):
38 # This is so we allow the colon in our tags.
39 # Unfortunately, this gives trouble if the input has a
40 # colon (:) directly at the end of a DW* name.
43 # Do not allow \ in the middle of a name.
70 # self._tex: the token as written in the LaTeX source, e.g. DW\-\_ATE
71 # self._underbar: the same with each \- removed, e.g. DW\_ATE
72 # self._std: the way the name looks in the standard, e.g. DW_ATE
73 # self._label: with all _ and - removed, e.g. DWATE
78 ind: a character taken as an individual character.
79 none: No characters seen yet.
80 shift: A character with the high bit of 8 bits set, not something we expect.
81 - In DW4 these high-bit characters are special 3-character left and right quotes.
82 - charfix.py can replace these with LaTeX ASCII quotes.
83 other: Some other character, but ascii, seemingly. """
89 # Class is one of "id", "ind", "other", "shift", "none".
92 def insertid(self,string,line):
94 self._tex = list(string)
95 self._underbar = self._tex
97 self._label = self._tex
99 def setIndivid(self,c,line):
106 def setInitialIdChar(self,c,line):
110 def setNextIdChar(self,c):
113 def setInitialShift(self,c,line):
118 self._class = "shift"
120 def setNextShift(self,c):
122 self._underbar += [c]
125 self._class = "shift"
126 def setInitialOther(self,c,line):
131 self._class = "other"
133 def setNextOther(self,c):
135 self._underbar += [c]
138 self._class = "other"
139 def finishUpId(self):
140 """ This transforms the strings from the input form into
141 the internal forms we want.
148 while int(n) < len(self._tex):
150 if n < (len (self._tex) - 1) and c == "\\" and self._tex[n+1] == "-":
153 self._underbar += [c]
157 while int(n) < len(self._underbar):
158 c = self._underbar[n]
159 if n < (len (self._underbar) - 1) and c == "\\" and self._underbar[n+1] == "_":
166 while int(n) < len(self._std):
174 def dwprintquotedshortform(self,d):
175 print "'",self.shortform(d),"'",
176 def shortform(self,d):
179 if self._class == "ind":
181 self.dwprintquotedshortform(self._tex)
184 # This prints the token with end-line oddly.
186 self.dwprintquotedshortform(self._tex)
187 self.dwprintquotedshortform(self._underbar)
188 self.dwprintquotedshortform(self._std)
189 self.dwprintquotedshortform(self._label)
191 def dwwrite(self,outfile):
196 """using an input line, create a list of tokens for the line.
197 Legal class transitions in tokenize() are:
220 def tokenize(self,rec,filename,linenum):
221 """using an input line, create a list of tokens for the line.
222 Legal class transitions in tokenize() are:
239 print " Warning: encountered character ord:",ord(c), "at offset",charnum,"line",linenum,filename
240 if keepcomments == "d" and c == "%" and ( charnum == 0 or rec[charnum - 1] != "\\" ):
241 # Not keeping comments. We drop the % and everything following it to the
242 # end of the line, unless the % is preceded by \
245 if c == "\n" or c == "\r":
246 # Just drop these for now. Allowing them
247 # would not be harmful.
249 elif dwclass == "none" or dwclass == "ind":
250 if isShift(c) == "y":
251 combotok.setInitialShift(c,linenum)
254 if isIndivid(c) == "y":
256 a.setIndivid(c,linenum);
259 if isIdStart(c) == "y":
260 combotok.setInitialIdChar(c,linenum)
264 combotok.setInitialOther(c,linenum)
267 elif dwclass == "id":
268 if isIdNext(c) == "y":
269 combotok.setNextIdChar(c)
271 if isShift(c) == "y":
272 combotok.finishUpId()
273 self._toks += [combotok]
275 combotok.setInitialShift(c,linenum);
278 if isIndivid(c) == "y":
279 combotok.finishUpId()
280 self._toks += [combotok]
283 a.setIndivid(c,linenum);
287 if isIdStart(c) == "y":
288 # It is a valid initial character of an id.
289 # So we have id following id, like \a\a
290 combotok.finishUpId()
291 self._toks += [combotok]
293 combotok.setInitialIdChar(c,linenum)
296 # Other class input, other starts here.
297 combotok.finishUpId()
298 self._toks += [combotok]
300 combotok.setInitialOther(c,linenum);
303 elif dwclass == "shift":
304 if isShift(c) == "y":
305 combotok.setNextShift(c);
307 if isIndivid(c) == "y":
308 self._toks += [combotok]
311 a.setIndivid(c,linenum);
315 if isIdStart(c) == "y":
316 self._toks += [combotok]
318 combotok.setInitialIdChar(c,linenum);
321 # Shift class input, other starts here.
322 self._toks += [combotok]
324 combotok.setInitialOther(c,linenum);
327 elif dwclass == "other":
328 if isShift(c) == "y":
329 self._toks += [combotok]
331 combotok.setInitialShift(c,linenum);
334 if isIndivid(c) == "y":
335 self._toks += [combotok]
338 a.setIndivid(c,linenum);
342 if isIdStart(c) == "y":
343 self._toks += [combotok]
345 combotok.setInitialIdChar(c,linenum);
348 combotok.setNextOther(c);
350 # Else case impossible.
352 #Finish up final non-empty other or id token
354 combotok.finishUpId()
355 self._toks += [combotok]
357 if dwclass == "shift":
358 self._toks += [combotok]
360 if dwclass == "other":
361 self._toks += [combotok]
363 def dwprint(self,linenum):
364 print "Number of tokens in line ",linenum," : ",len(self._toks)
365 if len(self._toks) == 0:
366 #Just print an empty line.
371 def dwwrite(self, outfile, linenum):
375 def dwtransformline(self,callfunc,myfile,lnum):
376 toks = callfunc(self._toks,myfile,lnum)
381 def __init__(self,name):
387 file = open(name,"r");
388 except IOError, message:
389 print >> sys.stderr , "File could not be opened: ", name
394 rec = file.readline()
402 aline.tokenize(rec,name,linenum)
403 self._lines += [aline]
406 print "Number of lines in ", self._name, ": ",len(self._lines)
408 for l in self._lines:
412 # The lnum is just for debugging messages.
414 outname = self._name + ".out"
417 outfile = open(outname,"w");
418 except IOError, message:
419 print >> sys.stderr , "Output File could not be opened: ", name
422 for l in self._lines:
423 l.dwwrite(outfile,lnum)
425 # transformtoks looks at the file as a token sequence,
426 # not a line sequence.
427 # New view required by recent changes to .tex
428 def dwtransformfiletoks(self,callfunc,myfile):
431 def dwtransformtoks(self,callfunc,myfile):
433 for l in self._lines:
437 tokmax = len(globaltoklist)
438 self.dwtransformfiletoks(callfunc,myfile)
440 def dwtransformline(self,callfunc,myfile):
442 for l in self._lines:
443 l.dwtransformline(callfunc,myfile,lnum)
451 def addFile(self,name):
456 print "Number of files: ",len(self._files);
457 for f in self._files:
460 for f in self._files:
462 def dwtransformline(self,callfunc):
463 for f in self._files:
464 f.dwtransformline(callfunc,f)
465 def dwtransformtoks(self,callfunc):
466 for f in self._files:
467 f.dwtransformtoks(callfunc,f)
470 def setkeepordeletecomments(val):
471 """ Pass in "k" or "d" to keep or delete comments, respectively """
475 def readFilelist(filelist):