New script removehyphen.py fixes up the use of \-
[dwarf-doc.git] / dwarf5 / tools / fileio.py
index b885c65..feca379 100644 (file)
@@ -1,9 +1,17 @@
+# Copyright 2012 DWARF Debugging Information Format Committee
 
 # All the little classes used in storing latex source data.
-# Copyright 2012 DWARF Debugging Information Format Committee
+# Reads in the tex source and builds internal lists of the
+# tokenized source.  The tokenization is adequate
+# for our purposes, but just barely adequate.
 
 import sys
 
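+# Comment handling: "d" drops unescaped % comments while tokenizing.
+# Any other value (normally "k") keeps them in the token stream.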
+global keepcomments
+keepcomments = "k"
+
 def isIdStart(c):
   if isIndivid(c) == "y":
     return "n"
@@ -39,6 +47,10 @@ def isIdNext(c):
   if c == "_":
     return "y"
   return "n"
+def isShift(c):
+  if ord(c) >= 128:
+    return "y"
+  return "n"
 def isIndivid(c):
   if c == "[":
     return "y"
@@ -54,13 +66,26 @@ def isIndivid(c):
     return "y"
   return "n"
 
+# self._tex        DW\-\_ATE and the like
+# self._underbar   DW\_ATE  and the like
+# self._std   the way a DW_ATE and the like looks in the standard
+# self._label With all _ and - removed.  Like DWATE
+
 class dwtoken:
+  """ Token types: 
+  id: identifier
+  ind: a character taken as an individual character.
+  none: No characters seen yet.
+  shift: A character with the high bit of 8 bits set, not something we expect.
+  -      In DW4 these high-bit-chars are special 3-character left and right quotes.
+  -      charfix.py  can replace these with Latex ascii quotes.
+  other: Some other character, but ascii, seemingly.  """
   def __init__(self):
     self._tex = []
     self._underbar = []
     self._std = []
     self._label = []
-    # Class is "id", "ind","other","none"
+    # Class is "id", "ind","other","shift","none"
     self._class = "none"
   def insertid(self,string):
     self._class =  "id"
@@ -80,6 +105,18 @@ class dwtoken:
   def setNextIdChar(self,c):
     self._tex += [c]
 
+  def setInitialShift(self,c):
+    self._tex = [c]
+    self._underbar = [c]
+    self._std = [c]
+    self._label = [c]
+    self._class =  "shift"
+  def setNextShift(self,c):
+    self._tex += [c]
+    self._underbar += [c]
+    self._std += [c]
+    self._label += [c]
+    self._class =  "shift"
   def setInitialOther(self,c):
     self._tex = [c]
     self._underbar = [c]
@@ -151,20 +188,29 @@ class dwtoken:
 class  dwline:
   """using an input line, create a list of tokens for the line.
      Legal class transitions in tokenize() are:
+     none->shift
      none->other
      none->id
      none->ind
+
      other->ind
      other->id
+     other->shift
+
+     shift->id
+     shift->ind
+     shift->other
+
      id->ind
      id->other
+     id->shift
   """
   def __init__(self):
     # list of dwtoken.
     self._toks = []
 
   
-  def tokenize(self,rec):
+  def tokenize(self,rec,filename,linenum):
     """using an input line, create a list of tokens for the line.
        Legal class transitions in tokenize() are:
        none->other
@@ -177,12 +223,26 @@ class  dwline:
     """
     dwclass = "none"
     combotok = dwtoken()
+    charnum= -1 
+    global keepcomments
     for c in rec:
+      charnum = charnum +1
+      if ord(c) >= 128:
+        print " Warning: encountered character ord:",ord(c), "at offset",charnum,"line",linenum,filename
+      if keepcomments == "d" and c == "%" and ( charnum == 0 or rec[charnum - 1] != "\\" ):  
+        # Not keeping comments. We drop % and following to end of line 
+        # unless the % is preceded by a backslash (an escaped percent).
+        break
+
       if c == "\n" or c == "\r":
           # Just drop these for now. Allowing them
           # would not be harmful.
           continue
       elif dwclass == "none" or dwclass == "ind":
+        if isShift(c) == "y":
+          combotok.setInitialShift(c)
+          dwclass = "shift"
+          continue
         if isIndivid(c) == "y":
           a = dwtoken()
           a.setIndivid(c);
@@ -200,6 +260,13 @@ class  dwline:
         if isIdNext(c) == "y":
           combotok.setNextIdChar(c)
           continue
+        if isShift(c) == "y":
+          combotok.finishUpId()
+          self._toks += [combotok]
+          combotok = dwtoken()
+          combotok.setInitialShift(c);
+          dwclass = "shift"
+          continue
         if isIndivid(c) == "y":
           combotok.finishUpId()
           self._toks += [combotok]
@@ -216,7 +283,37 @@ class  dwline:
         combotok.setInitialOther(c);
         dwclass = "other"
         continue
+      elif dwclass == "shift":
+        if isShift(c) == "y":
+          combotok.setNextShift(c);
+          continue
+        if isIndivid(c) == "y":
+          self._toks += [combotok]
+          combotok = dwtoken()
+          a = dwtoken()
+          a.setIndivid(c);
+          dwclass = "ind"
+          self._toks += [a]
+          continue
+        if isIdStart(c) == "y":
+          self._toks += [combotok]
+          combotok = dwtoken()
+          combotok.setInitialIdChar(c);
+          dwclass = "id"
+          continue
+        # Shift class input, other starts here.
+        self._toks += [combotok]
+        combotok = dwtoken()
+        combotok.setInitialOther(c);
+        dwclass = "other"
+        continue
       elif dwclass == "other":
+        if isShift(c) == "y":
+          self._toks += [combotok]
+          combotok = dwtoken()
+          combotok.setInitialShift(c);
+          dwclass = "shift"
+          continue
         if isIndivid(c) == "y":
           self._toks += [combotok]
           combotok = dwtoken()
@@ -240,6 +337,9 @@ class  dwline:
       combotok.finishUpId()
       self._toks += [combotok]
       dwclass = "none"
+    if dwclass == "shift":
+      self._toks += [combotok]
+      dwclass = "none"
     if dwclass == "other":
       self._toks += [combotok]
       dwclass = "none"
@@ -255,8 +355,8 @@ class  dwline:
     for t in self._toks:
       t.dwwrite(outfile)
     outfile.write("\n")
-  def dwtransformline(self,callfunc):
-    toks = callfunc(self._toks)
+  def dwtransformline(self,callfunc,myfile,lnum):
+    toks = callfunc(self._toks,myfile,lnum)
     self._toks = toks
     
 
@@ -271,6 +371,7 @@ class dwfile:
     except IOError, message:
       print >> sys.stderr , "File could not be opened: ", name
       sys.exit(1)
+    linenum=0
     while 1:
       try:
         rec = file.readline()
@@ -279,9 +380,9 @@ class dwfile:
       if len(rec) < 1:
         # eof
         break
-
+      linenum = linenum +1
       aline = dwline()
-      aline.tokenize(rec)
+      aline.tokenize(rec,name,linenum)
       self._lines += [aline]
 
   def dwprint(self):
@@ -304,9 +405,11 @@ class dwfile:
     for l in self._lines:
       l.dwwrite(outfile,lnum)
       lnum = lnum + 1
-  def dwtransformline(self,callfunc):
+  def dwtransformline(self,callfunc,myfile):
+    lnum=1
     for l in self._lines:
-      l.dwtransformline(callfunc)
+      l.dwtransformline(callfunc,myfile,lnum)
+      lnum = lnum + 1
     
 
 
@@ -328,8 +431,13 @@ class dwfiles:
       f.dwwrite()
   def dwtransformline(self,callfunc):
     for f in self._files:
-      f.dwtransformline(callfunc)
+      f.dwtransformline(callfunc,f)
+
 
+def setkeepordeletecomments(val):
+  """ Pass in "k" or "d" to keep or delete comments, respectively """
+  global keepcomments
+  keepcomments = val
 
 def readFilelist(filelist):
   dwf = dwfiles()