Adding to the tools, in the tools directory.
[dwarf-doc.git] / dwarf5 / tools / charfix.py
1 # Copyright 2012 DWARF Debugging Information Format Committee
2 #
3
4
5 # Looks for certain multi-byte chars and replaces with
6 # appropriate ascii.
7 # See "The Comprehensive LaTeX Symbol List"
8 #http://www.tex.ac.uk/tex-archive/info/symbols/comprehensive/symbols-a4.pdf
9
10 #\newcommand{\doublequote}[1]{\textquotedblleft#1\textquotedblright}
11 #\newcommand{\singlequote}[1]{\textquoteleft#1\textquoteright}
12
13 #
14 #  utf         latex                 description
15 # e2 80 9c      \textquotedblleft    (two curly left quote chars)  
16 # e2 80 9d      \textquotedblright   (two curly right quote chars)
17 #              or \doublequote{stringtoquote} (our command)
18 # e2 80 9b      \textquoteleft       (single curly left quote char)
19 # e2 80 99      \textquoteright      (single curly right quote char)
20 #      by itself for contractions like don't
21 #      or \singlequote{stringtoquote} for a left right pair (our command)
22 #
23 # e2 80 93       \textendash         (minus sign)
24 # e2 80 94      \textemdash          (long dash         )
25 #        \textthreequartersemdash (long dash, from textcomp package)
26 #               \textendash          (long dash         )
27 #               \leftrightline (from MnSymbol package)
28 # e2 80 a6     \dots         (...)
29 #                 \textellipsis  is also usable.
30 # e2 84 a2      \texttrademark      trademark symbol
31 # c2 a9         \copyright          (copyright symbol; not replaced by this script)
32
33 import sys
34 import fileio
35
36
def ischar(tok,c):
  """Return "y" when tok is an "ind"-class token whose _tex holds
  exactly one element equal to c; otherwise return "n"."""
  # Short-circuit order matters: _tex is only inspected for "ind" tokens.
  matched = (tok._class == "ind"
             and len(tok._tex) == 1
             and tok._tex[0] == c)
  if matched:
    return "y"
  return "n"
45
46
def append_to_out(out,addthese):
  """Append every item of addthese, in order, to the list out (in place)."""
  out.extend(addthese)
50
def isutf80prefix(t):
  """Return "y" if t starts with the two characters e2 80, else "n".

  A t shorter than two characters yields "n" instead of raising
  IndexError.
  """
  # Slicing never raises on short input, unlike indexing t[0] and t[1].
  if t[:2] == chr(226) + chr(128):
    return "y"
  return "n"
def isutf84prefix(t):
  """Return "y" if t starts with the two characters e2 84, else "n".

  A t shorter than two characters yields "n" instead of raising
  IndexError.
  """
  # Slicing never raises on short input, unlike indexing t[0] and t[1].
  if t[:2] == chr(226) + chr(132):
    return "y"
  return "n"
63
def isutfleftdouble(t):
  """Return "y" if t starts with e2 80 9c, else "n".

  e2 80 9c is the UTF-8 encoding of U+201C, the left double
  quotation mark.  Input shorter than three characters yields "n"
  instead of raising IndexError.
  """
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(128) + chr(156):
    return "y"
  return "n"
def isutfrightdouble(t):
  """Return "y" if t starts with e2 80 9d, else "n".

  e2 80 9d is the UTF-8 encoding of U+201D, the right double
  quotation mark.  Input shorter than three characters yields "n"
  instead of raising IndexError.
  """
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(128) + chr(157):
    return "y"
  return "n"
def isutfleftsingle(t):
  """Return "y" if t starts with e2 80 9b, else "n".

  e2 80 9b is the UTF-8 encoding of U+201B, the single
  high-reversed-9 quotation mark.  Input shorter than three
  characters yields "n" instead of raising IndexError.
  """
  # NOTE(review): the usual left single quote U+2018 (e2 80 98) is not
  # matched here -- confirm whether that is intended.
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(128) + chr(155):
    return "y"
  return "n"
def isutfrightsingle(t):
  """Return "y" if t starts with e2 80 99, else "n".

  e2 80 99 is the UTF-8 encoding of U+2019, the right single
  quotation mark (also used as an apostrophe).  Input shorter than
  three characters yields "n" instead of raising IndexError.
  """
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(128) + chr(153):
    return "y"
  return "n"
def isutfdash(t):
  """Return "y" if t starts with e2 80 94, else "n".

  e2 80 94 is the UTF-8 encoding of U+2014, the em dash.  Input
  shorter than three characters yields "n" instead of raising
  IndexError.
  """
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(128) + chr(148):
    return "y"
  return "n"
def isutfminus(t):
  """Return "y" if t starts with e2 80 93, else "n".

  e2 80 93 is the UTF-8 encoding of U+2013, the en dash.  Input
  shorter than three characters yields "n" instead of raising
  IndexError.
  """
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(128) + chr(147):
    return "y"
  return "n"
def isutftrademark(t):
  """Return "y" if t starts with e2 84 a2, else "n".

  e2 84 a2 is the UTF-8 encoding of U+2122, the trade mark sign.
  Input shorter than three characters yields "n" instead of raising
  IndexError.
  """
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(132) + chr(162):
    return "y"
  return "n"
106   
def isutfdots(t):
  """Return "y" if t starts with e2 80 a6, else "n".

  e2 80 a6 is the UTF-8 encoding of U+2026, the horizontal ellipsis.
  Input shorter than three characters yields "n" instead of raising
  IndexError.
  """
  # Compare the whole sequence by slice so short input cannot raise.
  if t[:3] == chr(226) + chr(128) + chr(166):
    return "y"
  return "n"
113
114 # Following just for a unique single-right-quote case.
def maybeinsertnonbreakspace(outtoks,linetoks,nexttoknumin,lasttoknum):
  """ When the token just before the current one spells 'variables',
  append one extra token (backslash followed by a space) to outtoks.
  A hack covering a single case after a right single quote.
  lasttoknum is accepted but not used.  Always returns None. """
  # nexttoknumin indexes the token after the current one, so the
  # token before the current one sits at nexttoknumin-2.
  if nexttoknumin >= 2:
    prevtext = ''.join(linetoks[nexttoknumin-2]._tex)
    if prevtext == "variables":
      # Presumably this builds LaTeX's backslash-space; confirm against
      # fileio.dwtoken.
      spacer = fileio.dwtoken()
      spacer.setInitialOther("\\")
      spacer.setNextOther(" ")
      append_to_out(outtoks,[spacer])
130   
def maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum):
  """Append a single-space token to outtoks unless the next input token
  already starts with a space.

  outtoks      -- output token list, extended in place.
  linetoks     -- the input tokens of the line.
  nexttoknumin -- index in linetoks of the token after the current one.
  lasttoknum   -- index of the last token in linetoks.

  Nothing is appended when there is no next token.  Always returns None.
  """
  if nexttoknumin > lasttoknum:
    return
  rawtok = ''.join(linetoks[nexttoknumin]._tex)
  # Slice rather than index: a next token with empty text must not
  # raise IndexError; it is treated as "does not start with a space".
  if rawtok[:1] == " ":
    return
  t1 = fileio.dwtoken()
  t1.setIndivid(" ")
  append_to_out(outtoks,[t1])
def _idtoken(text):
  """Return a new fileio.dwtoken on which insertid(text) was called."""
  tok = fileio.dwtoken()
  tok.insertid(text)
  return tok

def _individtoken(text):
  """Return a new fileio.dwtoken on which setIndivid(text) was called."""
  tok = fileio.dwtoken()
  tok.setIndivid(text)
  return tok

def transfunc(linetoks,myfile,linenum):
  """Return the tokens of one line with the recognized three-character
  UTF-8 punctuation sequences replaced by LaTeX command tokens.

  linetoks -- tokens of the line; each token's _tex is joined to get
              its raw text.
  myfile, linenum -- accepted but not used here.

  Tokens that are not a recognized sequence are passed through
  unchanged.  An empty linetoks is returned as-is (the same object);
  otherwise a new list is returned.
  """
  if len(linetoks) < 1:
    return linetoks
  lasttoknum = len(linetoks) - 1
  outtoks = []
  # nexttoknumin is the index of the token that follows t.
  for nexttoknumin, t in enumerate(linetoks, 1):
    rawtok = ''.join(t._tex)
    if len(rawtok) != 3:
      outtoks.append(t)
      continue
    if isutfleftdouble(rawtok) == "y":
      # Opens \doublequote{ ; the matching right double quote emits
      # the closing brace.
      outtoks.append(_idtoken("\\doublequote"))
      outtoks.append(_individtoken("{"))
    elif isutfrightdouble(rawtok) == "y":
      outtoks.append(_individtoken("}"))
    elif isutfleftsingle(rawtok) == "y":
      # The trailing space keeps the command from running into the
      # following word in the output.
      # A right single quote is sometimes part of a contraction rather
      # than half of a pair, so single quotes are not paired up with
      # \singlequote{} here.
      outtoks.append(_idtoken("\\textquoteleft"))
      maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
    elif isutfrightsingle(rawtok) == "y":
      outtoks.append(_idtoken("\\textquoteright"))
      maybeinsertnonbreakspace(outtoks,linetoks,nexttoknumin,lasttoknum)
      maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
    elif isutfdash(rawtok) == "y":
      outtoks.append(_idtoken("\\textemdash"))
      maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
    elif isutfminus(rawtok) == "y":
      outtoks.append(_idtoken("\\textendash"))
      maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
    elif isutftrademark(rawtok) == "y":
      # Force a non-break space after the TM symbol so the output
      # really has a space.
      outtoks.append(_idtoken("\\texttrademark\\ "))
      maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
    elif isutfdots(rawtok) == "y":
      outtoks.append(_idtoken("\\dots"))
      maybeinsertspace(outtoks,linetoks,nexttoknumin,lasttoknum)
    else:
      outtoks.append(t)
  return outtoks
220
def process_files(filelist):
  """Load the files named in filelist through fileio, hand transfunc to
  the loaded object's dwtransformline, then call its dwwrite."""
  # Presumably dwtransformline applies transfunc to each line and
  # dwwrite saves the result; confirm against fileio.
  loaded = fileio.readFilelist(filelist)
  loaded.dwtransformline(transfunc)
  loaded.dwwrite()
225
def read_all_args():
  """Treat every command-line argument as a file name and process them.

  When no argument is given, writes "No files specified." to stderr
  and exits with status 1.
  """
  filelist = sys.argv[1:]
  if not filelist:
    # sys.stderr.write behaves the same under Python 2 and 3; the
    # "print >> sys.stderr" form is Python 2 only.
    sys.stderr.write("No files specified.\n")
    sys.exit(1)
  process_files(filelist)
237
#  Usage: charfix.py file ...
#  Every command-line argument is taken as a file to process.

if __name__ == "__main__":
  read_all_args()
242