Merge branch 'HEAD' of dwarfgit@dwarfstd.org:dwarf-doc.git
author Ron Brender <ron.brender@gmail.com>
Mon, 14 Apr 2014 16:41:07 +0000 (12:41 -0400)
committer Ron Brender <ron.brender@gmail.com>
Mon, 14 Apr 2014 16:41:07 +0000 (12:41 -0400)
14 files changed:
dwarf5/latexdoc/attributesbytag.tex
dwarf5/latexdoc/datarepresentation.tex
dwarf5/latexdoc/debugsectionrelationships.tex
dwarf5/latexdoc/examples.tex
dwarf5/latexdoc/generaldescription.tex
dwarf5/latexdoc/otherdebugginginformation.tex
dwarf5/latexdoc/programscope.tex
dwarf5/tools/README
dwarf5/tools/fileio.py
dwarf5/tools/refclassfixup.py
dwarf5/tools/refer.py [new file with mode: 0644]
dwarf5/tools/repeats.py [new file with mode: 0644]
dwarf5/tools/testrefer.tex [new file with mode: 0644]
dwarf5/tools/testrepeats.txt [new file with mode: 0644]

index 0f34d9f..ec6e568 100644 (file)
@@ -16,7 +16,7 @@ cannot be considered definitive.
 
 In the following table,
 \addtoindex{DECL}
-\livetarg{chap:DECL} 
+\livetarg{chap:DECL}{}
 means include all three of the
 \addtoindex{declaration coordinates} 
 \addtoindexx{declaration coordinates|see {\DWATdeclfileNAME, \DWATdecllineNAME, \DWATdeclcolumnNAME}}
index 13c62cd..2059376 100644 (file)
@@ -1910,7 +1910,7 @@ Table \refersec{tab:visibilityencodings}.
 \setlength{\extrarowheight}{0.1cm}
 \begin{longtable}{l|c}
   \caption{Visibility encodings} \label{tab:visibilityencodings}\\
-  \hline \bfseries Visiibility code name&\bfseries Value \\ \hline
+  \hline \bfseries Visibility code name&\bfseries Value \\ \hline
 \endfirsthead
   \bfseries Visibility code name&\bfseries Value\\ \hline
 \endhead
index 3bc5aaa..a25556c 100644 (file)
@@ -198,7 +198,7 @@ The value of the \DWATaddrbase{} attribute in the
 \DWTAGcompileunit{}, \DWTAGtypeunit{} or \DWTAGpartialunit{} DIE is the
 offset in the \dotdebugaddr{} section of the machine
 addresses for that unit.
-\DWFORMaddrx, \DWOPaddrx and \DWOPconstx{} contain
+\DWFORMaddrx, \DWOPaddrx{} and \DWOPconstx{} contain
 indices relative to that offset.
 
 %l
index 0504d9d..026e356 100644 (file)
@@ -636,7 +636,7 @@ illustrated in Figure \refersec{fig:FortranmultidimensionalcoarrayDWARFdescripti
 \begin{lstlisting}
         INTEGER X(10,11,12)[2,3,*]
 \end{lstlisting}
-\caption{Fortran multidimentional coarray: source fragment}
+\caption{Fortran multidimensional coarray: source fragment}
 \label{fig:Fortranmultidimensionalcoarraysourcefragment}
 \end{figure}
 
index ae5ff85..5767c9c 100644 (file)
@@ -194,8 +194,7 @@ to one of the classes shown in Table \refersec{tab:classesofattributevalue}.
 \DWATallocatedTARG
 &\livelinki{chap:DWATallocatedallocationstatusoftypes}{Allocation status of types}{allocation status of types}  \\
 \DWATartificialTARG
-&\livelinki{chap:DWATartificialobjectsortypesthat}{Objects or types that are not
-actually declared in the source}{objects or types that are not actually declared in the source}  \\
+&\livelinki{chap:DWATartificialobjectsortypesthat}{Objects or types that are not actually declared in the source}{objects or types that are not actually declared in the source}  \\
 \DWATassociatedTARG{} 
 &\livelinki{chap:DWATassociatedassociationstatusoftypes}{Association status of types}{association status of types} \\
 \DWATbasetypesTARG{} 
@@ -931,7 +930,7 @@ The
 following 
 \addtoindexx{DWARF expression!logical operations}
 provide arithmetic and logical operations. Except
-as otherwise specified, the arithmetic operations perfom
+as otherwise specified, the arithmetic operations perform
 addressing arithmetic, that is, unsigned arithmetic that is
 performed modulo one plus the largest representable address
 (for example, 0x100000000 when the 
@@ -2144,8 +2143,7 @@ including the keyword operator (in names such as \doublequote{operator +}),
 if present. See also 
 Section \referfol{chap:linkagenames} regarding the use
 of \DWATlinkagename{} for 
-\addtoindex{mangled names}
-mangled names. 
+\addtoindex{mangled names}.
 Sequences of
 multiple whitespace characters may be compressed.}
 
index 8e03424..30b17ce 100644 (file)
@@ -517,7 +517,7 @@ maximum number of individual operations that may be
 encoded in an instruction. Line number program opcodes
 that alter the \texttt{address} and 
 \addttindex{op\_index} registers use this and
-\addttindex{minimum\_instruction\_length} (see preceeding)
+\addttindex{minimum\_instruction\_length} (see preceding)
 in their calculations.
 
 For non-VLIW
index 1e5c0cc..4ec6587 100644 (file)
@@ -1077,7 +1077,7 @@ The attribute indicates whether the subroutine was
 declared with the \doublequote{pure} keyword or property.
 
 A 
-\hypertarget{chap:DWATrecursiverecursivepropertyofasubroutine}
+\hypertarget{chap:DWATrecursiverecursivepropertyofasubroutine}{}
 subprogram entry may have a 
 \DWATrecursive{} attribute, which
 is a \livelink{chap:classflag}{flag}. 
@@ -1826,7 +1826,7 @@ adjust parameters and/or the result (if any) as appropriate
 to the combined calling and called execution contexts.}
 
 A trampoline is represented by a debugging information entry
-\addtoindexx{trampoline (subprogam) entry}
+\addtoindexx{trampoline (subprogram) entry}
 with the tag \DWTAGsubprogram{} or \DWTAGinlinedsubroutine{}
 that has 
 \addtoindexx{trampoline attribute}
index 130214c..54df74c 100644 (file)
@@ -1,6 +1,47 @@
 These tools are intended to help updating the latex
 source of a DWARF document to get its references
 complete and correct.
+A list of the Python source files with the purpose
+of each is near the end of this file.
+
+BASIC CHECKS:
+One way to do a spell check on the final document,
+and to get a simple text output to look for word repeats
+is to do the following (assuming you have the necessary
+tools):
+   # -enc Latin1 to avoid utf8, we do not need utf8 here.
+   pdftotext -enc Latin1 dwarf5.pdf
+   # Now you have dwarf5.txt
+   # The _ are not understood by spell
+   sed -e 's/_//g' <dwarf5.txt >dwarf5.txt2
+   # Let upper and lower case compare equal
+   spell dwarf5.txt2 |sort -f |uniq -i
+
+Once you have dwarf5.txt, the following script and tool
+will look for repeated words and short phrases, something
+that can creep in via typos or by accidents of LaTeX use.
+We use several sed commands to keep the lines short here.
+This is more than a little sloppy and repeats.py
+is not very powerful, but it seems potentially useful:
+   pdftotext -enc Latin1 dwarf5.pdf
+   # Strip out some characters
+   sed -e 's/:/ /g' <../latexdoc/dwarf5.txt >junk.txt
+   sed -e 's/,/ /g' -e 's/;/ /g' <junk.txt >junk2.txt
+   sed -e 's/:/ /g' <junk2.txt >junk3.txt
+   sed -e 's/(/ /g' -e 's/)/ /g' -e 's/[.]/ /g'  <junk3.txt >junk4.txt
+   # Look for 1-word repeated in a 2-word window
+   python repeats.py 2 1 junk4.txt
+   # Look for 2 words repeated in a 4-word window
+   python repeats.py 4 2 junk4.txt
+
+A quick check on indexing lets one find
+   \addtoindex{foo!bar}
+when
+   \addtoindexx{foo!bar}
+is likely meant:
+   grep '!' *tex |grep -v toindexx
+
+
 
 CAUTION: 
 The tools don't really do parsing and
@@ -116,3 +157,103 @@ Performance:
 We simply don't care about performance as long as a task takes
 less than a few minutes. There are only about 70,000 words in a complete
 document so we ignore efficiency issues.
+
+SOURCE FILES:
+The change to internal use of DWTAGfoo etc. in the document
+instead of DW\_TAG\_foo (see dwarfnamecmds.tex)
+means many of these commands are not as useful
+as they were originally.  But even so they may form useful
+examples.
+
+refer.py: Produces a list of
+  missing/incorrect/questionable labels,links and
+  references to same. 
+  Useful with the DWTAGfoo (etc) tex files.
+  Written February 2014.
+
+repeats.py: Using a dwarf5.pdf
+  and pdftotext and some scripting and repeats.py one 
+  can find certain errors such as 
+  accidentally-repeated words and phrases
+  whether or not those are generated from Latex 
+  commands or simply typos.
+  Written February 2014.
+
+anylink.py: Looks for designated prefixes like DW_ADDR etc.
+  Used by other code, this was never that useful alone.
+
+attrlink.py: Uses anylink.py to turn DW_AT_ into \livelink
+
+copyfile.py: Uses fileio.py to parse and output a .tex file.
+ So we can use diff to verify the result is byte-for-byte
+ identical to the input.
+
+dellivelink.py: This uses fileio.py and replaces \livelink
+ and \livetarg with \DWXXXyyy per Ron Brender email of
+ Oct 4, 2013. Strives to be idempotent so rerunning
+ produces no further changes.  A few cases are not
+ handled perfectly (where a } is at end of line?),
+ so if needed again it could use a bit of fixing.
+ The list of files to process is built into the source,
+ and the list is not totally up to date.
+ The program is obsolete now though, as we
+ use \DW* very differently in the source (from when
+ this was written).
+
+fileio.py:  Given a list of file (.tex) names, it reads in
+  and tokenizes each file.  Functions here let
+  code eventually write stuff back out (changed or not)
+  but the output file always has a ".out" appended, it won't
+  overwrite the input.
+  As of February 2014 it disallows a backslash inside an identifier,
+  forcing a token break.  This is closer to Latex
+  hence useful where we have  \DWTAGfoo\DWTAGbar
+  in the .tex (of course  \DWTAGfoo{}\DWTAGbar{}
+  would make this change irrelevant, but  it seemed better to
+  match latex tokenizing)
+
+formlink.py: Using anylink.py, this transforms DW_FORM_ into
+  \livelink and \livetarg.
+
+printnameswithinteger.py: Identical to dellivelink.py, 
+  so it is horribly misnamed!  This one needs 
+  an input list of files to process supplied on the
+  command line.
+
+
+printstandard.py:  Print the DW_* entries (and only them)
+  in the files named on the command line,
+  one per line, with any \- or \_ removed.
+  Usage example:
+  python printstandard.py ../latexdoc/*.tex  |sort|uniq 
+
+printtokens.py: Reads and tokenizes files named on the command
+  line. Prints the tokenized data.
+  Solely for debugging the tokenizing.
+
+refclassfixup.py:  Fixes up certain strings
+  (specified in this file) to use \livelink
+
+removehyphen.py: Turns \-\_ into \_ .
+  This was once a needed task, but it was run long ago, so
+  this is now a useless bit of code.
+
+removeutf8.py:  The pdf we started from had
+  various utf-8 characters. These got in the way of
+  our text processing, so this app deleted those.
+  This is now a useless bit of code.
+  
+taglink.py:  Finds all the instances of DW_TAG_ in 
+  .tex files named on the command line.
+
+tohyphen.py: The opposite of removehyphen.
+  This is now a useless bit of code.
+
+uses.py: Looks for duplicate uses and definitions of 
+  latex tags.  
+  This is based on the original approach to naming
+  and linking in .tex, not the
+  latest approach using dwarfnamecmds.tex.
+  
+
+
index feca379..20fcd2b 100644 (file)
@@ -40,8 +40,9 @@ def isIdNext(c):
   # : at the end of a DW* name on input.
   if c == ":":
     return "y"
+  # Do not allow \ in the middle of a name.
   if c == "\\":
-    return "y"
+    return "n"
   if c == "-":
     return "y"
   if c == "_":
index ccf8dcd..c0ea7ef 100644 (file)
@@ -3,6 +3,8 @@
 # writes them out in new files.  
 # Used to verify that the output matches the input byte-for-byte
 
+# Transforms certain strings (see refclass below) into livelink commands.
+
 import sys
 import fileio
 
diff --git a/dwarf5/tools/refer.py b/dwarf5/tools/refer.py
new file mode 100644 (file)
index 0000000..7df2334
--- /dev/null
@@ -0,0 +1,912 @@
+# Copyright 2014 DWARF Debugging Information Format Committee
+#
+# Looks at dwarfnamecmds.tex(+) to find all the commands
+# And sees what is actually used and whether references
+# have definitions.
+# The initial implementation just *assumes* it is run from the tools
+# directory and the file names are built in to the source here.
+#
+# Run as (for example)
+#   python refer.py 
+
+# This is the simplest 'parse' of the .tex that we can manage
+# while still finding what we want to find.
+# One would hardly call it a parser, really.
+
+# There are essentially three namespaces at present in the document.
+# The hyperlink/hypertarget namespace.
+# The label  vref ref namespace
+#   which also involves  our refersec referfol.
+# The index namespace, which we are not presently filling in very much. 
+
+import sys
+import fileio
+
+# These two hold the commands we care about so we can
+# bypass most lines easily in phase two.
+# All the newdwfnamecommands
+global dwfnamecommsdict
+# All the newcommand instances.
+global newcommsdict
+
+# NAME suffix. No index, just text shows.
+# Not very useful so far
+global namedict
+
+
+# Targets for \hypertarget and \hyperlink
+#targhyperdict is all the \hypertarget instances.
+#linkhyperdict is all the \hyperlink instances.
+global targhyperdict
+global linkhyperdict
+
+# label targets (labels)
+global labeldict
+# \refersec \ref \vref to labels
+global labelrefdict
+
+# The index content for named things.
+# The strings here are the links, the targets are
+# built by the latex index software.
+# So this dictionary is not really needed here (yet)
+# and is not yet fully built up.
+global indexdict
+
+
+newcommsdict = {}
+dwfnamecommsdict = {}
+
+# Links meaning \livelink \livetarg \livetargi macros
+#linksdict  = {}
+#targsdict  = {}
+namedict  = {}
+targhyperdict= {}
+linkhyperdict= {}
+labeldict = {}
+labelrefdict = {}
+indexdict = {}
+
+global linestoignore
+linestoignore = []
+
+# lines_to_ignore is a terrible hack.
+def add_lines_to_ignore(myfile,lowline,highline):
+  global linestoignore
+  linestoignore += [(myfile._name,lowline,highline)]
+def in_lines_to_ignore(myfile,line):
+  global linestoignore
+  n = myfile._name
+  for x in linestoignore:
+    (n,l,h) = x
+    if  myfile._name != n:
+      continue
+    if line < l:
+      continue
+    if line >h:
+      continue
+    return "y"
+  return "n"
+   
+
+# a list of words to ignore: silly stuff.
+ignorethesedict = {"of":0, "a":0, "the":0, "and":0, "but":0,"DWARF":0,
+"Standards":0,"Committee":0,"Version":0 }
+
+class tokmention:
+  def __init__(self):
+    self._token = '' 
+    self._file = ""
+    self._line = 0
+  def __init__(self,tok,filename,line):
+    self._token = tok
+    self._file = filename
+    self._line = line
+
+def ischar(tok,c):
+   if tok._class != "ind":
+      return "n"
+   if len(tok._tex) != 1:
+       return "n"
+   if tok._tex[0] != c:
+       return "n"
+   return "y"
+
+def dwspace(tok):
+  if ischar(tok," ") == "y":
+    return "y"
+  if ischar(tok,"\t") == "y":
+    return "y"
+  return "n"
+  
+  
+def isbrace(tok,brace):
+  if tok._class != "ind":
+     return "n"
+  if len(tok._tex) != 1:
+     return "n"
+  if brace == tok._tex[0]:
+     return "y"
+  return "n"
+
+def toknamestring(t):
+  """ Turn a token into its string as a string """
+  return ''.join(t._tex)
+
+
+def pickup(linetoks,tnumin,pattern,myfile,linenum,suppresserr):
+  """ The token pattern characters are
+  i meaning identifier
+  e meaning identifier, but if the next token is }
+     we construct an empty identifier for it.
+  [space] meaning whitespace
+  { meaning left brace
+  } meaning right brace
+  [ meaning left bracket
+  ] meaning right bracket
+  * meaning any token except } and ] and end-line
+  
+  Precondition:  linetoks[tnumin] is identifier (meaning a command)
+  Returns: a token list, one per non-space in the pattern.
+     For the *, the token is itself a list of whatever it contains.
+  """
+  outtoks = []
+  numabsorbed = 1
+  inlen = len(linetoks) 
+  curnum = tnumin
+  curtok = linetoks[curnum]
+  patterncharnum = -1
+  for c in pattern:
+    patterncharnum = patterncharnum + 1
+    if curnum >= inlen:
+      if suppresserr == "n":
+        print "ERROR line ended surprisingly, pattern ", pattern,"  line ",linenum," file ",myfile._name
+      return outtoks,numabsorbed
+    curtok = linetoks[curnum]
+    if c == " ":
+      while dwspace(curtok) == "y":
+        curnum = curnum + 1
+        if curnum >= inlen:
+          if suppresserr == "n":
+            print "ERROR line ended surprisingly in space, pattern ", pattern, " line ",linenum," file ",myfile._name
+          return outtoks,numabsorbed
+        numabsorbed = numabsorbed + 1
+        curtok = linetoks[curnum]
+      continue
+    elif c == "i":
+      if curtok._class != "id":
+        if suppresserr == "n":
+          print "ERROR line  expected identifier got ",curtok._tex, "pattern" , pattern, " line " ,linenum," file ",myfile._name
+        return outtoks,numabsorbed
+      numabsorbed = numabsorbed + 1
+      outtoks += [curtok]
+      curnum = curnum + 1
+      continue
+    elif c == "e":
+      if curtok._class != "id":
+        if isbrace(curtok,"}") == "y":
+          tk = fileio.dwtoken()
+          tk.insertid("")
+          outtoks += [tk]
+          # Do not update location.
+          continue
+        else:
+          if suppresserr == "n":
+            print "ERROR line  expected identifier got ",curtok._tex, "pattern" , pattern, " line " ,linenum," file ",myfile._name
+          return outtoks,numabsorbed
+      else: 
+        numabsorbed = numabsorbed + 1
+        outtoks += [curtok]
+        curnum = curnum + 1
+      continue
+    elif c == "{":
+      if isbrace(curtok,"{")  == "y":
+        outtoks += [curtok]
+        curnum = curnum + 1
+        numabsorbed = numabsorbed + 1
+      else:
+        if suppresserr == "n":
+          print "ERROR line  expected {  got ",curtok._tex," pattern ",pattern," line " ,linenum," file ",myfile._name
+        return outtoks,numabsorbed
+    elif c == "}":
+      if isbrace(curtok,"}")  == "y":
+        outtoks += [curtok]
+        curnum = curnum + 1
+        numabsorbed = numabsorbed + 1
+      else:
+        if suppresserr == "n":
+          print "ERROR line  expected }  got ",curtok._tex,"pattern",pattern," line " ,linenum," file ",myfile._name
+        return outtoks,numabsorbed
+    elif c == "[":
+      if isbrace(curtok,"[")  == "y":
+        outtoks += [curtok]
+        curnum = curnum + 1
+        numabsorbed = numabsorbed + 1
+      else:
+        if suppresserr == "n":
+          print "ERROR line  expected [  got ",curtok._tex," pattern ",pattern," line " ,linenum," file ",myfile._name
+        return outtoks,numabsorbed
+    elif c == "]":
+      if isbrace(curtok,"]")  == "y":
+        outtoks += [curtok]
+        curnum = curnum + 1
+        numabsorbed = numabsorbed + 1
+      else:
+        if suppresserr == "n":
+          print "ERROR line  expected ]  got ",curtok._tex," pattern ",pattern," line " ,linenum," file ",myfile._name
+        return outtoks,numabsorbed
+    elif c == "*":
+      outlist = []
+      curtok = linetoks[curnum]
+      while isbrace(curtok,"}") == "n" and isbrace(curtok,"]") == "n":
+        if dwspace(curtok) == "n":
+           outlist += [curtok]
+        curnum = curnum + 1
+        if curnum >= inlen:
+          outtoks += [outlist]
+          if patterncharnum < (len(pattern) -1): 
+            if suppresserr == "n":
+              print "ERROR insufficient tokens on line for pattern ", pattern," line " ,linenum," file ",myfile._name
+          return outtoks,numabsorbed
+        numabsorbed = numabsorbed + 1
+        curtok = linetoks[curnum]
+      # Found a right brace, so done here.
+      outtoks += [outlist]
+    else:
+        if suppresserr == "n":
+          print "ERROR pattern had unexpected character ",pattern
+  return outtoks,numabsorbed
+
+def printbadcommand(name,myfile,myline):
+  print "Error: command %s missing operand file %s line %d" %(name,myfile._name,myline)
+
+def applytodict(d,k,v):
+  keystring = k
+  if d.has_key(keystring) == 0:
+     d[keystring] =  [v]
+  else:
+     # This is a duplicate entry.
+     # We will report on it later as appropriate.
+     existing = d.get(keystring)
+     existing += [v]
+     d[keystring] =  existing
+
+# See how many "{" there are on the line.
+# return the count.
+def countbraces(linetoks,tnumin):
+  lasttoknum = len(linetoks) -1
+  lb = 0
+  while tnumin < lasttoknum:
+    x = linetoks[tnumin] 
+    if x._class == "ind":
+       n = toknamestring(x)
+       if n == "{":
+          lb = lb + 1
+    tnumin = tnumin + 1
+  return lb
+     
+
+
+# Here we try two different parses, the [] is optional
+# with simplenametable.
+def processbegin(linetoks,tnumin,myfile,linenum):
+  global targhyperdict
+  global indexdict
+  lbracecount = countbraces(linetoks,tnumin)
+  if lbracecount < 3:
+    return 1
+  ourtoks,inlen = pickup(linetoks,tnumin," i { i } [ * ] { * } { i }",myfile,linenum,"y")
+  if len(ourtoks) == 13:
+    lcom = ourtoks[2]
+    lcomname = toknamestring(lcom)
+    if lcomname != "simplenametable":
+      return inlen
+    targ = ourtoks[11]
+    hypstr = toknamestring(targ)
+    hypmen = tokmention(targ,myfile,linenum)
+    applytodict(labeldict,hypstr,hypmen)
+    return inlen
+  ourtoks,inlen = pickup(linetoks,tnumin," i { i } { * } { i }",myfile,linenum,"y")
+  if len(ourtoks) < 10:
+      return inlen
+  lcom = ourtoks[2]
+  lcomname = toknamestring(lcom)
+  if lcomname != "simplenametable":
+      return inlen
+  targ = ourtoks[8]
+  hypstr = toknamestring(targ)
+  hypmen = tokmention(targ,myfile,linenum)
+  applytodict(labeldict,hypstr,hypmen)
+  return inlen
+
+# If justlink == "y" this is a hypertarget command literally.
+def livetargprocess(linetoks,tnumin,myfile,linenum,justlink):
+  """ \livetarg{chap:DWTAGtemplatevalueparameter}{DWTAGtemplatevalueparameter} """
+  global targhyperdict
+  global indexdict
+  t = linetoks[tnumin]
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { i } { * }",myfile,linenum,"n")
+  if len(ourtoks) > 5:
+    t2 = ourtoks[2];
+    index = tokmention(t2,myfile,linenum)
+    name = toknamestring(t2)
+    applytodict(targhyperdict,name,index)
+    if justlink == "n":
+      t2 = ourtoks[5];
+      # Ignore for now.
+      #name = toknamestring(t2)
+      #if len(name) > 0:
+      #  index = tokmention(t2,myfile,linenum)
+      #  applytodict(indexdict,name,index)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+  return inlen
+def livetargiprocess(linetoks,tnumin,myfile,linenum):
+  """ \livetargi{chap:DWTAGtemplatevalueparameter}{DW\-\_TAG\-\_template\-\_value\-\_parameter}{name of targ} """
+  global targhyperdict
+  global indexdict
+  t = linetoks[tnumin]
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { i } { e } { * }",myfile,linenum,"n")
+  if len(ourtoks) > 5:
+    t2 = ourtoks[2];
+    index = tokmention(t2,myfile,linenum)
+    name = toknamestring(t2)
+    applytodict(targhyperdict,name,index)
+
+    t2 = ourtoks[5];
+    name = toknamestring(t2)
+    if len(name) > 0:
+      index = tokmention(t2,myfile,linenum)
+      applytodict(indexdict,name,index)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+  return inlen
+# if justlink == "y" it is a plain hyperlink command
+def livelinkprocess(linetoks,tnumin,myfile,linenum,justlink):
+  """ \livelink{chap:DWTAGtemplatevalueparameter}{DW\-\_TAG\-\_template\-\_value\-\_parameter} """
+  global linkhyperdict
+  global indexdict
+  t = linetoks[tnumin]
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { i } { * }",myfile,linenum,"n")
+  if len(ourtoks) > 5:
+    t2 = ourtoks[2];
+    index = tokmention(t2,myfile,linenum)
+    name = toknamestring(t2)
+    applytodict(linkhyperdict,name,index)
+
+    # can be multiword. For now do not bother with every index.
+    #t2 = ourtoks[5];
+    #index = tokmention(t2,myfile,linenum)
+    #name = toknamestring(t2)
+    #applytodict(indexdict,name,index)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+  return inlen
+def labelprocess(linetoks,tnumin,myfile,linenum):
+  """ \label{alabel} """
+  global labeldict
+  t = linetoks[tnumin]
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { i }",myfile,linenum,"n")
+  if len(ourtoks) > 2:
+    t2 = ourtoks[2];
+    index = tokmention(t2,myfile,linenum)
+    name = toknamestring(t2)
+    applytodict(labeldict,name,index)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+
+  return inlen
+def addtoindexprocess(linetoks,tnumin,myfile,linenum):
+  """ \addtoindex{strings} """
+  global indexdict
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { * }",myfile,linenum,"n")
+  if len(ourtoks) > 2:
+    # The * means a list of tokens.
+    fake = ""
+    #t2 = ourtoks[2];
+    #index = tokmention(t2,myfile,linenum)
+    #name = toknamestring(t2)
+    #applytodict(indexdict,name,index)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+  return inlen
+def hyperlinkname(name,tnumin,myfile,linenum):
+  global linkhyperdict
+  tkmod = fileio.dwtoken()
+  tkmod.insertid(name)
+  tm = tokmention(tkmod,myfile,linenum)
+  applytodict(linkhyperdict,name,tm)
+  return 1
+def hyperlinkprocess(linetoks,tnumin,myfile,linenum):
+  """ \hyperlink{entryname} """
+  global linkhyperdict
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { i }",myfile,linenum,"n")
+  if len(ourtoks) > 2:
+    t2 = ourtoks[2];
+    index = tokmention(t2,myfile,linenum)
+    name = toknamestring(t2)
+    applytodict(linkhyperdict,name,index)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+  return inlen
+
+def indexprocess(linetoks,tnumin,myfile,linenum):
+  """ \index{indexentryname} """
+  global indexdict
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { * }",myfile,linenum,"n")
+  if len(ourtoks) > 2:
+    fake = ""
+    # For now not bothering with index strings
+    #t2 = ourtoks[2];
+    #index = tokmention(t2,myfile,linenum)
+    #name = toknamestring(t2)
+    #applytodict(indexdict,name,index)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+  return inlen
+def refersecprocess(linetoks,tnumin,myfile,linenum):
+  """ \refersec{label} """
+  global labelrefdict
+  t = linetoks[tnumin]
+  ourtoks,inlen = pickup(linetoks,tnumin,"i { i }",myfile,linenum,"n")
+  if len(ourtoks) > 2:
+    t2 = ourtoks[2];
+    index = tokmention(t2,myfile,linenum)
+    name = toknamestring(t2)
+    applytodict(labelrefdict,name,labelrefdict)
+  else:
+    tn = toknamestring(linetoks[tnumin])
+    printbadcommand(tn,myfile,linenum)
+  return inlen
+
+def firstnonblank(linetoks):
+  tnum = 0
+  lasttoknum = len(linetoks)
+  while tnum < lasttoknum:
+    x = linetoks[tnum]
+    if x._class != "ind":
+      return tnum
+    if toknamestring(x) == " ":
+      tnum = tnum + 1
+      continue
+    elif toknamestring(x) == "\t":
+      tnum = tnum + 1
+      continue
+    return tnum 
+  return tnum
+# Deals solely with finding new commands.
+# This is done as a first pass so we can recognize when tokens are
+# really commands, something transfunc2, the second pass, 
+# wants to know. 
+def transfunc1(linetoks,myfile,linenum):
+  global dwfnamecommsdict
+  global newcommsdict
+
+  if len(linetoks) < 1:
+    return linetoks
+  tnum = firstnonblank(linetoks)
+  if tnum >= len(linetoks):
+    return linetoks
+  initialtok = linetoks[tnum]
+  itokstring=toknamestring(initialtok)
+  if itokstring == "\\expandafter\\def\\csname":
+    return linetoks
+  if in_lines_to_ignore(myfile,linenum) == "y":
+    return linetoks
+  if itokstring == "\\newcommand":
+    t1 = linetoks[tnum+1]
+    if not isbrace(t1,'{'):
+      print "Improper character in newcommand", myfile,linenum
+      sys.exit(1)
+    t2 = linetoks[tnum+2]
+    if toknamestring(t2) == "\\simplenametablerule":
+       add_lines_to_ignore(myfile,linenum,linenum+18)
+    if toknamestring(t2) != "\\newdwfnamecommands":
+       tm = tokmention(t2,myfile,linenum)
+       applytodict(newcommsdict,toknamestring(t2),tm)
+    #Be silent on newdwfnamecommands, it is normal.
+    #else:
+    #   print "newcommand on newdwfnamecommands ignored intentionally."
+    return linetoks
+  elif itokstring == "\\newdwfnamecommands":
+    t1 = linetoks[tnum+1]
+    if not isbrace(t1,'{'):
+       print "Improper character in newdwfnamecommands", myfile._name,linenum
+       sys.exit(1)
+    # The token name string will be DWsomething and we want
+    # the token to appear as \DWsomething as that is how references
+    # are coded.
+    # The usages determine what secondary actions are applied.
+    t2 = linetoks[tnum+2]
+    tkmod = fileio.dwtoken()
+    tkmod.insertid("\\" + toknamestring(t2))
+    tm = tokmention(tkmod,myfile,linenum)
+    applytodict(dwfnamecommsdict,toknamestring(tkmod),tm)
+    return linetoks
+  return linetoks
+
+
+def delsuffix(n,suf):
+  slen = len(suf)
+  nlen = len(n)
+  lastcharnum = nlen - slen
+  outstring = n[0:lastcharnum]
+  return outstring
+def deloptionalprefix(n,pref):
+  if not n.startswith(pref):
+    return n
+  plen = len(pref)
+  nlen = len(n)
+  outstring = n[plen:nlen]
+  return outstring
+
+def printodderr(rawname,comname,myfile,linenum):
+  print "Error: this looks like a command is missing: ",rawname,"tested as",comname," in ",myfile._name," at ",linenum
+
+def rawnameiscommand(t,suff):
+   if not t.startswith("\\DW"):
+     return ""
+   if not t.endswith(suff):
+     return ""
+   commandname = delsuffix(t,suff)
+   return commandname
+
+# Delete any leading backslash.
+# Prefix the result with chap:
+def makelinkname(t):
+   s = deloptionalprefix(t,"\\");
+   s2 = "chap:" + s
+   return s2;
+   
+
+# Assumes all new commands known already.
+# This deals with targets and links (various flavors).
+def transfunc2(linetoks,myfile,linenum):
+  global newcommsdict
+  global dwfnamecommsdict
+  global newcommsdict
+
+  # Link naming target
+  global linkhyperdict
+  # TARG suffix
+  global targhyperdict
+  # INDX suffix
+  global indexdict
+  # NAME suffix
+  global namedict
+
+  if len(linetoks) < 1:
+    return linetoks
+  if in_lines_to_ignore(myfile,linenum) == "y":
+    return linetoks
+  initialtok = linetoks[0]
+  itokstring=toknamestring(initialtok)
+  # Skip all the newcommand stuff.
+  if itokstring == "\\newcommand":
+    return linetoks
+  elif itokstring == '\\newdwfnamecommands':
+    return linetoks
+
+  # Now deal with a regular line.
+
+  tnumin = 0
+  changes = 0
+  lasttoknum = len(linetoks) -1
+  for x in linetoks:
+    if int(tnumin) > int(lasttoknum):
+      break
+    t = linetoks[tnumin]
+    if t._class != "id":
+      tnumin = tnumin + 1
+      continue
+    rawname = toknamestring(t)
+    commandname=""
+    #rawnameiscommand(rawname,"",basecommand)
+    if rawname == "\\expandafter\\def\\csname":
+      return linetoks
+    if rawname == "\\begin":
+      tnumcount = processbegin(linetoks,tnumin,myfile,linenum);
+      tnumin = tnumin + tnumcount
+      continue
+    if dwfnamecommsdict.has_key(rawname):
+      # We know this one. 
+      # It is a default case name reference.
+      # index the DWname
+      # Link is to chap:DWname
+      tm = tokmention(t,myfile,linenum)
+      linkname = makelinkname(rawname)
+      indxname = deloptionalprefix(commandname,"\\")
+      applytodict(indexdict,indxname,tm);
+
+      applytodict(linkhyperdict,linkname,tm);
+      tnumin = tnumin + 1
+      continue
+    if  newcommsdict.has_key(rawname):
+      # We know this one. We have to see what it is
+      # To decide what to do.
+      # some DWOPbreg*  DWOPreg*   and MDfive are special.
+      # A variety of other such defined commands are irrelevant to us here.
+
+      tnumcount = 1
+      if rawname == "\\livetarg":
+        tnumcount = livetargprocess(linetoks,tnumin,myfile,linenum,"n")
+      elif rawname == "\\livetargi":
+        tnumcount = livetargiprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\livelink":
+        tnumcount = livelinkprocess(linetoks,tnumin,myfile,linenum,"n")
+      elif rawname == "\\livelinki":
+        tnumcount = livelinkprocess(linetoks,tnumin,myfile,linenum,"n")
+      #elif rawname == "\\label":
+      #  tnumcount = labelprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\refersec":
+        # does \ref
+        tnumcount = refersecprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\referfol":
+        # does \vref from varioref package
+        tnumcount = refersecprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\index":
+        tnumcount = indexprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\addtoindex":
+        tnumcount = indexprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\addtoindexx":
+        tnumcount = indexprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\addttindex":
+        tnumcount = indexprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\addttindexx":
+        tnumcount = indexprocess(linetoks,tnumin,myfile,linenum)
+      elif rawname == "\\DWOPbregtwo":
+        tnumcount = hyperlinkname("chap:DWOPbregn",tnumin,myfile,linenum)
+      elif rawname == "\\DWOPbregthree":
+        tnumcount = hyperlinkname("chap:DWOPbregn",tnumin,myfile,linenum)
+      elif rawname == "\\DWOPbregfour":
+        tnumcount = hyperlinkname("chap:DWOPbregn",tnumin,myfile,linenum)
+      elif rawname == "\\DWOPbregfive":
+        tnumcount = hyperlinkname("chap:DWOPbregn",tnumin,myfile,linenum)
+      elif rawname == "\\DWOPbregeleven":
+        tnumcount = hyperlinkname("chap:DWOPbregn",tnumin,myfile,linenum)
+      elif rawname == "\\MDfive":
+        tnumcount = hyperlinkname("def:MDfive",tnumin,myfile,linenum)
+      else:
+        fake = ""
+        # If missing anything important, perhaps turn ths on.
+        #print "Error not handled: %s in file %s line %d" %(rawname,myfile._name,linenum)
+      tnumin = tnumin + tnumcount
+      continue
+    # Suffixes are LINK TARG INDX MARK NAME
+    commandname =rawnameiscommand(rawname,"LINK")
+    if len(commandname) > 0:
+      # index the DWname
+      # Link is to chap:DWname
+      if dwfnamecommsdict.has_key(commandname):
+        tm = tokmention(t,myfile,linenum)
+        linkname = makelinkname(commandname)
+        indxname = deloptionalprefix(commandname,"\\")
+        applytodict(linkhyperdict,linkname,tm)
+        applytodict(indexdict,indxname,tm);
+      else:
+        printodderr(rawname,commandname,myfile,linenum)
+      tnumin = tnumin + 1
+      continue
+    commandname =rawnameiscommand(rawname,"TARG")
+    if len(commandname) > 0:
+      # index DWname
+      # Set chap:DWname as having target defined
+      if dwfnamecommsdict.has_key(commandname):
+        tm = tokmention(t,myfile,linenum)
+        targname = makelinkname(commandname)
+        indxname = deloptionalprefix(commandname,"\\")
+        applytodict(targhyperdict,targname,tm)
+        applytodict(indexdict,indxname,tm);
+      else:
+        printodderr(rawname,commandname,myfile,linenum)
+      tnumin = tnumin + 1
+      continue
+    commandname =rawnameiscommand(rawname,"INDX")
+    if len(commandname) > 0:
+      # Index DWname
+      if dwfnamecommsdict.has_key(commandname):
+        tm = tokmention(t,myfile,linenum)
+        indexname = deloptionalprefix(commandname,"\\")
+        applytodict(indexdict,indexname,tm)
+      else:
+        printodderr(rawname,commandname,myfile,linenum)
+      tnumin = tnumin + 1
+      continue
+    commandname =rawnameiscommand(rawname,"MARK")
+    if len(commandname) > 0:
+      # set chap:DWname as target defined
+      # index DWname
+      if dwfnamecommsdict.has_key(commandname):
+        tm = tokmention(t,myfile,linenum)
+        applytodict(targhyperdict,commandname,tm)
+        indexname = deloptionalprefix(commandname,"\\")
+        applytodict(indexdict,indexname,tm)
+      else:
+        printodderr(rawname,commandname,myfile,linenum)
+      tnumin = tnumin + 1
+      continue
+    commandname =rawnameiscommand(rawname,"NAME")
+    if len(commandname) > 0:
+      # No actions with NAME (but put in namedict anyway).
+      if dwfnamecommsdict.has_key(commandname):
+        tm = tokmention(t,myfile,linenum)
+        applytodict(namedict,commandname,tm)
+      else:
+        printodderr(rawname,commandname,myfile,linenum)
+      tnumin = tnumin + 1
+      continue
+    if rawname == "\\label":
+      # This is a builtin, not our newcommand.
+      tnumcount = labelprocess(linetoks,tnumin,myfile,linenum)
+      tnumin = tnumin + tnumcount
+      continue
+    if rawname == "\\hypertarget":
+      # This is a builtin, not our newcommand.
+      tnumcount = livetargprocess(linetoks,tnumin,myfile,linenum,"y")
+      tnumin = tnumin + tnumcount
+      continue
+    if rawname == "\\hyperlink":
+      # This is a builtin, not our newcommand.
+      tnumcount = livelinkprocess(linetoks,tnumin,myfile,linenum,"y")
+      tnumin = tnumin + tnumcount
+      continue
+    # Some random data or text here.
+
+    tnumin = tnumin + 1
+    # We don't know what this is. Probably ok?
+    # End of for loop.
+  return linetoks
+
+def sort_tokmlist(mylist):
+  # Return a new list holding the token mentions of mylist ordered by
+  # the text of each mention's token (the pieces in _token._tex joined
+  # together).  mylist itself is not modified.
+  #
+  # A key function is used instead of sorting (text, mention) pairs, so
+  # two mentions with the same text are never compared with each other;
+  # they simply keep their input order (sorted() is stable).
+  return sorted(mylist, key=lambda mention: ''.join(mention._token._tex))
+
+def printtokmention(ct,v):
+  # Print one indented, numbered line for token mention v:
+  # the counter ct, the token's name, and the file name and line
+  # number recorded in the mention.
+  tokname = toknamestring(v._token)
+  where = v._file
+  print "    [%2d] %s in file %s line %d" %(ct,tokname,where._name,v._line)
+
+# Are the two line numbers close?  Returns "y" when they differ
+# by fewer than 4, otherwise "n".
+def closetogether(l1,l2):
+  return "y" if abs(l1 - l2) < 4 else "n"
+
+def shouldprintalldups(v):
+  # Decide whether the mentions in v are worth reporting as duplicates.
+  # Returns "n" only for exactly two mentions that are in the same file
+  # and on lines closetogether() considers near one another (treated
+  # as a harmless duplication); returns "y" in every other case.
+  if len(v) != 2 or v[0]._file != v[1]._file:
+    return "y"
+  nearby = closetogether(v[0]._line,v[1]._line)
+  return "n" if nearby == "y" else "y"
+
+def printtoomany(name,k,vlist):
+  # Print a "Duplicate in ..." heading for key k of the table called
+  # name, followed by one numbered line (counting from 0) for each
+  # mention in vlist.
+  print "Duplicate in %s: %s:" %(name,k)
+  for ct, v in enumerate(vlist):
+    printtokmention(ct,v)
+
+def checkduplicatetargs(dname,d):
+  # d maps a name to the list of its mentions.  For every name with
+  # more than one mention, print the whole list unless
+  # shouldprintalldups() judges the repetition harmless.
+  # dname is only used to label the output.
+  for k, v in d.items():
+    if len(v) > 1 and shouldprintalldups(v) == "y":
+      printtoomany(dname,k,v)
+
+def checkmissingtarg(name,targ,refs):
+  # Print a "target missing" line for every key of refs that is not
+  # a key of targ.  name labels the kind of item in the message.
+  # Only the keys of the two dictionaries are looked at.
+  #
+  # 'in' replaces the deprecated dict.has_key(), and the message is
+  # built as a single string so the print works the same under
+  # Python 2 and Python 3.
+  for k in refs:
+    if k not in targ:
+       print("target missing from %s : %s" % (name, k))
+
+def checkmissingref(name,targ,refs):
+  # Print an "Unused target" line for every key of targ that is not
+  # a key of refs.  name labels the kind of item in the message.
+  # Only the keys of the two dictionaries are looked at.
+  #
+  # 'in' replaces the deprecated dict.has_key(), and the message is
+  # built as a single string so the print works the same under
+  # Python 2 and Python 3.
+  for k in targ:
+    if k not in refs:
+       print("Unused target from %s : %s" % (name, k))
+  
+def print_stats():
+  # Report on the module-level tables: first duplicated entries, then
+  # links/references without a target and targets nobody refers to.
+  # The tables are only read here, so no 'global' statements are needed.
+  duptables = (("newdwfname commands",dwfnamecommsdict),
+               ("commands",newcommsdict),
+               ("hypertargets",targhyperdict),
+               ("labels",labeldict))
+  for title, table in duptables:
+    checkduplicatetargs(title,table)
+
+  pairs = (("hyperlinks",targhyperdict,linkhyperdict),
+           ("labels",labeldict,labelrefdict))
+  for title, targets, refs in pairs:
+    checkmissingtarg(title,targets,refs)
+    checkmissingref(title,targets,refs)
+
+# Perhaps these should be controlled by
+# the command line.
+# debug:   "y" makes read_all_args() scan only testrefer.tex, looked
+#          up under "./", instead of the document sources.
+# winpath: "y" makes buildfilepaths() use the "..\latexdoc\" prefix
+#          rather than "../latexdoc/" when no base path is given.
+debug   = "n"
+winpath = "n"
+def buildfilepaths(files,basetarg):
+  # Return a new list with a directory prefix put in front of every
+  # name in files.
+  #   basetarg : prefix to use when it is non-empty; otherwise the
+  #              prefix is "..\latexdoc\" if the module-level winpath
+  #              is "y" and "../latexdoc/" if not.
+  # The prefix does not depend on the file, so it is chosen once
+  # rather than being recomputed for each name.
+  if len(basetarg) > 0:
+    prefix = basetarg
+  elif winpath == "y":
+    prefix = "..\\latexdoc\\"
+  else:
+    prefix = "../latexdoc/"
+  return [prefix + f for f in files]
+def read_all_args():
+  # Drive the whole check: pick the input files, scan them, report.
+  # No command-line arguments are actually read; the file lists are
+  # fixed and only the module-level 'debug' switch chooses between the
+  # test input and the document sources.
+  # NOTE(review): "d" presumably tells fileio to delete comments -- confirm.
+  fileio.setkeepordeletecomments("d")
+  if debug == "y":
+    basetarg = "./"
+    baselist1 = ["testrefer.tex"]
+    baselist2 = ["testrefer.tex"]
+  else:
+    basetarg = ""
+    # Files scanned in the first pass.
+    baselist1 = [
+      "dwarfnamecmds.tex",
+      "dwarf5.tex",
+      "generaldescription.tex",
+    ]
+    # Files scanned in the second pass.
+    baselist2 = [
+      "dwarf5.tex",
+      "attributesbytag.tex",
+      "changesummary.tex",
+      "compression.tex",
+      "copyright.tex",
+      "dataobject.tex",
+      "datarepresentation.tex",
+      "debugsectionrelationships.tex",
+      "encodingdecoding.tex",
+      "examples.tex",
+      "foreword.tex",
+      "generaldescription.tex",
+      "gnulicense.tex",
+      "introduction.tex",
+      "otherdebugginginformation.tex",
+      "programscope.tex",
+      "sectionversionnumbers.tex",
+      "splitobjects.tex",
+      "typeentries.tex",
+    ]
+  filelist1 = buildfilepaths(baselist1,basetarg)
+  filelist2 = buildfilepaths(baselist2,basetarg)
+
+  if not filelist1 or not filelist2:
+    print >> sys.stderr , "No files specified to refer.py, internal error."
+    sys.exit(1)
+
+  # First pass: pick up all the newcommand instances.
+  dwf = fileio.readFilelist(filelist1)
+  dwf.dwtransformline(transfunc1)
+
+  # Second pass: find all the uses, then report.
+  dwf2 = fileio.readFilelist(filelist2)
+  dwf2.dwtransformline(transfunc2)
+  print_stats()
+
+# Script entry point: run the checks only when this file is executed
+# directly, not when it is imported.
+if __name__ == '__main__':
+  read_all_args()
+
diff --git a/dwarf5/tools/repeats.py b/dwarf5/tools/repeats.py
new file mode 100644 (file)
index 0000000..ddb3fa9
--- /dev/null
@@ -0,0 +1,133 @@
+# Copyright 2014 DWARF Debugging Information Format Committee
+# This reads a text file, usually
+# the output of pdftotext (for example dwarf5.txt),
+# looks at each word in turn, keeping a window
+# of words, looking for repeated words and short
+# phrases
+
+import sys
+
+# Window of the most recently seen words.  procrecords() rebinds it
+# (through its own 'global' declaration) as each word is read.
+# The 'global' statement below has no effect at module level, where
+# every name is already global.
+global checkwindow
+checkwindow = []
+
+# Returns "y" when every character of s is an ASCII letter, an ASCII
+# digit or one of  - _ \ /  and "n" as soon as any other character is
+# found.  An empty string gives "y".
+# Plain numbers are let through here; checkdup() rejects them later.
+def strisasciialpha(s):
+  for c in s:
+    if 'a' <= c <= 'z' or 'A' <= c <= 'Z' or '0' <= c <= '9':
+      continue
+    if c in ('-', '_', '\\', '/'):
+      continue
+    return "n"
+  return "y"
+
+def checkdup(filename,line,checkwindow,phrasewindow,winlen,phraselen):
+  # Report whether the phrase in phrasewindow already occurs in
+  # checkwindow.  Prints a "duplicated:" message and returns "y" when
+  # it does; returns "n" otherwise.
+  #   filename, line    : used only in the printed message.
+  #   checkwindow       : recently seen words to search in.
+  #   phrasewindow      : words of the candidate phrase.
+  #   winlen, phraselen : accepted for the callers' sake; the actual
+  #                       lengths of the two windows are used instead.
+  winlen = len(checkwindow)
+  phraselen = len(phrasewindow)
+  if phraselen == 0:
+    # An empty phrase cannot be a repeat of anything.
+    return "n"
+  # A phrase is never reported if any of its words contains a character
+  # strisasciialpha() rejects, or is just a number.
+  for w in phrasewindow:
+    if strisasciialpha(w) == "n":
+      return "n"
+    # NOTE(review): float() also accepts words such as "nan" and "inf",
+    # so those are treated as numbers too.
+    try:
+      float(w)
+    except ValueError:
+      continue
+    return "n"
+  # Try every start position at which the whole phrase fits, including
+  # the final one (start + phraselen == winlen).  That last position
+  # matters: procrecords() calls this before adding the newest word to
+  # checkwindow, so for a one-word phrase the last slot of the window
+  # holds the immediately preceding word ("the the").
+  for start in range(winlen - phraselen + 1):
+    same = 0
+    for ip in range(phraselen):
+      if checkwindow[start+ip] == phrasewindow[ip]:
+        same = same + 1
+    if same == phraselen:
+      print "duplicated: ",phrasewindow," file ", filename,"line",line
+      return "y"
+  return "n"
+   
+      
+      
+def updatewindow(winin,word,winlen):
+   # Return a new window with word appended; winin is left untouched.
+   # While winin holds fewer than winlen words it simply grows.  Once
+   # it is full, a window length of 1 yields just [word]; otherwise
+   # the oldest (first) word is dropped to make room.
+   if len(winin) < winlen:
+      kept = winin
+   elif winlen == 1:
+      kept = []
+   else:
+      kept = winin[1:]
+   return kept + [word]
+
+def procrecords(filename,recs,winlen,phraselen):
+  # Walk through the text lines in recs word by word (split on
+  # whitespace), keeping the last phraselen words as the current phrase
+  # and the module-level checkwindow as the search window.
+  # Once the phrase is full, checkdup() is asked whether it repeats
+  # something in the window; line numbers passed to it count from 1.
+  # The new word joins checkwindow only after that check.
+  global checkwindow
+  phrasewindow = []
+  for idx, r in enumerate(recs):
+    curline = idx + 1
+    for w in r.split():
+      phrasewindow = updatewindow(phrasewindow,w,phraselen)
+      if len(phrasewindow) >= phraselen:
+        checkdup(filename,curline,checkwindow,phrasewindow,winlen,phraselen)
+      checkwindow = updatewindow(checkwindow,w,winlen)
+
+def procfile(file,filename,winlen,phraselen):
+  # Read every line of the open file object and hand the lines to
+  # procrecords(), with both lengths converted to int.
+  # filename is passed through for use in messages.
+  procrecords(filename,file.readlines(),int(winlen),int(phraselen))
+  
+
+def read_args():
+  # Command line:  N N <file>
+  # The larger of the two numbers is the search window length and the
+  # smaller is the phrase length, whichever order they are given in.
+  # Exits with status 1 on a wrong argument count, a count that is not
+  # an integer, or a file that cannot be opened.
+  if len(sys.argv) != 4:
+    print "Expect 3 arguments N N <file>"
+    sys.exit(1)
+
+  # Convert before comparing: compared as strings, "10" sorts before
+  # "9" and the two lengths would be swapped.
+  try:
+    first = int(sys.argv[1])
+    second = int(sys.argv[2])
+  except ValueError:
+    print "Expect 3 arguments N N <file>"
+    sys.exit(1)
+  winlen = max(first,second)
+  phraselen = min(first,second)
+
+  v = sys.argv[3]
+  try:
+      file = open(v,"r")
+  except IOError:
+      print "Unable to open ",v
+      sys.exit(1)
+  # Make sure the file is closed even if processing fails.
+  try:
+    procfile(file,v,winlen,phraselen)
+  finally:
+    file.close()
+
+# Script entry point: process the command line only when this file is
+# executed directly, not when it is imported.
+if __name__ == '__main__':
+  read_args()
+  
+
+
diff --git a/dwarf5/tools/testrefer.tex b/dwarf5/tools/testrefer.tex
new file mode 100644 (file)
index 0000000..d62d147
--- /dev/null
@@ -0,0 +1,179 @@
+\% Definitions for each of the DWARF names
+% These eliminate the need to use escapes for the underscores or
+% add entries for indexing
+%
+
+\newcommand{\addtoindex}[1]{#1\index{#1}}
+\newcommand{\addttindex}[1]{\texttt{#1}\index{#1@\texttt{#1}}}
+\newcommand{\refersec}[1]{\vref{#1}}  % beware possible rerun loop
+\newcommand{\referfol}[1]{\ref{#1} following}
+
+% Generate a live link in the document
+% use like \livelink{chap:DWOPdup}{DW\_OP\_dup}
+\newcommand{\livelink}[2]{\hyperlink{#1}{#2}\index{#2}}
+% use when the index is different from the text and target.
+\newcommand{\livelinki}[3]{\hyperlink{#1}{#2}\index{#3}}
+% livetarg is the declaration this is the target of livelinks.
+% FIXME: we might want livetarg and livetargi  #2 to be \textbf{#2}
+\newcommand{\livetarg}[2]{\hypertarget{#1}{#2}\index{#2}}
+% When we want the index entry to look different from the name.
+\newcommand{\livetargi}[3]{\hypertarget{#1}{#2}\index{#3}}
+
+\newcommand{\thirtytwobitdwarfformat}[1][]{\livelink{datarep:xxbitdwffmt}{32-bit DWARF format}}
+\newcommand{\sixtyfourbitdwarfformat}[1][]{\livelink{datarep:xxbitdwffmt}{64-bit DWARF format}}
+
+% For index entries. The tt-variant of each pair is designed to
+% allow a word to appear in tt font in the main text and the index
+% but to collate in the index in its non-tt order. (LaTeX normally
+% sorts all tt words before all non-tt words.)
+\newcommand{\addtoindex}[1]{#1\index{#1}}
+\newcommand{\addttindex}[1]{\texttt{#1}\index{#1@\texttt{#1}}}
+\newcommand{\addtoindexi}[2]{#1\index{#2}}
+\newcommand{\addttindexi}[2]{\texttt{#1}\index{#2@\texttt{#2}}}
+\newcommand{\addtoindexx}[1]{\index{#1}}
+\newcommand{\addttindexx}[1]{\index{#1@\texttt{#1}}}
+
+
+
+
+
+% A command to define multiple helpful DWARF name commands
+% In a test this causes errors, but in the real run
+% we do not see this in transfunc2
+\newcommand{\newdwfnamecommands}[2]{
+       \expandafter\def\csname #1LINK\endcsname{\index{#2}\hyperlink{chap:#1}{#2}}
+       \expandafter\def\csname #1TARG\endcsname{\index{#2}\hypertarget{chap:#1}{#2}}
+       \expandafter\def\csname #1INDX\endcsname{\index{#2}#2}
+       \expandafter\def\csname #1MARK\endcsname{\hypertarget{chap:#1}{}\index{#2}}     
+       \expandafter\def\csname #1NAME\endcsname{#2}
+       % The normal, most common use in running text...
+       \expandafter\def\csname #1\endcsname{\csname #1LINK\endcsname}
+       }
+
+
+% DW_ACCESS
+%
+\newdwfnamecommands{DWACCESSprivate}{DW\_ACCESS\_private}
+\newdwfnamecommands{DWACCESSprotected}{DW\_ACCESS\_protected}
+\newdwfnamecommands{DWACCESSpublic}{DW\_ACCESS\_public}
+%
+% DW_ADDR
+%
+\newdwfnamecommands{DWADDRnone}{DW\_ADDR\_none}
+%
+% DW_AT
+%
+\newdwfnamecommands{DWATabstractorigin}{DW\_AT\_abstract\_origin}
+\newdwfnamecommands{DWATaccessibility}{DW\_AT\_accessibility}
+\newdwfnamecommands{DWATaddrbase}{DW\_AT\_addr\_base}
+\newdwfnamecommands{DWATaddressclass}{DW\_AT\_address\_class}
+\newdwfnamecommands{DWATallocated}{DW\_AT\_allocated}
+\newdwfnamecommands{DWATartificial}{DW\_AT\_artificial}
+\newdwfnamecommands{DWATassociated}{DW\_AT\_associated}
+%
+\newdwfnamecommands{DWATbasetypes}{DW\_AT\_base\_types}
+\newdwfnamecommands{DWATbinaryscale}{DW\_AT\_binary\_scale}
+\newdwfnamecommands{DWATbitoffset}{DW\_AT\_bit\_offset}
+\newdwfnamecommands{DWATbitsize}{DW\_AT\_bit\_size}
+\newdwfnamecommands{DWATbitstride}{DW\_AT\_bit\_stride}
+\newdwfnamecommands{DWATbyteoffset}{DW\_AT\_byte\_offset}
+\newdwfnamecommands{DWATbytesize}{DW\_AT\_byte\_size}
+\newdwfnamecommands{DWATbytestride}{DW\_AT\_byte\_stride}
+%
+\newdwfnamecommands{DWATcallcolumn}{DW\_AT\_call\_column}
+\newdwfnamecommands{DWATcallfile}{DW\_AT\_call\_file}
+\newdwfnamecommands{DWATcallline}{DW\_AT\_call\_line}
+\newdwfnamecommands{DWATcallingconvention}{DW\_AT\_calling\_convention}
+\newdwfnamecommands{DWATcommonreference}{DW\_AT\_common\_reference}
+\newdwfnamecommands{DWATcompdir}{DW\_AT\_comp\_dir}
+\newdwfnamecommands{DWATconstexpr}{DW\_AT\_const\_expr}
+\newdwfnamecommands{DWATconstvalue}{DW\_AT\_const\_value}
+\newdwfnamecommands{DWATcontainingtype}{DW\_AT\_containing\_type}
+\newdwfnamecommands{DWATcount}{DW\_AT\_count}
+%
+\newdwfnamecommands{DWATdatabitoffset}{DW\_AT\_data\_bit\_offset}
+\newdwfnamecommands{DWATdatalocation}{DW\_AT\_data\_location}
+\newdwfnamecommands{DWATdatamemberlocation}{DW\_AT\_data\_member\_location}
+\newdwfnamecommands{DWATdecimalscale}{DW\_AT\_decimal\_scale}
+       \newcommand{\DWOPbregtwo}{\hyperlink{chap:DWOPbregn}{DW\_OP\_breg2}}            % Link, don't index...
+       \newcommand{\DWOPbregthree}{\hyperlink{chap:DWOPbregn}{DW\_OP\_breg3}}          %
+       \newcommand{\DWOPbregfour}{\hyperlink{chap:DWOPbregn}{DW\_OP\_breg4}}           %
+       \newcommand{\DWOPbregfive}{\hyperlink{chap:DWOPbregn}{DW\_OP\_breg5}}           %
+       \newcommand{\DWOPbregeleven}{\hyperlink{chap:DWOPbregn}{DW\_OP\_breg11}}        %
+\newdwfnamecommands{DWOPbregx}{DW\_OP\_bregx}
+\newdwfnamecommands{DWOPcalltwo}{DW\_OP\_call2}
+\newdwfnamecommands{DWOPcallfour}{DW\_OP\_call4}
+%
+%%%%%%%%%%%%%%%
+%
+% .debug_*, .debug_*.dwo, et al
+%
+\newcommand{\dotdebugabbrev}{\addtoindex{\texttt{.debug\_abbrev}}}
+\newcommand{\dotdebugaddr}{\addtoindex{\texttt{.debug\_addr}}}
+\newcommand{\dotdebugaranges}{\addtoindex{\texttt{.debug\_aranges}}}
+\newcommand{\dotdebugframe}{\addtoindex{\texttt{.debug\_frame}}}
+%
+\newcommand{\dotdata}{\addtoindex{\texttt{.data}}}
+\newcommand{\dottext}{\addtoindex{\texttt{.text}}}
+%
+% Current section version numbers
+%
+\newcommand{\versiondotdebugabbrev}  {5}
+\newcommand{\versiondotdebugaddr}    {5}
+\newcommand{\versiondotdebugstr}     {5}
+\newcommand{\versiondotdebugstroffsets}{5}
+\newcommand{\versiondotdebugtypes}   {\versiondotdebuginfo}
+%
+% DWARF Standard Versions
+%
+\newcommand{\DWARFVersionI}   {\addtoindex{DWARF Version 1}}
+\newcommand{\DWARFVersionII}  {\addtoindex{DWARF Version 2}}
+%
+\newcommand{\MDfive}{\livelink{def:MDfive}{MD5}}
+
+
+\DWACCESSpublic&0x01  \\
+\DWACCESSpublicTARG{}     
+# Following is duplicate def
+\DWACCESSpublicTARG{}     
+
+\DWATcountLINK
+% Following should generate error
+\DWATcountnodwnamecommand + 3
+
+% just shows the name. Not a target or even indexed.
+% Actually an error because we never defined this above.
+A \DWATnonameNAME{} attribute whose value is a
+
+\livelinki{datarep:classreference}{reference}{reference class}
+\livelinki{datarep:classstring}{string}{string class}
+describe the static \livelink{chap:lexicalblock}{block} structure 
+\livelink{chap:lexicalblock}{lexical block} that owns it, 
+class \livelink{chap:classexprloc}{exprloc}  
+using class \livelink{chap:classloclistptr}{loclistptr}
+which is a \livelink{chap:classflag}{flag}.
+\livelink{chap:classflag}{flag}.
+attribute whose value is a \livelink{chap:classreference}{reference} to
+
+\livetargi{chap:declarationcoordinates}{}{declaration coordinates}
+oops \livetarg{chap:classreference}{}
+buy \livetarg{chap:classexprloc}{}
+something \livetarg{chap:classflag}{}
+nice \livetarg{chap:DWATdwoidforunit}{}
+today \livetarg{chap:DWATdwoidforunit}{}
+
+\label{mylabone}
+\label{mylabtwo}
+\label{mylabdup}
+\label{mylabdup}
+\refersec{mylabone}
+\refersec{danglingref}
+
+% defines label
+\begin{simplenametable}[1.9in]{Accessibility codes}{tab:goodlabel}
+% No label
+\begin{simplenametable}[1.9in]
+% no label here, not reported
+\begin{foo}[1.9in]{Accessibility codes}{badlabel}
+\begin{simplenametable}[1.9in]{Accessibility codes}{chap:unusedlabel}
+\refersec{tab:goodlabel}
diff --git a/dwarf5/tools/testrepeats.txt b/dwarf5/tools/testrepeats.txt
new file mode 100644 (file)
index 0000000..49cd550
--- /dev/null
@@ -0,0 +1,17 @@
+
+this is ok
+when
+
+when
+
+there
+there is
+
+when we 
+when
+we
+
+we do something now or
+we do something now.
+The size in bytes of the contents of the other stuff
+