1#!/usr/bin/env python
2##
3## Name:    mkdoc.py
4## Purpose: Extract documentation from header files.
5##
6## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
7##
8## Usage: mkdoc.py <template> <output>
9##
10from __future__ import print_function
11
12import collections, re, sys
13
# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
#
# Group "text" captures the body of a /** ... */ comment.  The closing */ must
# be followed directly by a newline.  Group "decl" captures what follows, up to
# and including either a ";" that ends its line or a "{".
doc = re.compile(r'''(?mx)/\*\* # open  /**
(?P<text>(?:[^*]|\*[^/])*)      # text      Does a thing
\*/\n                           # close */
(?P<decl>[^;{]*(?:;$|\{))''')  # decl  void f(x);

# A regular expression matching up to 4 spaces at the head of a line.
# Used to strip the leading indentation from comment text.
spc = re.compile(r'(?m)^ {1,4}')

# A regular expression matching an insertion point.  An insertion point has the
# form {{insert "header" name ...}}.  If no names are given, all the names in
# the given header are inserted.  Group "file" captures the quoted header path
# and group "names" captures the optional whitespace-separated name list.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')

# A regular expression matching non-identifier characters, for splitting.
nid = re.compile(r'\W+')

# A cache of already-parsed files, maps filename to declarations.
CACHE = {}
35
36
def last_word(s):
    """Returns the last identifier-shaped word in s.

    A word is a maximal run of identifier characters (letters, digits and
    underscores).  Punctuation following the final word is ignored, so
    "int foo[]" yields "foo".  Returns the empty string when s contains no
    word at all.
    """
    # Splitting on runs of non-identifier characters leaves an empty trailing
    # field whenever s ends in punctuation, which would be returned in place
    # of the real last word.  Collecting the words directly avoids that.
    words = re.findall(r'\w+', s)
    return words[-1] if words else ''
40
41
def typeset(text):
    """Renders text with verbatim sections into markdown.

    Runs of lines that begin with a space are wrapped in ``` fences.  The
    result is formatted as a list item: the first output line is prefixed
    with ' -  ' and every later non-empty line is indented by four spaces.
    """
    out = []
    in_code = False
    for raw in text.split('\n'):
        indented = raw.startswith(' ')
        # Emit a fence each time we cross between prose and verbatim text.
        if indented != in_code:
            out.append('```')
            in_code = indented
        out.append(raw)
    if in_code:
        # Close a verbatim section that runs to the end of the text.
        out.append('```')

    # split() always yields at least one element, so out is never empty.
    rendered = [' -  ' + out[0]]
    rendered.extend('    ' + row if row else row for row in out[1:])
    return '\n'.join(rendered)
57
58
class LIndex(object):
    """Represents a line offset index for text.

    The index records the offset of every newline in the text so that a
    character offset can be converted to a (line, column) pair by binary
    search.
    """

    def __init__(self, text):
        """Build the index for text.

        Params:
          text: the string to be indexed
        """
        # An array of ending offsets for each line, with a sentinel at position
        # 0 to make the index arithmetic easier.
        idx = [0]

        # Record the offset of each line break so we can binary search them
        # later.
        brk = text.find('\n')
        while brk >= 0:
            idx.append(brk)
            brk = text.find('\n', brk + 1)

        # The end of the text terminates the final line.
        if idx[-1] < len(text):
            idx.append(len(text))
        self._len = len(text)
        self._index = idx

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based.  A newline character is
        counted as part of the line it terminates.

        Raises:
          IndexError: if pos is negative or greater than the text length.
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)

        # Binary search for the largest line number whose end marker is at or
        # after pos and whose previous line's end is before pos.
        idx = self._index
        lo, hi = 1, len(idx)
        while lo < hi:
            # Floor division keeps the midpoint an integer; true division
            # yields a float on Python 3, which cannot index a list.
            mid = (lo + hi) // 2
            if idx[mid] < pos:
                lo = mid + 1
            elif idx[mid - 1] < pos:
                # Line 1 starts at offset 0 (the sentinel is not a newline);
                # every later line starts one past the preceding newline.
                start = idx[mid - 1] + 1 if mid > 1 else 0
                return mid, pos - start
            else:
                hi = mid

        # The search falls through only for pos == 0, the start of line 1.
        return 1, pos
105
106
class Decl(object):
    """Represents a single documented declaration.

    Attributes are set by __init__:
      name: the declared identifier, as extracted by last_word
      decl: the declaration with whitespace collapsed and any trailing
            ";" or "{" removed
      comment: the comment text with up to 4 leading spaces stripped from
               each line and trailing whitespace removed
      line: the line number of the declaration, or None if unknown
    """

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com: the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        paren = decl.find('(')
        # When there is a parenthesis the name is the word just before the
        # first one; otherwise it is the last word of the whole declaration.
        head = decl.rstrip(';') if paren < 0 else decl[:paren]
        self.name = last_word(head)
        self.decl = ' '.join(decl.rstrip(';{').strip().split())
        self.comment = spc.sub('', com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def markdown(self, path):
        """Returns this declaration rendered as a markdown fragment.

        The declared name is wrapped in a link to path.  When the line number
        is known the link carries a "#L<line>" fragment.

        Params:
          path: the link target for the declaration's name
        """
        # line is optional in the constructor; formatting None with %d would
        # raise TypeError, so fall back to a link without a line fragment.
        if self.line is None:
            href = path
        else:
            href = '%s#L%d' % (path, self.line)

        pos = self.decl.index(self.name)
        decl = '%s<a href="%s">%s</a>%s' % (
            self.decl[:pos],
            href,
            self.name,
            self.decl[pos + len(self.name):],
        )
        return '''------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
'''.format(name=self.name, decl=decl, comment=typeset(self.comment))
145
146
def parse_decls(text):
    """Collects the documented declarations found in text.

    Returns an ordered dictionary mapping each declaration's name to its Decl.
    Keys are kept in order of first appearance; a later declaration with the
    same name replaces the earlier value.
    """
    index = LIndex(text)
    found = collections.OrderedDict()
    for match in doc.finditer(text):
        # Only the line number of the declaration is needed, not the column.
        lineno = index.linecol(match.start('decl'))[0]
        entry = Decl(match.group('text'), match.group('decl'), lineno)
        found[entry.name] = entry
    return found
156
157
def load_file(path):
    """Load declarations from path, or use cached results.

    Params:
      path: the name of the file to parse

    Returns the dictionary of declarations produced by parse_decls.  The
    result is stored in CACHE, so each path is read and parsed at most once.
    """
    if path not in CACHE:
        # Use open() rather than file(): the file builtin exists only on
        # Python 2, and the 'U' mode flag is rejected by Python 3.11 and
        # later.  Text mode on Python 3 already translates newlines.
        with open(path) as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]
164
165
def main(args):
    """Expand the insertion points of a template into an output file.

    Params:
      args: the command-line arguments without the program name; exactly two
            are required, the template path and the output path

    Prints a usage message to stderr and exits with status 1 if the argument
    count is wrong.
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    doc_template = args[0]
    doc_markdown = args[1]

    # Use open() rather than file(): the file builtin exists only on Python 2,
    # and the 'U' mode flag is rejected by Python 3.11 and later.
    with open(doc_template) as template_file:
        template = template_file.read()

    with open(doc_markdown, 'w') as output:
        print(
            '''<!--
  This file was generated from "{0}" by mkdoc.py
  DO NOT EDIT
-->
'''.format(doc_template),
            file=output)

        pos = 0  # last position of input copied

        # Look for substitution markers in the template, and replace them with
        # their content.
        for ip in ins.finditer(template):
            # Copy the literal template text preceding this marker.
            output.write(template[pos:ip.start()])
            pos = ip.end()

            decls = load_file(ip.group('file'))
            if ip.group('names'):  # pick the selected names, in order
                decls = collections.OrderedDict(
                    (key, decls[key])
                    for key in ip.group('names').strip().split())

            # Render the selected declarations.
            for decl in decls.values():
                print(decl.markdown(ip.group('file')), file=output)

        # Clean up any remaining template bits
        output.write(template[pos:])
206
207
# When run as a script, pass the command-line arguments (without the program
# name) to main.
if __name__ == "__main__":
    main(sys.argv[1:])
210