#!/usr/bin/env python
##
## Name:     mkdoc.py
## Purpose:  Extract documentation from header files.
##
## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
##
## Usage: mkdoc.py <template> <output>
##
from __future__ import print_function

import bisect
import collections
import re
import sys

# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
doc = re.compile(r'''(?mx)/\*\*     # open  /**
(?P<text>(?:[^*]|\*[^/])*)          # text      Does a thing
\*/\n                               # close */
(?P<decl>[^;{]*(?:;$|\{))''')       # decl  void f(x);

# A regular expression matching up to 4 spaces at the head of a line.
spc = re.compile(r'(?m)^ {1,4}')

# A regular expression matching an insertion point.  An insertion point has the
# form {{insert "header" name ...}}.  If no names are given, all the names in
# the given header are inserted.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')

# A regular expression matching non-identifier characters, for splitting.
nid = re.compile(r'\W+')

# A cache of already-parsed files, maps filename to declarations.
CACHE = {}

# The banner written at the top of every generated document; {0} is replaced
# with the path of the template the document was generated from.
_BANNER = '''<!--
 This file was generated from "{0}" by mkdoc.py
 DO NOT EDIT
-->
'''


def last_word(s):
    """Returns the last identifier-shaped word in s.

    Returns the empty string if s contains no word characters at all.
    """
    # Splitting on runs of non-word characters leaves an empty field at either
    # end when s begins or ends with punctuation (e.g. "name)"); discard those
    # so that trailing punctuation does not hide the last real word.
    words = [w for w in nid.split(s.strip()) if w]
    return words[-1] if words else ''


def typeset(text):
    """Renders text with verbatim sections into markdown.

    Runs of lines that begin with a space are treated as verbatim and wrapped
    in ``` fences.  The whole result is formatted as a single list item: the
    first line gets a " - " bullet and every other non-empty line is indented
    to line up under the bullet's text.
    """
    bullet = ' - '
    # Continuation lines must sit at the column where the bullet's text starts,
    # otherwise fenced blocks fall outside the list item.
    indent = ' ' * len(bullet)

    lines = []
    fence = False
    for line in text.split('\n'):
        # Open or close a fence whenever we cross between plain and verbatim.
        if fence != line.startswith(' '):
            lines.append('```')
            fence = not fence
        lines.append(line)
    if fence:
        lines.append('```')  # close a fence left open at the end of the text

    out = []
    for i, line in enumerate(lines):
        if i == 0:
            out.append(bullet + line)
        elif line:
            out.append(indent + line)
        else:
            out.append(line)  # leave blank lines blank
    return '\n'.join(out)


class LIndex(object):
    """Represents a line offset index for text."""

    def __init__(self, text):
        """Records the offset of every newline in text."""
        breaks = []
        at = text.find('\n')
        while at >= 0:
            breaks.append(at)
            at = text.find('\n', at + 1)
        self._len = len(text)
        # Sorted offsets of each '\n', so positions can be binary searched.
        self._breaks = breaks

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based.  A newline character
        is counted as the last column of the line it terminates.

        Raises:
          IndexError: if pos is negative or greater than the text length.
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)

        # The number of newlines strictly before pos is the number of complete
        # lines that precede it.
        before = bisect.bisect_left(self._breaks, pos)
        start = self._breaks[before - 1] + 1 if before else 0
        return before + 1, pos - start


class Decl(object):
    """Represents a single documented declaration."""

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com:  the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        # For a function-like declaration the name is the word just before the
        # first "("; otherwise it is the last word of the declaration.
        lp = decl.find('(')
        head = decl.rstrip(';') if lp < 0 else decl[:lp]
        self.name = last_word(head)

        # Drop the trailing ";" or "{" and collapse all whitespace runs.
        self.decl = ' '.join(decl.rstrip(';{').strip().split())
        self.comment = spc.sub('', com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def markdown(self, path):
        """Renders this declaration as a markdown/HTML fragment.

        The declared name is turned into a link to path, with a "#L<line>"
        fragment when the line number is known.
        """
        pos = self.decl.index(self.name)
        if self.line is None:
            href = path  # no line recorded; link to the file itself
        else:
            href = '%s#L%d' % (path, self.line)
        decl = '%s<a href="%s">%s</a>%s' % (
            self.decl[:pos],
            href,
            self.name,
            self.decl[pos + len(self.name):],
        )
        return '''------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
'''.format(name=self.name, decl=decl, comment=typeset(self.comment))


def parse_decls(text):
    """Parse a dictionary of declarations from text.

    Returns an ordered dictionary mapping each declared name to its Decl, in
    the order the declarations appear in text.  A later declaration with the
    same name replaces an earlier one.
    """
    decls = collections.OrderedDict()
    idx = LIndex(text)
    for m in doc.finditer(text):
        line, _ = idx.linecol(m.start('decl'))
        d = Decl(m.group('text'), m.group('decl'), line)
        decls[d.name] = d
    return decls


def load_file(path):
    """Load declarations from path, or use cached results."""
    if path not in CACHE:
        # open() rather than file(): the latter does not exist in Python 3,
        # and the "U" mode flag is rejected by Python 3.11 and later.
        with open(path, 'r') as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]


def _select_decls(decls, names, header):
    """Returns the entries of decls named by names, in the order given.

    Raises:
      KeyError: if any requested name has no declaration in decls.
    """
    missing = [name for name in names if name not in decls]
    if missing:
        raise KeyError('no documented declaration in "%s" for: %s' %
                       (header, ', '.join(missing)))
    return collections.OrderedDict((name, decls[name]) for name in names)


def _render(template, template_path):
    """Returns template with each insertion point replaced by its content."""
    parts = [_BANNER.format(template_path), '\n']
    pos = 0  # last position of input copied

    # Look for substitution markers in the template, and replace them with
    # their content.
    for ip in ins.finditer(template):
        parts.append(template[pos:ip.start()])
        pos = ip.end()

        header = ip.group('file')
        decls = load_file(header)
        if ip.group('names'):  # pick the selected names, in order
            decls = _select_decls(decls, ip.group('names').split(), header)

        # Render the selected declarations, one per line.
        for decl in decls.values():
            parts.append(decl.markdown(header))
            parts.append('\n')

    # Clean up any remaining template bits
    parts.append(template[pos:])
    return ''.join(parts)


def main(args):
    """Expands the template named by args[0] into the file named by args[1].

    Prints a usage message and exits with status 1 unless exactly two
    arguments are given.
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    doc_template, doc_markdown = args

    with open(doc_template, 'r') as src:
        template = src.read()

    # Render everything before touching the output file, so that a failure
    # part-way through does not leave a truncated document behind.
    rendered = _render(template, doc_template)

    with open(doc_markdown, 'wt') as output:
        output.write(rendered)


if __name__ == "__main__":
    main(sys.argv[1:])