1#!/usr/bin/env python3
# A tool to parse the FormatStyle struct from Format.h and the IncludeStyle
# struct from IncludeStyle.h, and update the documentation in
# ../ClangFormatStyleOptions.rst automatically.
# Run from the directory in which this file is located to update the docs.
5
6import inspect
7import os
8import re
9import sys
10from io import TextIOWrapper
11from typing import Set
12
# Every path below is derived from the directory that contains this script.
_SCRIPT_DIR = os.path.dirname(__file__)

# Two directory levels above this script; the header and doc paths hang off it.
CLANG_DIR = os.path.join(_SCRIPT_DIR, '../..')
# Headers whose style structs are parsed for documented options.
FORMAT_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Format/Format.h')
INCLUDE_STYLE_FILE = os.path.join(
    CLANG_DIR, 'include/clang/Tooling/Inclusions/IncludeStyle.h')
# The reStructuredText file whose generated section is rewritten.
DOC_FILE = os.path.join(CLANG_DIR, 'docs/ClangFormatStyleOptions.rst')

# Text file listing, one per line, the plural forms generated so far.
PLURALS_FILE = os.path.join(_SCRIPT_DIR, 'plurals.txt')
19
# Plural forms that were already generated on an earlier run (one per line in
# PLURALS_FILE).
plurals: Set[str] = set()
# 'a+' creates the file when it does not exist yet; rewind before reading
# because append mode may leave the stream positioned at the end of the file.
with open(PLURALS_FILE, 'a+') as f:
  f.seek(0)
  plurals.update(f.read().splitlines())
24
def substitute(text, tag, contents):
  """Replace the generated section of ``text`` that is delimited by ``tag``.

  The section starts at a ``.. START_<tag>`` line and ends at the last
  ``.. END_<tag>`` line. Both marker lines are kept and everything between
  them is replaced by ``contents`` surrounded by blank lines.

  Args:
    text: the whole document.
    tag: marker suffix; it is interpolated into the regular expression as-is.
    contents: new body of the section, inserted verbatim.

  Returns:
    The document with the section replaced.

  Raises:
    ValueError: if the marker pair is not present in ``text``.
  """
  replacement = '\n.. START_%s\n\n%s\n\n.. END_%s\n' % (tag, contents, tag)
  pattern = r'\n\.\. START_%s\n.*\n\.\. END_%s\n' % (tag, tag)
  # A callable replacement is inserted verbatim: backslashes in the generated
  # text are not treated as escapes, and - unlike substituting a '%s'
  # placeholder and %-formatting afterwards - literal '%' characters elsewhere
  # in the document are left alone.
  result, count = re.subn(pattern, lambda _match: replacement, text,
                          flags=re.S)
  if count == 0:
    raise ValueError('START_%s/END_%s markers not found' % (tag, tag))
  return result
29
def register_plural(singular: str, plural: str):
  """Remember ``plural`` as a generated plural form and return it.

  A plural that is not yet in ``plurals`` is added to the set, appended to
  PLURALS_FILE, and reported on stderr together with the caller's line number
  so that it can be checked by hand. The first new plural of a run also prints
  a hint about how to handle the plurals file.

  Args:
    singular: the word that was pluralized (used in the message only).
    plural: the generated plural form.

  Returns:
    ``plural``, unchanged.
  """
  if plural in plurals:
    return plural

  # The function attribute marks that the one-time hint was already printed.
  if not hasattr(register_plural, "generated_new_plural"):
    hint = ('Plural generation: you can use '
            f'`git checkout -- {os.path.relpath(PLURALS_FILE)}` '
            'to reemit warnings or `git add` to include new plurals\n')
    print(hint)
  register_plural.generated_new_plural = True

  plurals.add(plural)
  with open(PLURALS_FILE, 'a') as plurals_file:
    plurals_file.write(plural + '\n')

  # Point the message at the line that called this function, when available.
  frame = inspect.currentframe()
  caller = frame.f_back if frame else None
  location = ':' + str(caller.f_lineno) if caller else ''
  print(f'{__file__}{location} check if plural of {singular} is {plural}', file=sys.stderr)
  return plural
47
def pluralize(word: str):
  """Return an English plural of ``word`` built from simple suffix rules.

  The rules are matched case-insensitively, in this order:
    * consonant + ``y``              -> drop ``y``, add ``ies``
    * ``s``, ``sh``, ``ch``, ``x``, ``z`` -> add ``es``
    * ``fe``                         -> drop ``fe``, add ``ves``
    * single ``f`` (not ``ff``)      -> drop ``f``, add ``ves``
    * anything else                  -> add ``s``

  Args:
    word: the singular word.

  Returns:
    The plural, as returned by register_plural() (which also records it).
  """
  lword = word.lower()
  if len(lword) >= 2 and lword[-1] == 'y' and lword[-2] not in 'aeiou':
    return register_plural(word, word[:-1] + 'ies')
  elif lword.endswith(('s', 'sh', 'ch', 'x', 'z')):
    # The suffix is appended to the whole word: stripping the final letter
    # first would turn e.g. "Box" into "Boes" instead of "Boxes".
    return register_plural(word, word + 'es')
  elif lword.endswith('fe'):
    return register_plural(word, word[:-2] + 'ves')
  elif lword.endswith('f') and not lword.endswith('ff'):
    return register_plural(word, word[:-1] + 'ves')
  else:
    return register_plural(word, word + 's')
60
61
def to_yaml_type(typestr: str):
  """Translate a C++ type spelling into the name shown in the documentation.

  The four built-in scalar spellings map to fixed names, ``std::vector<T>``
  becomes ``List of`` followed by the pluralized name of ``T``, and any other
  spelling is returned unchanged.
  """
  scalar_names = {
      'bool': 'Boolean',
      'int': 'Integer',
      'unsigned': 'Unsigned',
      'std::string': 'String',
  }
  if typestr in scalar_names:
    return scalar_names[typestr]

  # Strip one level of std::vector<...>; `hits` tells whether it was there.
  element_type, hits = re.subn(r'^std::vector<(.*)>$', r'\1', typestr)
  if hits != 1:
    return typestr
  return 'List of ' + pluralize(to_yaml_type(element_type))
77
def doxygen2rst(text):
  """Convert the Doxygen markup handled here into reStructuredText.

  ``<tt>x</tt>`` and ``\\c x`` become inline literals; any remaining
  backslash command followed by a space is removed.
  """
  # Applied in order: the generic command removal must come last so that it
  # does not swallow ``\c`` before it has been converted.
  rewrites = (
      (r'<tt>\s*(.*?)\s*<\/tt>', r'``\1``'),
      (r'\\c ([^ ,;\.]+)', r'``\1``'),
      (r'\\\w+ ', ''),
  )
  for pattern, replacement in rewrites:
    text = re.sub(pattern, replacement, text)
  return text
83
def indent(text, columns, indent_first_line=True):
  """Indent the non-empty lines of ``text`` by ``columns`` spaces.

  Every character that directly follows a newline (and is not itself a
  newline) is pushed right. The first line is indented as well unless
  ``indent_first_line`` is false or the text starts with a newline.
  """
  padding = ' ' * columns
  # The lookahead keeps empty lines free of trailing whitespace.
  indented = re.sub(r'\n(?=[^\n])', '\n' + padding, text)
  if indent_first_line and not indented.startswith('\n'):
    return padding + indented
  return indented
90
class Option(object):
  """A documented field of a style struct, rendered as one option entry."""

  def __init__(self, name, opt_type, comment, version):
    self.name = name
    self.type = opt_type
    self.comment = comment.strip()
    self.version = version
    # Starts unset; rendered as extra sections when assigned later.
    self.enum = None
    self.nested_struct = None

  def __str__(self):
    # Heading: bold name, documented type and, if known, the version badge.
    heading = '**%s** (``%s``)' % (self.name, to_yaml_type(self.type))
    if self.version:
      heading += ' :versionbadge:`clang-format %s`' % (self.version,)
    rendered = heading + '\n' + doxygen2rst(indent(self.comment, 2))

    if self.enum and self.enum.values:
      values_section = '\n\nPossible values:\n\n%s\n' % self.enum
      rendered += indent(values_section, 2)
    if self.nested_struct:
      flags_section = '\n\nNested configuration flags:\n\n%s\n' % self.nested_struct
      rendered += indent(flags_section, 2)
    return rendered
113
class NestedStruct(object):
  """A struct declared inside the style struct, with its documented fields."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()
    # Field entries, appended while the struct body is parsed.
    self.values = []

  def __str__(self):
    # The struct comment first, then one rendered entry per field.
    fields = '\n'.join(str(value) for value in self.values)
    return '%s\n%s' % (self.comment, fields)
122
class NestedField(object):
  """A documented field of a nested struct, rendered as a bullet item."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()

  def __str__(self):
    # Continuation lines are indented under the bullet; the first line is not,
    # because it follows the field name on the same line.
    description = doxygen2rst(
        indent(self.comment, 2, indent_first_line=False))
    return '\n* ``%s`` %s' % (self.name, description)
132
class Enum(object):
  """An enum declared inside the style struct, with its documented values."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()
    # Value entries, appended while the enum body is parsed.
    self.values = []

  def __str__(self):
    # Only the values are rendered, one per line.
    return '\n'.join(str(value) for value in self.values)
141
class NestedEnum(object):
  """An enum-typed field of a nested struct, listed with its possible values."""

  def __init__(self, name, enumtype, comment, values):
    self.name = name
    self.type = enumtype
    # Stored as given (not stripped).
    self.comment = comment
    self.values = values

  def __str__(self):
    header = '\n* ``%s %s``\n%s' % (to_yaml_type(self.type), self.name,
                                   doxygen2rst(indent(self.comment, 2)))
    values_title = indent('\nPossible values:\n\n', 2)
    values_list = indent('\n'.join(str(value) for value in self.values), 2)
    return header + values_title + values_list
155
class EnumValue(object):
  """One enumerator together with its comment and configuration spelling."""

  def __init__(self, name, comment, config):
    self.name = name
    self.comment = comment
    self.config = config

  def __str__(self):
    # Drop everything up to and including the last underscore of `config`.
    config_name = re.sub('.*_', '', self.config)
    description = doxygen2rst(indent(self.comment, 2))
    return '* ``%s`` (in configuration: ``%s``)\n%s' % (
        self.name, config_name, description)
167
168
class OptionsReader:
  """Reads the documented options of a style struct from a C++ header.

  The header is scanned line by line with a small state machine that follows
  ``///`` comments, fields, nested enums and nested structs inside
  ``struct FormatStyle`` or ``struct IncludeStyle``. Problems in the comments
  are reported as warnings on stderr.
  """

  def __init__(self, header: TextIOWrapper):
    """Create a reader for ``header``.

    Args:
      header: open text stream of the header; its ``name`` attribute is used
        in diagnostics.
    """
    self.header = header
    # True between a ``\\code`` line and the matching ``\\endcode`` line.
    self.in_code_block = False
    # Number of spaces between ``/// `` and ``\\code`` on the opening line.
    self.code_indent = 0
    # 1-based number of the line currently being processed.
    self.lineno = 0
    # Line of the last indentation warning, used to group consecutive ones.
    self.last_err_lineno = -1

  def __file_path(self):
    """Return the header path relative to the current directory."""
    return os.path.relpath(self.header.name)

  def __print_line(self, line: str):
    """Print ``line`` to stderr, prefixed with the current line number."""
    print(f'{self.lineno:>6} | {line}', file=sys.stderr)

  def __warning(self, msg: str, line: str):
    """Print a ``file:line: warning`` diagnostic followed by the line."""
    print(f'{self.__file_path()}:{self.lineno}: warning: {msg}:', file=sys.stderr)
    self.__print_line(line)

  def __parse_field(self, line: str):
    """Split a ``Type name;`` declaration into ``(type, name)``.

    Raises:
      Exception: if ``line`` is not a field declaration; the message carries
        the header path, the line number and the offending line.
    """
    match = re.match(r'([<>:\w(,\s)]+)\s+(\w+);', line)
    if not match:
      raise Exception(
          f'{self.__file_path()}:{self.lineno}: '
          f'invalid format, expected a field declaration\n{line}')
    return match.groups()

  def __clean_comment_line(self, line: str):
    """Convert one stripped ``///`` comment line into documentation text.

    ``\\code`` (optionally ``\\code{.lang}``) and ``\\warning`` lines become
    reStructuredText directives, ``\\endcode`` and ``\\endwarning`` lines
    produce nothing, and any other line is returned without its first four
    characters and with a newline appended. Code-block state is tracked on
    the instance; a nested ``\\code``, an unmatched ``\\endcode`` and
    under-indented code lines produce warnings.

    Args:
      line: the comment line, already stripped of surrounding whitespace.

    Returns:
      The text to append to the comment collected so far.
    """
    # The dot of ``{.lang}`` is matched literally.
    match = re.match(r'^/// (?P<indent> +)?\\code(\{\.(?P<lang>\w+)\})?$', line)
    if match:
      if self.in_code_block:
        self.__warning('`\\code` in another `\\code`', line)
      self.in_code_block = True
      indent_str = match.group('indent')
      if not indent_str:
        indent_str = ''
      self.code_indent = len(indent_str)
      lang = match.group('lang')
      if not lang:
        lang = 'c++'
      return f'\n{indent_str}.. code-block:: {lang}\n\n'

    endcode_match = re.match(r'^/// +\\endcode$', line)
    if endcode_match:
      if not self.in_code_block:
        self.__warning('no correct `\\code` found before this `\\endcode`', line)
      self.in_code_block = False
      return ''

    # check code block indentation
    if (self.in_code_block and not line == '///' and not
        line.startswith('///  ' + ' ' * self.code_indent)):
      # Consecutive offending lines share one warning header.
      if self.last_err_lineno == self.lineno - 1:
        self.__print_line(line)
      else:
        self.__warning('code block should be indented', line)
      self.last_err_lineno = self.lineno

    match = re.match(r'^/// \\warning$', line)
    if match:
      return '\n.. warning:: \n\n'

    endwarning_match = re.match(r'^/// +\\endwarning$', line)
    if endwarning_match:
      return ''
    return line[4:] + '\n'

  def read_options(self):
    """Parse the header and return its documented options.

    Returns:
      A list of Option objects in declaration order. Options whose type is an
      enum or struct declared in the header have ``enum`` or
      ``nested_struct`` set accordingly.

    Raises:
      Exception: if a line inside the struct is neither a comment, a field,
        an enum nor a struct, if a field declaration cannot be parsed, if the
        closing ``};`` of the struct is never reached, or if an option has a
        type that is neither built in nor declared in the header.
    """
    class State:
      BeforeStruct, Finished, InStruct, InNestedStruct, InNestedFieldComment, \
        InFieldComment, InEnum, InEnumMemberComment = range(8)
    state = State.BeforeStruct

    options = []
    enums = {}
    nested_structs = {}
    comment = ''
    enum = None
    nested_struct = None
    version = None

    for line in self.header:
      self.lineno += 1
      line = line.strip()
      if state == State.BeforeStruct:
        if line in ('struct FormatStyle {', 'struct IncludeStyle {'):
          state = State.InStruct
      elif state == State.InStruct:
        if line.startswith('///'):
          state = State.InFieldComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          state = State.Finished
          break
      elif state == State.InFieldComment:
        if line.startswith(r'/// \version'):
          match = re.match(r'/// \\version\s*(?P<version>[0-9.]+)*', line)
          if match:
            version = match.group('version')
        elif line.startswith('///'):
          comment += self.__clean_comment_line(line)
        elif line.startswith('enum'):
          state = State.InEnum
          name = re.sub(r'enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{', '\\1', line)
          enum = Enum(name, comment)
        elif line.startswith('struct'):
          state = State.InNestedStruct
          name = re.sub(r'struct\s+(\w+)\s*\{', '\\1', line)
          nested_struct = NestedStruct(name, comment)
        elif line.endswith(';'):
          # A field that is commented out with `// ` is still documented.
          prefix = '// '
          if line.startswith(prefix):
            line = line[len(prefix):]
          state = State.InStruct
          field_type, field_name = self.__parse_field(line)

          if not version:
            self.__warning(f'missing version for {field_name}', line)
          option = Option(str(field_name), str(field_type), comment, version)
          options.append(option)
          # A version applies only to the option it was declared for.
          version = None
        else:
          raise Exception('Invalid format, expected comment, field or enum\n' + line)
      elif state == State.InNestedStruct:
        if line.startswith('///'):
          state = State.InNestedFieldComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          state = State.InStruct
          nested_structs[nested_struct.name] = nested_struct
      elif state == State.InNestedFieldComment:
        if line.startswith('///'):
          comment += self.__clean_comment_line(line)
        else:
          state = State.InNestedStruct
          field_type, field_name = self.__parse_field(line)
          # Only enums parsed earlier in the header are recognized here.
          if field_type in enums:
            nested_struct.values.append(NestedEnum(field_name,
                                                   field_type,
                                                   comment,
                                                   enums[field_type].values))
          else:
            nested_struct.values.append(NestedField(field_type + " " + field_name, comment))

      elif state == State.InEnum:
        if line.startswith('///'):
          state = State.InEnumMemberComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          state = State.InStruct
          enums[enum.name] = enum
        else:
          # Enum member without documentation. Must be documented where the enum
          # is used.
          pass
      elif state == State.InEnumMemberComment:
        if line.startswith('///'):
          comment += self.__clean_comment_line(line)
        else:
          state = State.InEnum
          val = line.replace(',', '')
          # A trailing ` // Name` comment overrides the configuration name.
          pos = val.find(" // ")
          if pos != -1:
            config = val[pos + 4:]
            val = val[:pos]
          else:
            config = val
          enum.values.append(EnumValue(val, comment, config))
    if state != State.Finished:
      raise Exception('Not finished by the end of file')

    # Attach the enum or nested struct that describes each non-builtin type.
    for option in options:
      if option.type not in ['bool', 'unsigned', 'int', 'std::string',
                             'std::vector<std::string>',
                             'std::vector<IncludeCategory>',
                             'std::vector<RawStringFormat>']:
        if option.type in enums:
          option.enum = enums[option.type]
        elif option.type in nested_structs:
          option.nested_struct = nested_structs[option.type]
        else:
          raise Exception('Unknown type: %s' % option.type)
    return options
344
345
# Collect the documented options from both style headers, in this order.
opts = []
for header_path in (FORMAT_STYLE_FILE, INCLUDE_STYLE_FILE):
  with open(header_path) as f:
    opts += OptionsReader(f).read_options()

# Render the options sorted by name, separated by blank lines.
opts.sort(key=lambda option: option.name)
options_text = '\n\n'.join(str(option) for option in opts)

with open(DOC_FILE) as f:
  contents = f.read()

contents = substitute(contents, 'FORMAT_STYLE_OPTIONS', options_text)

# Written as bytes, so no newline translation is applied on output.
with open(DOC_FILE, 'wb') as output:
  output.write(contents.encode())
361