#!/usr/bin/env python3
# A tool to parse the FormatStyle struct from Format.h and update the
# documentation in ../ClangFormatStyleOptions.rst automatically.
# Run from the directory in which this file is located to update the docs.

import inspect
import os
import re
import sys
from io import TextIOWrapper
from typing import Set

CLANG_DIR = os.path.join(os.path.dirname(__file__), '../..')
FORMAT_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Format/Format.h')
INCLUDE_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Tooling/Inclusions/IncludeStyle.h')
DOC_FILE = os.path.join(CLANG_DIR, 'docs/ClangFormatStyleOptions.rst')

PLURALS_FILE = os.path.join(os.path.dirname(__file__), 'plurals.txt')

# Plurals that were already generated on a previous run, one per line in
# PLURALS_FILE. Mode 'a+' creates the file when it does not exist yet; the
# seek(0) is needed because append mode positions the stream at the end.
plurals: Set[str] = set()
with open(PLURALS_FILE, 'a+') as f:
    f.seek(0)
    plurals = set(f.read().splitlines())


def substitute(text, tag, contents):
    """Replace the region between the START_<tag> / END_<tag> markers.

    The region (markers included) is replaced by the markers wrapped around
    ``contents``. The match is greedy and spans newlines, so everything from
    the first START marker to the last END marker is replaced.

    Raises:
        Exception: if ``text`` contains no such marker pair.
    """
    replacement = '\n.. START_%s\n\n%s\n\n.. END_%s\n' % (tag, contents, tag)
    escaped_tag = re.escape(tag)
    pattern = r'\n\.\. START_%s\n.*\n\.\. END_%s\n' % (escaped_tag, escaped_tag)
    # A callable replacement is inserted verbatim: backslashes in `contents`
    # are not interpreted as regex escapes and '%' characters elsewhere in
    # `text` are left alone.
    new_text, count = re.subn(pattern, lambda _match: replacement, text,
                              flags=re.S)
    if count == 0:
        raise Exception('Markers START_%s/END_%s not found' % (tag, tag))
    return new_text


def register_plural(singular: str, plural: str):
    """Record ``plural`` as a generated plural and return it.

    A plural that is not yet in ``plurals`` is added to the set, appended to
    PLURALS_FILE, and reported on stderr (together with the line number of the
    calling frame) so that it can be checked by a human. A one-time hint about
    how to handle PLURALS_FILE in git is printed to stdout before the first
    such report.
    """
    if plural not in plurals:
        # The function attribute serves as a "hint already printed" flag.
        if not hasattr(register_plural, "generated_new_plural"):
            print('Plural generation: you can use '
                  f'`git checkout -- {os.path.relpath(PLURALS_FILE)}` '
                  'to reemit warnings or `git add` to include new plurals\n')
            register_plural.generated_new_plural = True

        plurals.add(plural)
        with open(PLURALS_FILE, 'a') as f:
            f.write(plural + '\n')
        cf = inspect.currentframe()
        lineno = ''
        if cf and cf.f_back:
            lineno = ':' + str(cf.f_back.f_lineno)
        print(f'{__file__}{lineno} check if plural of {singular} is {plural}', file=sys.stderr)
    return plural


def pluralize(word: str):
    """Return a rule-based English plural of ``word`` and register it.

    Rules, checked case-insensitively and in this order:
      * consonant + 'y'            -> replace 'y' with 'ies'
      * 's', 'sh', 'ch', 'x', 'z'  -> append 'es'
      * 'fe'                       -> replace 'fe' with 'ves'
      * 'f' (but not 'ff')         -> replace 'f' with 'ves'
      * anything else              -> append 's'
    """
    lword = word.lower()
    if len(lword) >= 2 and lword[-1] == 'y' and lword[-2] not in 'aeiou':
        return register_plural(word, word[:-1] + 'ies')
    elif lword.endswith(('s', 'sh', 'ch', 'x', 'z')):
        # Append 'es' to the whole word; the final letter must be kept
        # ("Box" -> "Boxes").
        return register_plural(word, word + 'es')
    elif lword.endswith('fe'):
        return register_plural(word, word[:-2] + 'ves')
    elif lword.endswith('f') and not lword.endswith('ff'):
        return register_plural(word, word[:-1] + 'ves')
    else:
        return register_plural(word, word + 's')


def to_yaml_type(typestr: str):
    """Map a C++ type spelling to the type name shown in the documentation.

    ``std::vector<T>`` becomes 'List of ' followed by the pluralized name of
    ``T``; types without a known mapping are returned unchanged.
    """
    if typestr == 'bool':
        return 'Boolean'
    elif typestr == 'int':
        return 'Integer'
    elif typestr == 'unsigned':
        return 'Unsigned'
    elif typestr == 'std::string':
        return 'String'

    subtype, napplied = re.subn(r'^std::vector<(.*)>$', r'\1', typestr)
    if napplied == 1:
        return 'List of ' + pluralize(to_yaml_type(subtype))

    return typestr


def doxygen2rst(text):
    """Convert the doxygen markup handled by this tool to reStructuredText.

    ``<tt>x</tt>`` and ``\\c x`` become inline literals; any remaining
    backslash command followed by a space is dropped.
    """
    text = re.sub(r'<tt>\s*(.*?)\s*<\/tt>', r'``\1``', text)
    text = re.sub(r'\\c ([^ ,;\.]+)', r'``\1``', text)
    text = re.sub(r'\\\w+ ', '', text)
    return text


def indent(text, columns, indent_first_line=True):
    """Indent every non-empty line of ``text`` by ``columns`` spaces.

    The first line is left alone when ``indent_first_line`` is false or when
    ``text`` starts with a newline.
    """
    indent_str = ' ' * columns
    # Only a newline that is followed by a non-newline character gets the
    # indentation, so empty lines stay empty.
    s = re.sub(r'\n([^\n])', '\n' + indent_str + '\\1', text, flags=re.S)
    if not indent_first_line or s.startswith('\n'):
        return s
    return indent_str + s


class Option(object):
    """A documented field of the parsed style struct."""

    def __init__(self, name, opt_type, comment, version):
        self.name = name
        self.type = opt_type
        self.comment = comment.strip()
        # Filled in by OptionsReader.read_options() when the option's type is
        # an enum or a nested struct declared in the same header.
        self.enum = None
        self.nested_struct = None
        self.version = version

    def __str__(self):
        """Render the option as an entry of the reStructuredText document."""
        s = '**%s** (``%s``)' % (self.name, to_yaml_type(self.type))
        if self.version:
            s += ' :versionbadge:`clang-format %s`' % self.version
        s += '\n%s' % doxygen2rst(indent(self.comment, 2))
        if self.enum and self.enum.values:
            s += indent('\n\nPossible values:\n\n%s\n' % self.enum, 2)
        if self.nested_struct:
            s += indent('\n\nNested configuration flags:\n\n%s\n' % self.nested_struct,
                        2)
        return s


class NestedStruct(object):
    """A struct declared inside the style struct, with its documented fields."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()
        self.values = []

    def __str__(self):
        return self.comment + '\n' + '\n'.join(map(str, self.values))


class NestedField(object):
    """A non-enum field of a nested struct; ``name`` holds 'type name'."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()

    def __str__(self):
        return '\n* ``%s`` %s' % (
            self.name,
            doxygen2rst(indent(self.comment, 2, indent_first_line=False)))


class Enum(object):
    """An enum declared inside the style struct, with its documented values."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()
        self.values = []

    def __str__(self):
        return '\n'.join(map(str, self.values))


class NestedEnum(object):
    """A field of a nested struct whose type is a previously parsed enum."""

    def __init__(self, name, enumtype, comment, values):
        self.name = name
        self.comment = comment
        self.values = values
        self.type = enumtype

    def __str__(self):
        s = '\n* ``%s %s``\n%s' % (to_yaml_type(self.type), self.name,
                                   doxygen2rst(indent(self.comment, 2)))
        s += indent('\nPossible values:\n\n', 2)
        s += indent('\n'.join(map(str, self.values)), 2)
        return s


class EnumValue(object):
    """A documented enumerator.

    ``config`` is the enumerator name, or the text of a trailing ``// ``
    comment when the enumerator line has one.
    """

    def __init__(self, name, comment, config):
        self.name = name
        self.comment = comment
        self.config = config

    def __str__(self):
        # Everything up to and including the last underscore of `config` is
        # removed to obtain the spelling shown as the configuration value.
        return '* ``%s`` (in configuration: ``%s``)\n%s' % (
            self.name,
            re.sub('.*_', '', self.config),
            doxygen2rst(indent(self.comment, 2)))


class OptionsReader:
    """Line-based parser that extracts documented options from a header."""

    def __init__(self, header: TextIOWrapper):
        self.header = header
        # State of the \code ... \endcode tracking in doc comments.
        self.in_code_block = False
        self.code_indent = 0
        # Number of the line currently being processed (1-based).
        self.lineno = 0
        # Line of the most recent indentation warning; used to group warnings
        # about consecutive lines under a single message.
        self.last_err_lineno = -1

    def __file_path(self):
        return os.path.relpath(self.header.name)

    def __print_line(self, line: str):
        print(f'{self.lineno:>6} | {line}', file=sys.stderr)

    def __warning(self, msg: str, line: str):
        print(f'{self.__file_path()}:{self.lineno}: warning: {msg}:', file=sys.stderr)
        self.__print_line(line)

    def __parse_field(self, line: str):
        """Split a ``<type> <name>;`` declaration into (type, name).

        Raises:
            Exception: if ``line`` does not look like a field declaration.
        """
        match = re.match(r'([<>:\w(,\s)]+)\s+(\w+);', line)
        if not match:
            raise Exception('Invalid format, expected field declaration\n' + line)
        return match.groups()

    def __clean_comment_line(self, line: str):
        """Convert one ``///`` comment line to reStructuredText.

        ``\\code`` starts a ``code-block`` directive (language ``c++`` unless
        one is given as ``\\code{.lang}``), ``\\warning`` starts a ``warning``
        directive, and ``\\endcode`` / ``\\endwarning`` produce no output.
        Any other line is returned without its first four characters and with
        a newline appended. Warnings about misplaced markers and badly
        indented code block lines are printed to stderr.
        """
        match = re.match(r'^/// (?P<indent> +)?\\code(\{.(?P<lang>\w+)\})?$', line)
        if match:
            if self.in_code_block:
                self.__warning('`\\code` in another `\\code`', line)
            self.in_code_block = True
            indent_str = match.group('indent')
            if not indent_str:
                indent_str = ''
            self.code_indent = len(indent_str)
            lang = match.group('lang')
            if not lang:
                lang = 'c++'
            return f'\n{indent_str}.. code-block:: {lang}\n\n'

        endcode_match = re.match(r'^/// +\\endcode$', line)
        if endcode_match:
            if not self.in_code_block:
                self.__warning('no correct `\\code` found before this `\\endcode`', line)
            self.in_code_block = False
            return ''

        # check code block indentation
        if (self.in_code_block and not line == '///' and not
                line.startswith('///  ' + ' ' * self.code_indent)):
            if self.last_err_lineno == self.lineno - 1:
                # Continuation of the previous warning: only echo the line.
                self.__print_line(line)
            else:
                self.__warning('code block should be indented', line)
            self.last_err_lineno = self.lineno

        match = re.match(r'^/// \\warning$', line)
        if match:
            return '\n.. warning:: \n\n'

        endwarning_match = re.match(r'^/// +\\endwarning$', line)
        if endwarning_match:
            return ''
        return line[4:] + '\n'

    def read_options(self):
        """Parse the header and return its documented options.

        Scans for ``struct FormatStyle {`` or ``struct IncludeStyle {`` and
        collects every documented field up to the closing ``};``, together
        with the enums and nested structs declared in between.

        Returns:
            A list of Option objects in declaration order. Options whose type
            is an enum or nested struct from the same header have ``enum`` or
            ``nested_struct`` set accordingly.

        Raises:
            Exception: on an unexpected line, when the struct is not closed
                before the end of the file, or when an option has a type that
                is neither built in nor declared in the header.
        """
        class State:
            BeforeStruct, Finished, InStruct, InNestedStruct, InNestedFieldComment, \
                InFieldComment, InEnum, InEnumMemberComment = range(8)
        state = State.BeforeStruct

        options = []
        enums = {}
        nested_structs = {}
        comment = ''
        enum = None
        nested_struct = None
        version = None

        for line in self.header:
            self.lineno += 1
            line = line.strip()
            if state == State.BeforeStruct:
                if line in ('struct FormatStyle {', 'struct IncludeStyle {'):
                    state = State.InStruct
            elif state == State.InStruct:
                if line.startswith('///'):
                    state = State.InFieldComment
                    comment = self.__clean_comment_line(line)
                elif line == '};':
                    state = State.Finished
                    break
            elif state == State.InFieldComment:
                if line.startswith(r'/// \version'):
                    match = re.match(r'/// \\version\s*(?P<version>[0-9.]+)*', line)
                    if match:
                        version = match.group('version')
                elif line.startswith('///'):
                    comment += self.__clean_comment_line(line)
                elif line.startswith('enum'):
                    state = State.InEnum
                    name = re.sub(r'enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{', '\\1', line)
                    enum = Enum(name, comment)
                elif line.startswith('struct'):
                    state = State.InNestedStruct
                    name = re.sub(r'struct\s+(\w+)\s*\{', '\\1', line)
                    nested_struct = NestedStruct(name, comment)
                elif line.endswith(';'):
                    # A field declaration that is commented out with '// ' is
                    # parsed like a regular one.
                    prefix = '// '
                    if line.startswith(prefix):
                        line = line[len(prefix):]
                    state = State.InStruct
                    field_type, field_name = self.__parse_field(line)

                    if not version:
                        self.__warning(f'missing version for {field_name}', line)
                    option = Option(str(field_name), str(field_type), comment, version)
                    options.append(option)
                    version = None
                else:
                    raise Exception('Invalid format, expected comment, field or enum\n' + line)
            elif state == State.InNestedStruct:
                if line.startswith('///'):
                    state = State.InNestedFieldComment
                    comment = self.__clean_comment_line(line)
                elif line == '};':
                    state = State.InStruct
                    nested_structs[nested_struct.name] = nested_struct
            elif state == State.InNestedFieldComment:
                if line.startswith('///'):
                    comment += self.__clean_comment_line(line)
                else:
                    state = State.InNestedStruct
                    field_type, field_name = self.__parse_field(line)
                    if field_type in enums:
                        nested_struct.values.append(NestedEnum(field_name,
                                                               field_type,
                                                               comment,
                                                               enums[field_type].values))
                    else:
                        nested_struct.values.append(NestedField(field_type + " " + field_name, comment))

            elif state == State.InEnum:
                if line.startswith('///'):
                    state = State.InEnumMemberComment
                    comment = self.__clean_comment_line(line)
                elif line == '};':
                    state = State.InStruct
                    enums[enum.name] = enum
                else:
                    # Enum member without documentation. Must be documented where the enum
                    # is used.
                    pass
            elif state == State.InEnumMemberComment:
                if line.startswith('///'):
                    comment += self.__clean_comment_line(line)
                else:
                    state = State.InEnum
                    val = line.replace(',', '')
                    # A trailing ' // text' comment on the enumerator line
                    # supplies the configuration spelling.
                    pos = val.find(" // ")
                    if pos != -1:
                        config = val[pos + 4:]
                        val = val[:pos]
                    else:
                        config = val
                    enum.values.append(EnumValue(val, comment, config))
        if state != State.Finished:
            raise Exception('Not finished by the end of file')

        for option in options:
            if option.type not in ['bool', 'unsigned', 'int', 'std::string',
                                   'std::vector<std::string>',
                                   'std::vector<IncludeCategory>',
                                   'std::vector<RawStringFormat>']:
                if option.type in enums:
                    option.enum = enums[option.type]
                elif option.type in nested_structs:
                    option.nested_struct = nested_structs[option.type]
                else:
                    raise Exception('Unknown type: %s' % option.type)
        return options


def main():
    """Regenerate the options section of DOC_FILE from the two headers."""
    with open(FORMAT_STYLE_FILE, encoding='utf-8') as f:
        opts = OptionsReader(f).read_options()
    with open(INCLUDE_STYLE_FILE, encoding='utf-8') as f:
        opts += OptionsReader(f).read_options()

    opts = sorted(opts, key=lambda x: x.name)
    options_text = '\n\n'.join(map(str, opts))

    # Read with the same encoding that is used for writing below
    # (str.encode() defaults to UTF-8).
    with open(DOC_FILE, encoding='utf-8') as f:
        contents = f.read()

    contents = substitute(contents, 'FORMAT_STYLE_OPTIONS', options_text)

    # Binary mode: the '\n' line endings are written without any
    # platform-specific newline translation.
    with open(DOC_FILE, 'wb') as output:
        output.write(contents.encode())


if __name__ == '__main__':
    main()