" Vim indent file
" Language:            DTD (Document Type Definition for XML)
" Maintainer:          Doug Kearns <[email protected]>
" Previous Maintainer: Nikolai Weibull <[email protected]>
" Last Change:         24 Sep 2021

setlocal indentexpr=GetDTDIndent()
setlocal indentkeys=!^F,o,O,>
setlocal nosmartindent

let b:undo_indent = "setl inde< indk< si<"

" The functions below only need to be defined once per Vim session; the
" buffer-local options above are set on every load.
if exists("*GetDTDIndent")
  finish
endif

let s:cpo_save = &cpo
set cpo&vim

" Default token: a run of non-whitespace characters.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'

" Read a single token from a:input, starting at byte index a:start.
"
" Leading whitespace (including newlines) is skipped first.  The optional
" third argument is the pattern the token must match; it is matched at the
" position following the whitespace, so it should be anchored with '^'.
" Defaults to s:token_pattern.
"
" Returns [token, end] where end is the byte index just past the token.  If
" no token can be read, returns ["", a:start] so that the caller's position
" is left unchanged.
function s:lex1(input, start, ...)
  let pattern = a:0 > 0 ? a:1 : s:token_pattern
  let start = matchend(a:input, '^\_s*', a:start)
  if start == -1
    return ["", a:start]
  endif
  let end = matchend(a:input, pattern, start)
  if end == -1
    return ["", a:start]
  endif
  let token = strpart(a:input, start, end - start)
  return [token, end]
endfunction

" Like s:lex1(), but skips over comments delimited by a pair of '--' tokens.
"
" Whenever the token read is '--', tokens are consumed up to and including
" the next '--' and the token after that is returned instead.  If the input
" runs out before the closing '--' is found, the empty-token result of
" s:lex1() is returned.
function s:lex(input, start, ...)
  let pattern = a:0 > 0 ? a:1 : s:token_pattern
  let info = s:lex1(a:input, a:start, pattern)
  while info[0] == '--'
    " Inside a comment: consume tokens until the closing '--'.
    let info = s:lex1(a:input, info[1], pattern)
    while info[0] != "" && info[0] != '--'
      let info = s:lex1(a:input, info[1], pattern)
    endwhile
    if info[0] == ""
      " Unterminated comment.
      return info
    endif
    " Read the token following the comment; it may start another comment.
    let info = s:lex1(a:input, info[1], pattern)
  endwhile
  return info
endfunction

" Scan a parenthesised group in a:line.  a:end is the byte index just past
" the opening parenthesis of the group.
"
" Returns [indent, end]:
"   - If the outermost group is closed, indent is -1 and end is the byte
"     index just past its closing parenthesis.
"   - Otherwise indent is the display column just after the innermost
"     parenthesis that is still open, measured from the start of the line
"     that parenthesis is on, and end is where scanning stopped.
function s:indent_to_innermost_parentheses(line, end)
  let token = '('
  let end = a:end
  " Stack of byte indexes of the currently open parentheses.
  let parentheses = [end - 1]
  while token != ""
    let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
    if token[0] == '('
      call add(parentheses, end - 1)
    elseif token[0] == ')'
      if len(parentheses) == 1
        return [-1, end]
      endif
      call remove(parentheses, -1)
    endif
  endwhile

  " Measure the text from the start of the parenthesis' own line up to and
  " including the parenthesis in display cells rather than bytes.  An indent
  " is a display column, so a byte count is wrong as soon as the line
  " contains a tab or a multi-cell character before the parenthesis.
  let innermost = parentheses[-1]
  let line_start = strridx(a:line, "\n", innermost) + 1
  let prefix = strpart(a:line, line_start, innermost - line_start + 1)
  return [strdisplaywidth(prefix), end]
endfunction

" TODO: Line and end could be script global (think OO members).
" Compute the indent for line v:lnum of a DTD.  Used as 'indentexpr'.
"
" Returns the indent in display columns, or -1 to leave the decision to Vim
" (the current indent is kept).
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for the nearest <! (the search itself does not
  " check whether the match is inside a comment).  From here, depending on
  " what follows immediately after, parse to where we’re at to determine what
  " to do.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  let lnum = line('.')
  " The cursor is on the “!”, so its 1-based column is also the 0-based byte
  " index of the character following “<!”.
  let col = col('.')
  let indent = indent('.')
  " Everything from the declaration’s first line up to, but not including,
  " the line being indented.
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + shiftwidth()
  elseif declaration == '--'
    " We’re looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn’t, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line’s
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found, indent
    " one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + shiftwidth()
    endif

    " Next comes the content model.  If the token we’ve found isn’t a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we’re done indenting this element, so set it to that of the first
    " line so that the terminating “>” winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we’re done.  If that happens we must
    " have a complete content model.  Thus set indentation to be the same as that
    " of the first line so that the terminating “>” winds up having the same
    " indentation.  Otherwise, we’ll indent to the innermost parentheses not yet
    " matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a “+” or “-” followed by another content model
    " declaration.
    " TODO: Can the “-” be separated by whitespace from the “(”?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
      endif

      " If we’ve seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless the
      " current line is a lone “>”, in which case we indent to the same level
      " as the first line.  Otherwise, if the attribute name is “>”, we have
      " actually hit the end of the attribute list, in which case we indent to
      " the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it’s an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don’t reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + shiftwidth() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another token
        " for the actual value.  If it doesn’t exist, indent three levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute’s default value.  If none exists, indent
      " two levels.  The value may be quoted with either double or single
      " quotes, just like the literals in the ENTITY and NOTATION branches.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|''\_[^'']*''\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + shiftwidth() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value.  If none exists, indent three
        " levels.
        let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|''\_[^'']*''\)')
        if default == ""
          return indent + shiftwidth() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, “%”, this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If it
    " does exist, indent to same level as first line, as we’re now done with
    " this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external unparsed entity.  In that case we have to look
    " further for (possibly) a public ID and a URI followed by the NDATA
    " keyword and the actual notation name.  For the public ID and URI, indent
    " two levels, if they don’t exist.  If the NDATA keyword doesn’t exist,
    " indent one level.  Otherwise, if the actual notation name doesn’t exist,
    " indent two levels.  If it does, indent to same level as first line, as
    " we’re now done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + shiftwidth()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        return indent + shiftwidth()
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + shiftwidth() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + shiftwidth()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it’s
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction

let &cpo = s:cpo_save
unlet s:cpo_save