xref: /vim-8.2.3635/runtime/indent/dtd.vim (revision 6e649224)
1" Vim indent file
2" Language:		DTD (Document Type Definition for XML)
3" Maintainer:		Doug Kearns <[email protected]>
4" Previous Maintainer:	Nikolai Weibull <[email protected]>
5" Last Change:		24 Sep 2021
6
" Buffer-local indent options: indentation is computed by GetDTDIndent() and
" is re-evaluated on CTRL-F in Insert mode, when a line is opened with o or O,
" and when a '>' is typed.  'smartindent' is switched off for the buffer.
setlocal indentexpr=GetDTDIndent() indentkeys=!^F,o,O,> nosmartindent

" Command that resets the options set above.
let b:undo_indent = "setl inde< indk< si<"

" The functions below only need to be defined once per session.
if exists("*GetDTDIndent") | finish | endif

" Use Vim's default 'cpoptions' while the rest of the script is sourced.
let s:cpo_save = &cpo
set cpo&vim

" Default token: a run of non-whitespace characters.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'
22
" Read a single token from a:input, beginning at byte index a:start.
"
" Whitespace (including newlines) in front of the token is skipped.  The
" optional third argument is the pattern the token has to match; when it is
" left out s:token_pattern is used.
"
" Returns [token, index just past the token].  If no token can be read the
" result is ["", a:start], i.e. the position is left where it was.
function s:lex1(input, start, ...)
  let l:pat = a:0 > 0 ? a:1 : s:token_pattern
  let l:from = matchend(a:input, '^\_s*', a:start)
  if l:from != -1
    let l:to = matchend(a:input, l:pat, l:from)
    if l:to != -1
      return [strpart(a:input, l:from, l:to - l:from), l:to]
    endif
  endif
  return ["", a:start]
endfunction
36
" Read the next token from a:input like s:lex1(), but step over anything
" enclosed between a pair of '--' tokens first.
"
" Takes the same arguments as s:lex1() and returns the same kind of
" [token, end] pair.  When a '--' is opened but never closed the failing
" result of s:lex1() (an empty token) is returned.
function s:lex(input, start, ...)
  let l:pat = a:0 > 0 ? a:1 : s:token_pattern
  let [l:tok, l:pos] = s:lex1(a:input, a:start, l:pat)
  while l:tok == '--'
    " Consume tokens up to the closing '--' or until the input runs out.
    let [l:tok, l:pos] = s:lex1(a:input, l:pos, l:pat)
    while l:tok != "" && l:tok != '--'
      let [l:tok, l:pos] = s:lex1(a:input, l:pos, l:pat)
    endwhile
    if l:tok == ""
      break
    endif
    " The token after the closing '--' is the next candidate.
    let [l:tok, l:pos] = s:lex1(a:input, l:pos, l:pat)
  endwhile
  return [l:tok, l:pos]
endfunction
52
" Scan a parenthesized group (content model or enumeration) whose opening
" '(' has just been consumed, keeping track of the nesting of parentheses.
"
" line: the text being lexed; physical lines are separated by "\n".
" end:  byte index just past the '(' that was already consumed.
"
" Returns [indent, end].  indent is -1 when the group was closed, in which
" case end lies just past the matching ')'.  Otherwise the input ran out (or
" an unexpected token was met) while a parenthesis was still open, and indent
" is the display column just after the innermost open parenthesis.
function s:indent_to_innermost_parentheses(line, end)
  let token = '('
  let end = a:end
  " Stack of byte indexes of the parentheses that are currently open.
  let parentheses = [end - 1]
  while token != ""
    let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
    if token[0] == '('
      call add(parentheses, end - 1)
    elseif token[0] == ')'
      if len(parentheses) == 1
        return [-1, end]
      endif
      call remove(parentheses, -1)
    endif
  endwhile

  " Measure from the start of the physical line that holds the parenthesis.
  " The width is taken with strdisplaywidth() instead of counting bytes so
  " that tabs and multi-byte characters in front of the parenthesis still
  " yield the column the parenthesis is displayed in.
  let paren = parentheses[-1]
  let bol = strridx(a:line, "\n", paren) + 1
  return [strdisplaywidth(strpart(a:line, bol, paren - bol + 1)), end]
endfunction
70
71" TODO: Line and end could be script global (think OO members).
" Compute the indent of line v:lnum in a DTD buffer; this is the function
" 'indentexpr' is set to.
"
" The nearest '<!' at or before the cursor is located, the text from the
" line holding it up to (but not including) line v:lnum is joined with "\n",
" and that text is tokenized to find out how far the declaration has got.
"
" Returns the indent in columns, or -1 to leave the decision to Vim (used for
" unterminated comments and for anything that is not an ELEMENT, ATTLIST,
" ENTITY or NOTATION declaration).
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for a <!.  From here, depending on what follows
  " immediately after, parse to where we're at to determine what to do.
  " NOTE(review): nothing here verifies that the <! found is outside a
  " comment.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  " The 'e' flag leaves the cursor on the '!', so the 1-based column doubles
  " as the 0-based index of the first byte after '<!'.
  let lnum = line('.')
  let col = col('.')
  let indent = indent('.')
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + shiftwidth()
  elseif declaration == '--'
    " We're looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn't, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line's
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found, indent
    " one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + shiftwidth()
    endif

    " Next comes the content model.  If the token we've found isn't a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we're done indenting this element, so set it to that of the first
    " line so that the terminating '>' winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we're done.  If that happens we must
    " have a complete content model.  Thus set indentation to be the same as that
    " of the first line so that the terminating '>' winds up having the same
    " indentation.  Otherwise, we'll indent to the innermost parentheses not yet
    " matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a '+' or '-' followed by another content model
    " declaration.
    " TODO: Can the '-' be separated by whitespace from the '('?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
      endif

      " If we've seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless the
      " current line is a lone '>', in which case we indent to the same level
      " as the first line.  Otherwise, if the attribute name is '>', we have
      " actually hit the end of the attribute list, in which case we indent to
      " the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it's an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don't reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + shiftwidth() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another
        " token for the actual value.  If it doesn't exist, indent three
        " levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute's default value.  If none exists,
      " indent two levels.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + shiftwidth() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value.  If none exists, indent three
        " levels.
        let [default, end] = s:lex(line, end, '^"\_[^"]*"')
        if default == ""
          return indent + shiftwidth() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, '%', this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If it
    " does exist, indent to same level as first line, as we're now done with
    " this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external entity.  In that case we have to look further
    " for (possibly) a public ID and a URI, optionally followed by the NDATA
    " keyword and the actual notation name.  For the public ID and URI, indent
    " two levels, if they don't exist.  If a '>' follows them the entity is
    " complete, so indent to the same level as the first line.  If nothing
    " follows them, indent one level, unless the current line is a lone '>'.
    " Otherwise, if the actual notation name doesn't exist, indent two
    " levels.  If it does, indent to same level as first line, as we're now
    " done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + shiftwidth()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif ndata == '>'
        " External entity without an NDATA part: the declaration ends here,
        " so the '>' must not be mistaken for the NDATA keyword.
        return indent
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + shiftwidth() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + shiftwidth()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it's
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction
326
" Put back the 'cpoptions' value that was saved before the functions were
" defined, and drop the temporary variable.
let &cpo = s:cpo_save | unlet s:cpo_save
329