xref: /vim-8.2.3635/runtime/indent/dtd.vim (revision 2bf24176)
1" Vim indent file
2" Language:	    DTD (Document Type Definition for XML)
3" Maintainer:       Nikolai Weibull <[email protected]>
4" Latest Revision:  2011-07-08
5
" Buffer-local indent settings.  These must be applied for every DTD buffer,
" so they come before the "functions already defined" guard below.
setlocal indentexpr=GetDTDIndent()
setlocal indentkeys=!^F,o,O,>
setlocal nosmartindent

" Lets Vim undo the settings above when the buffer's filetype changes.
let b:undo_indent = "setl inde< indk< si<"

" The functions only need to be defined once.  This check is done BEFORE
" 'cpoptions' is saved and reset: bailing out with :finish after "set cpo&vim"
" would skip the restore at the end of the script and leave the user's
" 'cpoptions' clobbered.
if exists("*GetDTDIndent")
  finish
endif

let s:cpo_save = &cpo
set cpo&vim

" Default token pattern: a run of non-whitespace characters.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'
19
" Read a single token from a:input, beginning at byte offset a:start.
"
" Leading whitespace (including newlines) is skipped, then the token pattern
" is matched at the resulting position.  The optional third argument
" overrides the default pattern, s:token_pattern.
"
" Returns [token, offset-just-past-token] on success, or ["", a:start] when
" no token could be read.
function s:lex1(input, start, ...)
  if a:0 > 0
    let regex = a:1
  else
    let regex = s:token_pattern
  endif
  let nothing = ["", a:start]

  " Step over any whitespace in front of the token.
  let from = matchend(a:input, '^\_s*', a:start)
  if from == -1
    return nothing
  endif

  let upto = matchend(a:input, regex, from)
  if upto == -1
    return nothing
  endif

  return [strpart(a:input, from, upto - from), upto]
endfunction
33
" Read the next token from a:input starting at byte offset a:start, skipping
" over any "-- ... --" comment groups that come first.
"
" Takes the same arguments and returns the same [token, offset] pair as
" s:lex1().  If a comment group is opened but its closing "--" is never
" found, the empty-token result of the failed read is returned.
function s:lex(input, start, ...)
  if a:0 > 0
    let regex = a:1
  else
    let regex = s:token_pattern
  endif

  let result = s:lex1(a:input, a:start, regex)
  while result[0] == '--'
    " We are at an opening "--": consume tokens until the closing "--" or
    " until the input runs out.
    while 1
      let result = s:lex1(a:input, result[1], regex)
      if result[0] == "" || result[0] == '--'
        break
      endif
    endwhile

    if result[0] == ""
      return result
    endif

    " Step past the closing "--" to the token that follows it.
    let result = s:lex1(a:input, result[1], regex)
  endwhile

  return result
endfunction
49
" Walk through a parenthesised group in a:line whose opening "(" ends just
" before byte offset a:end, tracking the offsets of the parentheses that are
" still open.
"
" Returns [-1, offset] as soon as the outermost parenthesis is closed, where
" offset is just past the closing ")".  If the input runs out first, returns
" [column, offset], where column is the distance of the innermost unclosed
" "(" from the preceding newline in a:line (or from the start of a:line when
" there is none).
function s:indent_to_innermost_parentheses(line, end)
  let item_pattern = '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\='
  let pos = a:end
  " Offsets of the currently unclosed "(", outermost first.
  let stack = [pos - 1]

  while 1
    let [item, pos] = s:lex(a:line, pos, item_pattern)
    if item == ""
      break
    endif

    let first = item[0]
    if first == '('
      call add(stack, pos - 1)
    elseif first == ')'
      if len(stack) == 1
        " The outermost parenthesis has just been closed.
        return [-1, pos]
      endif
      call remove(stack, -1)
    endif
  endwhile

  let innermost = stack[-1]
  return [innermost - strridx(a:line, "\n", innermost), pos]
endfunction
67
68" TODO: Line and end could be script global (think OO members).
" Compute the indent of line v:lnum in a DTD buffer; used as 'indentexpr'.
"
" Searches backwards from the cursor for the nearest "<!" and lexes the text
" from there (up to the line before v:lnum, or v:lnum itself when the "<!" is
" on that line) to work out which part of which declaration is being
" continued.
"
" Returns the indent in columns, or -1 to let Vim keep the current indent.
" Relative indents are multiples of shiftwidth() rather than of &sw, so that
" a 'shiftwidth' of 0 (meaning "use the 'tabstop' value") still yields a
" non-zero indent step.
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for a <! that isn’t inside a comment.
  " From here, depending on what follows immediately after, parse to
  " where we’re at to determine what to do.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  let lnum = line('.')
  let col = col('.')
  let indent = indent('.')
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + shiftwidth()
  elseif declaration == '--'
    " We’re looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn’t, let Vim take care of that using
    " 'comments' and 'autoindent'. Otherwise, indent to the first line's level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found, indent
    " one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + shiftwidth()
    endif

    " Next comes the content model.  If the token we’ve found isn’t a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we’re done indenting this element, so set it to that of the first
    " line so that the terminating “>” winds up having the same indention.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we’re done.  If that happens we must
    " have a complete content model.  Thus set indention to be the same as that
    " of the first line so that the terminating “>” winds up having the same
    " indention.  Otherwise, we’ll indent to the innermost parentheses not yet
    " matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a “+” or “-” followed by another content model
    " declaration.
    " TODO: Can the “-” be separated by whitespace from the “(”?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
      endif

      " If we’ve seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless the
      " current line is a lone “>”, in which case we indent to the same level
      " as the first line.  Otherwise, if the attribute name is “>”, we have
      " actually hit the end of the attribute list, in which case we indent to
      " the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it’s an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don’t reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + shiftwidth() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another token
        " for the actual value.  If it doesn’t exist, indent three levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute’s default value.  If none exists, indent
      " two levels.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + shiftwidth() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value.  If none exists, indent three
        " levels.
        let [default, end] = s:lex(line, end, '^"\_[^"]*"')
        if default == ""
          return indent + shiftwidth() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, “%”, this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If it
    " does exist, indent to same level as first line, as we’re now done with
    " this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external unparsed entity.  In that case we have to look
    " further for (possibly) a public ID and an URI followed by the NDATA
    " keyword and the actual notation name.  For the public ID and URI, indent
    " two levels, if they don’t exist.  If the NDATA keyword doesn’t exist,
    " indent one level.  Otherwise, if the actual notation name doesn’t exist,
    " indent two levels.  If it does, indent to same level as first line, as
    " we’re now done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + shiftwidth()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        return indent + shiftwidth()
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + shiftwidth() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + shiftwidth()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it’s
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction
323
" Put back the 'cpoptions' value that was saved in s:cpo_save earlier in this
" script, then drop the temporary script variable.
let &cpo = s:cpo_save
unlet s:cpo_save
326