libgo: update to go1.9
[official-gcc.git] / libgo / go / go / doc / comment.go
blob4228e8cd9c5573000984b0d38c581561fa88ea62
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Godoc comment extraction and comment -> HTML formatting.
7 package doc
9 import (
10 "io"
11 "regexp"
12 "strings"
13 "text/template" // for HTMLEscape
14 "unicode"
15 "unicode/utf8"
// Typographic double quotes substituted by commentEscape when "nice"
// output is requested.
//
// NOTE(review): these are the literal UTF-8 quote characters as they
// appear in this copy of the file; upstream may spell them as the HTML
// entities &ldquo; and &rdquo; — confirm against the pristine source.
var (
	ldquo = []byte("“")
	rdquo = []byte("”")
)

// commentEscape writes text to w with HTML special characters escaped
// (via template.HTMLEscape). If nice is set, each pair of adjacent
// backquotes (``) is replaced by ldquo and each pair of adjacent single
// quotes ('') by rdquo instead of being escaped.
//
// Errors returned by w are ignored.
func commentEscape(w io.Writer, text string, nice bool) {
	last := 0 // start of the text that has not been written yet
	if nice {
		// Stop one byte early: a pair needs two bytes.
		for i := 0; i < len(text)-1; i++ {
			ch := text[i]
			if ch == text[i+1] && (ch == '`' || ch == '\'') {
				// Flush everything before the pair, then emit the
				// replacement quote in place of the two bytes.
				template.HTMLEscape(w, []byte(text[last:i]))
				last = i + 2
				switch ch {
				case '`':
					w.Write(ldquo)
				case '\'':
					w.Write(rdquo)
				}
				i++ // loop will add one more
			}
		}
	}
	template.HTMLEscape(w, []byte(text[last:]))
}
const (
	// Regexp for Go identifiers
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check in pairedParensPrefixLen for balance - see #5043
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid urls ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	urlRx = protoPart + `://` + hostPart + pathPart
)

// matchRx matches either a URL or a Go identifier. The URL alternative
// comes first, so it wins when both could match at the same position.
//
// Capture groups: 1 = whole URL, 2 = protocol, 3 = host, 4 = last
// repetition of the path group, 5 = identifier. A submatch index slice
// therefore has 12 entries, and entry 2 is non-negative exactly when the
// URL alternative matched.
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
// HTML fragments written verbatim by emphasize and ToHTML.
// The *_a/*_h openers end inside an attribute value; the matching
// html_aq/html_hq fragment closes the attribute and the opening tag.
var (
	html_a      = []byte(`<a href="`)
	html_aq     = []byte(`">`)
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`)
	html_hq     = []byte(`">`)
	html_endh   = []byte("</h3>\n")
)
// pairedParensPrefixLen returns the length of the longest prefix of s
// containing paired parentheses.
//
// If s holds a ')' with no matching '(', the result is the index of that
// ')'. If an outermost '(' is never closed, the result is the index of
// that '('. Otherwise the result is len(s).
func pairedParensPrefixLen(s string) int {
	parens := 0 // current nesting depth
	l := len(s) // length of the longest balanced prefix seen so far
	for i, ch := range s {
		switch ch {
		case '(':
			if parens == 0 {
				// Tentatively cut here until this group is closed.
				l = i
			}
			parens++
		case ')':
			parens--
			if parens == 0 {
				// Outermost group closed: everything so far is balanced.
				l = len(s)
			} else if parens < 0 {
				// Closing parenthesis without an opener.
				return i
			}
		}
	}
	return l
}
106 // Emphasize and escape a line of text for HTML. URLs are converted into links;
107 // if the URL also appears in the words map, the link is taken from the map (if
108 // the corresponding map value is the empty string, the URL is not converted
109 // into a link). Go identifiers that appear in the words map are italicized; if
110 // the corresponding map value is not the empty string, it is considered a URL
111 // and the word is converted into a link. If nice is set, the remaining text's
112 // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
113 // and '' into &rdquo;).
114 func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
115 for {
116 m := matchRx.FindStringSubmatchIndex(line)
117 if m == nil {
118 break
120 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
122 // write text before match
123 commentEscape(w, line[0:m[0]], nice)
125 // adjust match if necessary
126 match := line[m[0]:m[1]]
127 if n := pairedParensPrefixLen(match); n < len(match) {
128 // match contains unpaired parentheses (rare);
129 // redo matching with shortened line for correct indices
130 m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
131 match = match[:n]
134 // analyze match
135 url := ""
136 italics := false
137 if words != nil {
138 url, italics = words[match]
140 if m[2] >= 0 {
141 // match against first parenthesized sub-regexp; must be match against urlRx
142 if !italics {
143 // no alternative URL in words list, use match instead
144 url = match
146 italics = false // don't italicize URLs
149 // write match
150 if len(url) > 0 {
151 w.Write(html_a)
152 template.HTMLEscape(w, []byte(url))
153 w.Write(html_aq)
155 if italics {
156 w.Write(html_i)
158 commentEscape(w, match, nice)
159 if italics {
160 w.Write(html_endi)
162 if len(url) > 0 {
163 w.Write(html_enda)
166 // advance
167 line = line[m[1]:]
169 commentEscape(w, line, nice)
// indentLen returns the number of leading space and tab bytes in s.
func indentLen(s string) int {
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	return i
}
// isBlank reports whether s is empty or consists of a single newline.
// Lines containing only spaces or tabs are not considered blank.
func isBlank(s string) bool {
	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
}
// commonPrefix returns the longest byte-wise prefix shared by a and b.
func commonPrefix(a, b string) string {
	i := 0
	for i < len(a) && i < len(b) && a[i] == b[i] {
		i++
	}
	return a[0:i]
}
192 func unindent(block []string) {
193 if len(block) == 0 {
194 return
197 // compute maximum common white prefix
198 prefix := block[0][0:indentLen(block[0])]
199 for _, line := range block {
200 if !isBlank(line) {
201 prefix = commonPrefix(prefix, line[0:indentLen(line)])
204 n := len(prefix)
206 // remove
207 for i, line := range block {
208 if !isBlank(line) {
209 block[i] = line[n:]
// heading returns the trimmed line if it passes as a section heading;
// otherwise it returns the empty string.
//
// To pass, the trimmed line must be non-empty, start with an uppercase
// letter, end in a letter or digit, contain none of the punctuation
// characters listed below, and use "'" only in a possessive "'s" that
// is followed by a space or ends the line.
func heading(line string) string {
	line = strings.TrimSpace(line)
	if len(line) == 0 {
		return ""
	}

	// a heading must start with an uppercase letter
	r, _ := utf8.DecodeRuneInString(line)
	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
		return ""
	}

	// it must end in a letter or digit:
	r, _ = utf8.DecodeLastRuneInString(line)
	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
		return ""
	}

	// exclude lines with illegal characters
	if strings.ContainsAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") {
		return ""
	}

	// allow "'" for possessive "'s" only
	for b := line; ; {
		i := strings.IndexRune(b, '\'')
		if i < 0 {
			break
		}
		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
			return "" // not followed by "s "
		}
		// continue scanning after the "'s"
		b = b[i+2:]
	}

	return line
}
// op identifies the kind of a block produced by blocks.
type op int

const (
	opPara op = iota // paragraph of running text
	opHead           // section heading
	opPre            // preformatted (indented) text
)

// block is one formatting unit of a comment: its kind and its lines.
type block struct {
	op    op
	lines []string
}
// nonAlphaNumRx matches any single character that is not an ASCII
// letter or digit.
var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)

// anchorID returns the HTML id for the heading text line: every
// character that is not an ASCII letter or digit is replaced by "_".
func anchorID(line string) string {
	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
}
274 // ToHTML converts comment text to formatted HTML.
275 // The comment was prepared by DocReader,
276 // so it is known not to have leading, trailing blank lines
277 // nor to have trailing spaces at the end of lines.
278 // The comment markers have already been removed.
280 // Each span of unindented non-blank lines is converted into
281 // a single paragraph. There is one exception to the rule: a span that
282 // consists of a single line, is followed by another paragraph span,
283 // begins with a capital letter, and contains no punctuation
284 // is formatted as a heading.
286 // A span of indented lines is converted into a <pre> block,
287 // with the common indent prefix removed.
289 // URLs in the comment text are converted into links; if the URL also appears
290 // in the words map, the link is taken from the map (if the corresponding map
291 // value is the empty string, the URL is not converted into a link).
293 // Go identifiers that appear in the words map are italicized; if the corresponding
294 // map value is not the empty string, it is considered a URL and the word is converted
295 // into a link.
296 func ToHTML(w io.Writer, text string, words map[string]string) {
297 for _, b := range blocks(text) {
298 switch b.op {
299 case opPara:
300 w.Write(html_p)
301 for _, line := range b.lines {
302 emphasize(w, line, words, true)
304 w.Write(html_endp)
305 case opHead:
306 w.Write(html_h)
307 id := ""
308 for _, line := range b.lines {
309 if id == "" {
310 id = anchorID(line)
311 w.Write([]byte(id))
312 w.Write(html_hq)
314 commentEscape(w, line, true)
316 if id == "" {
317 w.Write(html_hq)
319 w.Write(html_endh)
320 case opPre:
321 w.Write(html_pre)
322 for _, line := range b.lines {
323 emphasize(w, line, nil, false)
325 w.Write(html_endpre)
330 func blocks(text string) []block {
331 var (
332 out []block
333 para []string
335 lastWasBlank = false
336 lastWasHeading = false
339 close := func() {
340 if para != nil {
341 out = append(out, block{opPara, para})
342 para = nil
346 lines := strings.SplitAfter(text, "\n")
347 unindent(lines)
348 for i := 0; i < len(lines); {
349 line := lines[i]
350 if isBlank(line) {
351 // close paragraph
352 close()
354 lastWasBlank = true
355 continue
357 if indentLen(line) > 0 {
358 // close paragraph
359 close()
361 // count indented or blank lines
362 j := i + 1
363 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
366 // but not trailing blank lines
367 for j > i && isBlank(lines[j-1]) {
370 pre := lines[i:j]
371 i = j
373 unindent(pre)
375 // put those lines in a pre block
376 out = append(out, block{opPre, pre})
377 lastWasHeading = false
378 continue
381 if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
382 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
383 // current line is non-blank, surrounded by blank lines
384 // and the next non-blank line is not indented: this
385 // might be a heading.
386 if head := heading(line); head != "" {
387 close()
388 out = append(out, block{opHead, []string{head}})
389 i += 2
390 lastWasHeading = true
391 continue
395 // open paragraph
396 lastWasBlank = false
397 lastWasHeading = false
398 para = append(para, lines[i])
401 close()
403 return out
406 // ToText prepares comment text for presentation in textual output.
407 // It wraps paragraphs of text to width or fewer Unicode code points
408 // and then prefixes each line with the indent. In preformatted sections
409 // (such as program text), it prefixes each non-blank line with preIndent.
410 func ToText(w io.Writer, text string, indent, preIndent string, width int) {
411 l := lineWrapper{
412 out: w,
413 width: width,
414 indent: indent,
416 for _, b := range blocks(text) {
417 switch b.op {
418 case opPara:
419 // l.write will add leading newline if required
420 for _, line := range b.lines {
421 l.write(line)
423 l.flush()
424 case opHead:
425 w.Write(nl)
426 for _, line := range b.lines {
427 l.write(line + "\n")
429 l.flush()
430 case opPre:
431 w.Write(nl)
432 for _, line := range b.lines {
433 if isBlank(line) {
434 w.Write([]byte("\n"))
435 } else {
436 w.Write([]byte(preIndent))
437 w.Write([]byte(line))
// lineWrapper writes words to out, breaking lines so that the text on
// each line (not counting indent) is at most width code points wide,
// unless a single word is longer than that.
type lineWrapper struct {
	out       io.Writer
	printed   bool   // whether write has been called before
	width     int    // maximum number of code points per line, excluding indent
	indent    string // written at the start of every output line
	n         int    // code points written on the current line, excluding indent
	pendSpace int    // number of spaces (0 or 1) to emit before the next word
}

var nl = []byte("\n")
var space = []byte(" ")

// write splits text into whitespace-separated words and appends them to
// the output, starting a new indented line whenever the next word would
// exceed width. If a previous write was completed by flush, a blank line
// is emitted first to separate the paragraphs.
//
// Errors returned by the underlying writer are ignored.
func (l *lineWrapper) write(text string) {
	if l.n == 0 && l.printed {
		l.out.Write(nl) // blank line before new paragraph
	}
	l.printed = true

	for _, f := range strings.Fields(text) {
		w := utf8.RuneCountInString(f)
		// wrap if line is too long
		if l.n > 0 && l.n+l.pendSpace+w > l.width {
			l.out.Write(nl)
			l.n = 0
			l.pendSpace = 0
		}
		if l.n == 0 {
			l.out.Write([]byte(l.indent))
		}
		l.out.Write(space[:l.pendSpace])
		l.out.Write([]byte(f))
		l.n += l.pendSpace + w
		l.pendSpace = 1
	}
}

// flush terminates the current line, if anything was written on it,
// and resets the line state. It does nothing on an empty line.
func (l *lineWrapper) flush() {
	if l.n == 0 {
		return
	}
	l.out.Write(nl)
	l.pendSpace = 0
	l.n = 0
}