libgo/go/go/doc/comment.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Godoc comment extraction and comment -> HTML formatting.
   6
   7 package doc
   8
   9 import (
  10         "io"
  11         "regexp"
  12         "strings"
  13         "text/template" // for HTMLEscape
  14         "unicode"
  15         "unicode/utf8"
  16 )
  17
// HTML entities that commentEscape substitutes for doubled quote
// characters when its nice flag is set.
var (
	ldquo = []byte("&ldquo;") // replaces ``
	rdquo = []byte("&rdquo;") // replaces ''
)
  22
  23 // Escape comment text for HTML. If nice is set,
  24 // also turn `` into &ldquo; and '' into &rdquo;.
  25 func commentEscape(w io.Writer, text string, nice bool) {
  26         last := 0
  27         if nice {
  28                 for i := 0; i < len(text)-1; i++ {
  29                         ch := text[i]
  30                         if ch == text[i+1] && (ch == '`' || ch == '\'') {
  31                                 template.HTMLEscape(w, []byte(text[last:i]))
  32                                 last = i + 2
  33                                 switch ch {
  34                                 case '`':
  35                                         w.Write(ldquo)
  36                                 case '\'':
  37                                         w.Write(rdquo)
  38                                 }
  39                                 i++ // loop will add one more
  40                         }
  41                 }
  42         }
  43         template.HTMLEscape(w, []byte(text[last:]))
  44 }
  45
// Regular expression source strings used to build matchRx.
const (
	// Regexp for Go identifiers: a Unicode letter or underscore,
	// followed by any number of letters, underscores, or ASCII digits.
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check in pairedParensPrefixLen for balance - see #5043
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid urls ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	// Punctuation from .,:;?! is accepted only when another path
	// character follows it, so it can never end the match.
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	// Complete URL: protocol, the literal "://", host, and optional path.
	urlRx = protoPart + `://` + hostPart + pathPart
)
  65
// matchRx matches either a URL or a Go identifier. The URL alternative is
// the first parenthesized group, so a submatch index >= 0 for that group
// means the match is a URL rather than an identifier.
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
  67
// HTML fragments written by emphasize and ToHTML. The html_a/html_aq and
// html_h/html_hq pairs bracket an attribute value: the first opens the tag
// up to the opening quote, the second closes the quote and the tag.
var (
	html_a      = []byte(`<a href="`) // start of a link; the URL follows
	html_aq     = []byte(`">`)        // end of the href attribute
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`) // start of a heading; the anchor id follows
	html_hq     = []byte(`">`)       // end of the id attribute
	html_endh   = []byte("</h3>\n")
)
  82
  83 // pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
  84 func pairedParensPrefixLen(s string) int {
  85         parens := 0
  86         l := len(s)
  87         for i, ch := range s {
  88                 switch ch {
  89                 case '(':
  90                         if parens == 0 {
  91                                 l = i
  92                         }
  93                         parens++
  94                 case ')':
  95                         parens--
  96                         if parens == 0 {
  97                                 l = len(s)
  98                         } else if parens < 0 {
  99                                 return i
 100                         }
 101                 }
 102         }
 103         return l
 104 }
 105
 106 // Emphasize and escape a line of text for HTML. URLs are converted into links;
 107 // if the URL also appears in the words map, the link is taken from the map (if
 108 // the corresponding map value is the empty string, the URL is not converted
 109 // into a link). Go identifiers that appear in the words map are italicized; if
 110 // the corresponding map value is not the empty string, it is considered a URL
 111 // and the word is converted into a link. If nice is set, the remaining text's
 112 // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
 113 // and '' into &rdquo;).
 114 func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
 115         for {
 116                 m := matchRx.FindStringSubmatchIndex(line)
 117                 if m == nil {
 118                         break
 119                 }
 120                 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
 121
 122                 // write text before match
 123                 commentEscape(w, line[0:m[0]], nice)
 124
 125                 // adjust match if necessary
 126                 match := line[m[0]:m[1]]
 127                 if n := pairedParensPrefixLen(match); n < len(match) {
 128                         // match contains unpaired parentheses (rare);
 129                         // redo matching with shortened line for correct indices
 130                         m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
 131                         match = match[:n]
 132                 }
 133
 134                 // analyze match
 135                 url := ""
 136                 italics := false
 137                 if words != nil {
 138                         url, italics = words[match]
 139                 }
 140                 if m[2] >= 0 {
 141                         // match against first parenthesized sub-regexp; must be match against urlRx
 142                         if !italics {
 143                                 // no alternative URL in words list, use match instead
 144                                 url = match
 145                         }
 146                         italics = false // don't italicize URLs
 147                 }
 148
 149                 // write match
 150                 if len(url) > 0 {
 151                         w.Write(html_a)
 152                         template.HTMLEscape(w, []byte(url))
 153                         w.Write(html_aq)
 154                 }
 155                 if italics {
 156                         w.Write(html_i)
 157                 }
 158                 commentEscape(w, match, nice)
 159                 if italics {
 160                         w.Write(html_endi)
 161                 }
 162                 if len(url) > 0 {
 163                         w.Write(html_enda)
 164                 }
 165
 166                 // advance
 167                 line = line[m[1]:]
 168         }
 169         commentEscape(w, line, nice)
 170 }
 171
 172 func indentLen(s string) int {
 173         i := 0
 174         for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
 175                 i++
 176         }
 177         return i
 178 }
 179
 180 func isBlank(s string) bool {
 181         return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
 182 }
 183
 184 func commonPrefix(a, b string) string {
 185         i := 0
 186         for i < len(a) && i < len(b) && a[i] == b[i] {
 187                 i++
 188         }
 189         return a[0:i]
 190 }
 191
 192 func unindent(block []string) {
 193         if len(block) == 0 {
 194                 return
 195         }
 196
 197         // compute maximum common white prefix
 198         prefix := block[0][0:indentLen(block[0])]
 199         for _, line := range block {
 200                 if !isBlank(line) {
 201                         prefix = commonPrefix(prefix, line[0:indentLen(line)])
 202                 }
 203         }
 204         n := len(prefix)
 205
 206         // remove
 207         for i, line := range block {
 208                 if !isBlank(line) {
 209                         block[i] = line[n:]
 210                 }
 211         }
 212 }
 213
 214 // heading returns the trimmed line if it passes as a section heading;
 215 // otherwise it returns the empty string.
 216 func heading(line string) string {
 217         line = strings.TrimSpace(line)
 218         if len(line) == 0 {
 219                 return ""
 220         }
 221
 222         // a heading must start with an uppercase letter
 223         r, _ := utf8.DecodeRuneInString(line)
 224         if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
 225                 return ""
 226         }
 227
 228         // it must end in a letter or digit:
 229         r, _ = utf8.DecodeLastRuneInString(line)
 230         if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
 231                 return ""
 232         }
 233
 234         // exclude lines with illegal characters
 235         if strings.ContainsAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") {
 236                 return ""
 237         }
 238
 239         // allow "'" for possessive "'s" only
 240         for b := line; ; {
 241                 i := strings.IndexRune(b, '\'')
 242                 if i < 0 {
 243                         break
 244                 }
 245                 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
 246                         return "" // not followed by "s "
 247                 }
 248                 b = b[i+2:]
 249         }
 250
 251         return line
 252 }
 253
// op identifies the kind of a block produced by blocks.
type op int

const (
	opPara op = iota // paragraph of text; ToHTML wraps it in <p>
	opHead           // section heading; ToHTML renders it as <h3>
	opPre            // preformatted lines; ToHTML wraps them in <pre>
)

// block is one unit of a comment as split up by blocks: its kind and the
// lines of text that belong to it.
type block struct {
	op    op
	lines []string
}
 266
 267 var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
 268
 269 func anchorID(line string) string {
 270         // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
 271         return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
 272 }
 273
 274 // ToHTML converts comment text to formatted HTML.
 275 // The comment was prepared by DocReader,
 276 // so it is known not to have leading, trailing blank lines
 277 // nor to have trailing spaces at the end of lines.
 278 // The comment markers have already been removed.
 279 //
 280 // Each span of unindented non-blank lines is converted into
 281 // a single paragraph. There is one exception to the rule: a span that
 282 // consists of a single line, is followed by another paragraph span,
 283 // begins with a capital letter, and contains no punctuation
 284 // is formatted as a heading.
 285 //
 286 // A span of indented lines is converted into a <pre> block,
 287 // with the common indent prefix removed.
 288 //
 289 // URLs in the comment text are converted into links; if the URL also appears
 290 // in the words map, the link is taken from the map (if the corresponding map
 291 // value is the empty string, the URL is not converted into a link).
 292 //
 293 // Go identifiers that appear in the words map are italicized; if the corresponding
 294 // map value is not the empty string, it is considered a URL and the word is converted
 295 // into a link.
 296 func ToHTML(w io.Writer, text string, words map[string]string) {
 297         for _, b := range blocks(text) {
 298                 switch b.op {
 299                 case opPara:
 300                         w.Write(html_p)
 301                         for _, line := range b.lines {
 302                                 emphasize(w, line, words, true)
 303                         }
 304                         w.Write(html_endp)
 305                 case opHead:
 306                         w.Write(html_h)
 307                         id := ""
 308                         for _, line := range b.lines {
 309                                 if id == "" {
 310                                         id = anchorID(line)
 311                                         w.Write([]byte(id))
 312                                         w.Write(html_hq)
 313                                 }
 314                                 commentEscape(w, line, true)
 315                         }
 316                         if id == "" {
 317                                 w.Write(html_hq)
 318                         }
 319                         w.Write(html_endh)
 320                 case opPre:
 321                         w.Write(html_pre)
 322                         for _, line := range b.lines {
 323                                 emphasize(w, line, nil, false)
 324                         }
 325                         w.Write(html_endpre)
 326                 }
 327         }
 328 }
 329
 330 func blocks(text string) []block {
 331         var (
 332                 out  []block
 333                 para []string
 334
 335                 lastWasBlank   = false
 336                 lastWasHeading = false
 337         )
 338
 339         close := func() {
 340                 if para != nil {
 341                         out = append(out, block{opPara, para})
 342                         para = nil
 343                 }
 344         }
 345
 346         lines := strings.SplitAfter(text, "\n")
 347         unindent(lines)
 348         for i := 0; i < len(lines); {
 349                 line := lines[i]
 350                 if isBlank(line) {
 351                         // close paragraph
 352                         close()
 353                         i++
 354                         lastWasBlank = true
 355                         continue
 356                 }
 357                 if indentLen(line) > 0 {
 358                         // close paragraph
 359                         close()
 360
 361                         // count indented or blank lines
 362                         j := i + 1
 363                         for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
 364                                 j++
 365                         }
 366                         // but not trailing blank lines
 367                         for j > i && isBlank(lines[j-1]) {
 368                                 j--
 369                         }
 370                         pre := lines[i:j]
 371                         i = j
 372
 373                         unindent(pre)
 374
 375                         // put those lines in a pre block
 376                         out = append(out, block{opPre, pre})
 377                         lastWasHeading = false
 378                         continue
 379                 }
 380
 381                 if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
 382                         isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
 383                         // current line is non-blank, surrounded by blank lines
 384                         // and the next non-blank line is not indented: this
 385                         // might be a heading.
 386                         if head := heading(line); head != "" {
 387                                 close()
 388                                 out = append(out, block{opHead, []string{head}})
 389                                 i += 2
 390                                 lastWasHeading = true
 391                                 continue
 392                         }
 393                 }
 394
 395                 // open paragraph
 396                 lastWasBlank = false
 397                 lastWasHeading = false
 398                 para = append(para, lines[i])
 399                 i++
 400         }
 401         close()
 402
 403         return out
 404 }
 405
 406 // ToText prepares comment text for presentation in textual output.
 407 // It wraps paragraphs of text to width or fewer Unicode code points
 408 // and then prefixes each line with the indent. In preformatted sections
 409 // (such as program text), it prefixes each non-blank line with preIndent.
 410 func ToText(w io.Writer, text string, indent, preIndent string, width int) {
 411         l := lineWrapper{
 412                 out:    w,
 413                 width:  width,
 414                 indent: indent,
 415         }
 416         for _, b := range blocks(text) {
 417                 switch b.op {
 418                 case opPara:
 419                         // l.write will add leading newline if required
 420                         for _, line := range b.lines {
 421                                 l.write(line)
 422                         }
 423                         l.flush()
 424                 case opHead:
 425                         w.Write(nl)
 426                         for _, line := range b.lines {
 427                                 l.write(line + "\n")
 428                         }
 429                         l.flush()
 430                 case opPre:
 431                         w.Write(nl)
 432                         for _, line := range b.lines {
 433                                 if isBlank(line) {
 434                                         w.Write([]byte("\n"))
 435                                 } else {
 436                                         w.Write([]byte(preIndent))
 437                                         w.Write([]byte(line))
 438                                 }
 439                         }
 440                 }
 441         }
 442 }
 443
 444 type lineWrapper struct {
 445         out       io.Writer
 446         printed   bool
 447         width     int
 448         indent    string
 449         n         int
 450         pendSpace int
 451 }
 452
 453 var nl = []byte("\n")
 454 var space = []byte(" ")
 455
 456 func (l *lineWrapper) write(text string) {
 457         if l.n == 0 && l.printed {
 458                 l.out.Write(nl) // blank line before new paragraph
 459         }
 460         l.printed = true
 461
 462         for _, f := range strings.Fields(text) {
 463                 w := utf8.RuneCountInString(f)
 464                 // wrap if line is too long
 465                 if l.n > 0 && l.n+l.pendSpace+w > l.width {
 466                         l.out.Write(nl)
 467                         l.n = 0
 468                         l.pendSpace = 0
 469                 }
 470                 if l.n == 0 {
 471                         l.out.Write([]byte(l.indent))
 472                 }
 473                 l.out.Write(space[:l.pendSpace])
 474                 l.out.Write([]byte(f))
 475                 l.n += l.pendSpace + w
 476                 l.pendSpace = 1
 477         }
 478 }
 479
 480 func (l *lineWrapper) flush() {
 481         if l.n == 0 {
 482                 return
 483         }
 484         l.out.Write(nl)
 485         l.pendSpace = 0
 486         l.n = 0
 487 }