1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package tabwriter implements a write filter (tabwriter.Writer) that
6 // translates tabbed columns in input into properly aligned text.
8 // The package is using the Elastic Tabstops algorithm described at
9 // http://nickgravgaard.com/elastictabstops/index.html.
11 // The text/tabwriter package is frozen and is not accepting new features.
20 // ----------------------------------------------------------------------------
21 // Filter implementation
23 // A cell represents a segment of text terminated by tabs or line breaks.
24 // The text itself is stored in a separate buffer; cell only describes the
25 // segment's size in bytes, its width in runes, and whether it's an htab
26 // ('\t') terminated cell.
29 size
int // cell size in bytes
30 width
int // cell width in runes
31 htab
bool // true if the cell is terminated by an htab ('\t')
34 // A Writer is a filter that inserts padding around tab-delimited
35 // columns in its input to align them in the output.
37 // The Writer treats incoming bytes as UTF-8-encoded text consisting
38 // of cells terminated by horizontal ('\t') or vertical ('\v') tabs,
39 // and newline ('\n') or formfeed ('\f') characters; both newline and
40 // formfeed act as line breaks.
42 // Tab-terminated cells in contiguous lines constitute a column. The
43 // Writer inserts padding as needed to make all cells in a column have
44 // the same width, effectively aligning the columns. It assumes that
45 // all characters have the same width, except for tabs for which a
46 // tabwidth must be specified. Column cells must be tab-terminated, not
47 // tab-separated: non-tab terminated trailing text at the end of a line
48 // forms a cell but that cell is not part of an aligned column.
49 // For instance, in this example (where | stands for a horizontal tab):
50 //
51 //	aaaa|bbb|d
52 //	aa  |b  |dd
53 //	a   |
54 //	aa  |cccc|eee
55 //
56 // the b and c are in distinct columns (the b column is not contiguous
57 // all the way). The d and e are not in a column at all (there's no
58 // terminating tab, nor would the column be contiguous).
60 // The Writer assumes that all Unicode code points have the same width;
61 // this may not be true in some fonts or if the string contains combining
62 // characters.
64 // If DiscardEmptyColumns is set, empty columns that are terminated
65 // entirely by vertical (or "soft") tabs are discarded. Columns
66 // terminated by horizontal (or "hard") tabs are not affected by
67 // this flag.
69 // If a Writer is configured to filter HTML, HTML tags and entities
70 // are passed through. The widths of tags and entities are
71 // assumed to be zero (tags) and one (entities) for formatting purposes.
73 // A segment of text may be escaped by bracketing it with Escape
74 // characters. The tabwriter passes escaped text segments through
75 // unchanged. In particular, it does not interpret any tabs or line
76 // breaks within the segment. If the StripEscape flag is set, the
77 // Escape characters are stripped from the output; otherwise they
78 // are passed through as well. For the purpose of formatting, the
79 // width of the escaped text is always computed excluding the Escape
80 // characters.
82 // The formfeed character acts like a newline but it also terminates
83 // all columns in the current line (effectively calling Flush). Tab-
84 // terminated cells in the next line start new columns. Unless found
85 // inside an HTML tag or inside an escaped text segment, formfeed
86 // characters appear as newlines in the output.
88 // The Writer must buffer input internally, because proper spacing
89 // of one line may depend on the cells in future lines. Clients must
90 // call Flush when done calling Write.
102 buf bytes
.Buffer
// collected text excluding tabs or line breaks
103 pos
int // buffer position up to which cell.width of incomplete cell has been computed
104 cell cell
// current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
105 endChar
byte // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
106 lines
[][]cell
// list of lines; each line is a list of cells
107 widths
[]int // list of column widths in runes - re-used during formatting
110 func (b
*Writer
) addLine() { b
.lines
= append(b
.lines
, []cell
{}) }
112 // Reset the current state.
113 func (b
*Writer
) reset() {
118 b
.lines
= b
.lines
[0:0]
119 b
.widths
= b
.widths
[0:0]
123 // Internal representation (current state):
125 // - all text written is appended to buf; tabs and line breaks are stripped away
126 // - at any given time there is a (possibly empty) incomplete cell at the end
127 // (the cell starts after a tab or line break)
128 // - cell.size is the number of bytes belonging to the cell so far
129 // - cell.width is text width in runes of that cell from the start of the cell to
130 // position pos; html tags and entities are excluded from this width if html
131 // filtering is enabled
132 // - the sizes and widths of processed text are kept in the lines list
133 // which contains a list of cells for each line
134 // - the widths list is a temporary list with current widths used during
135 // formatting; it is kept in Writer because it's re-used
137 //	|<---------- size ---------->|
138 //	|                            |
139 //	|<- width ->|<- ignored ->|  |
140 //	|           |             |  |
141 //	[---processed---tab------------<tag>...</tag>...]
142 //	^                  ^                         ^
143 //	|                  |                         |
144 //	buf                start of incomplete cell  pos
146 // Formatting can be controlled with these flags.
148 // Ignore html tags and treat entities (starting with '&'
149 // and ending in ';') as single characters (width = 1).
150 FilterHTML
uint = 1 << iota
152 // Strip Escape characters bracketing escaped text segments
153 // instead of passing them through unchanged with the text.
156 // Force right-alignment of cell content.
157 // Default is left-alignment.
160 // Handle empty columns as if they were not present in
161 // the input in the first place.
164 // Always use tabs for indentation columns (i.e., padding of
165 // leading empty cells on the left) independent of padchar.
168 // Print a vertical bar ('|') between columns (after formatting).
169 // Discarded columns appear as zero-width columns ("||").
173 // A Writer must be initialized with a call to Init. The first parameter (output)
174 // specifies the filter output. The remaining parameters control the formatting:
176 // minwidth minimal cell width including any padding
177 // tabwidth width of tab characters (equivalent number of spaces)
178 // padding padding added to a cell before computing its width
179 // padchar ASCII char used for padding
180 // if padchar == '\t', the Writer will assume that the
181 // width of a '\t' in the formatted output is tabwidth,
182 // and cells are left-aligned regardless of the AlignRight flag
183 // (for correct-looking results, tabwidth must correspond
184 // to the tab width in the viewer displaying the result)
185 // flags formatting control
187 func (b
*Writer
) Init(output io
.Writer
, minwidth
, tabwidth
, padding
int, padchar
byte, flags
uint) *Writer
{
188 if minwidth
< 0 || tabwidth
< 0 || padding
< 0 {
189 panic("negative minwidth, tabwidth, or padding")
192 b
.minwidth
= minwidth
193 b
.tabwidth
= tabwidth
195 for i
:= range b
.padbytes
{
196 b
.padbytes
[i
] = padchar
199 // tab padding enforces left-alignment
209 // debugging support (keep code around)
210 func (b
*Writer
) dump() {
212 for i
, line
:= range b
.lines
{
214 for _
, c
:= range line
{
215 print("[", string(b
.buf
.Bytes()[pos
:pos
+c
.size
]), "]")
223 // local error wrapper so we can distinguish errors we want to return
224 // as errors from genuine panics (which we don't want to return as errors)
225 type osError
struct {
229 func (b
*Writer
) write0(buf
[]byte) {
230 n
, err
:= b
.output
.Write(buf
)
231 if n
!= len(buf
) && err
== nil {
232 err
= io
.ErrShortWrite
239 func (b
*Writer
) writeN(src
[]byte, n
int) {
248 newline
= []byte{'\n'}
249 tabs
= []byte("\t\t\t\t\t\t\t\t")
252 func (b
*Writer
) writePadding(textw
, cellw
int, useTabs
bool) {
253 if b
.padbytes
[0] == '\t' || useTabs
{
254 // padding is done with tabs
256 return // tabs have no width - can't do any padding
258 // make cellw the smallest multiple of b.tabwidth
259 cellw
= (cellw
+ b
.tabwidth
- 1) / b
.tabwidth
* b
.tabwidth
260 n
:= cellw
- textw
// amount of padding
262 panic("internal error")
264 b
.writeN(tabs
, (n
+b
.tabwidth
-1)/b
.tabwidth
)
268 // padding is done with non-tab characters
269 b
.writeN(b
.padbytes
[0:], cellw
-textw
)
// vbar is a one-byte slice holding the vertical bar character ('|').
// NOTE(review): presumably the column separator emitted between cells when
// the Debug flag is set; the write site is not visible here, so confirm there.
var vbar = []byte("|")
274 func (b
*Writer
) writeLines(pos0
int, line0
, line1
int) (pos
int) {
276 for i
:= line0
; i
< line1
; i
++ {
279 // if TabIndent is set, use tabs to pad leading empty cells
280 useTabs
:= b
.flags
&TabIndent
!= 0
282 for j
, c
:= range line
{
283 if j
> 0 && b
.flags
&Debug
!= 0 {
284 // indicate column break
290 if j
< len(b
.widths
) {
291 b
.writePadding(c
.width
, b
.widths
[j
], useTabs
)
296 if b
.flags
&AlignRight
== 0 { // align left
297 b
.write0(b
.buf
.Bytes()[pos
: pos
+c
.size
])
299 if j
< len(b
.widths
) {
300 b
.writePadding(c
.width
, b
.widths
[j
], false)
302 } else { // align right
303 if j
< len(b
.widths
) {
304 b
.writePadding(c
.width
, b
.widths
[j
], false)
306 b
.write0(b
.buf
.Bytes()[pos
: pos
+c
.size
])
312 if i
+1 == len(b
.lines
) {
313 // last buffered line - we don't have a newline, so just write
314 // any outstanding buffered data
315 b
.write0(b
.buf
.Bytes()[pos
: pos
+b
.cell
.size
])
318 // not the last line - write newline
325 // Format the text between line0 and line1 (excluding line1); pos
326 // is the buffer position corresponding to the beginning of line0.
327 // Returns the buffer position corresponding to the beginning of
328 // line1.
330 func (b
*Writer
) format(pos0
int, line0
, line1
int) (pos
int) {
332 column
:= len(b
.widths
)
333 for this
:= line0
; this
< line1
; this
++ {
334 line
:= b
.lines
[this
]
336 if column
< len(line
)-1 {
337 // cell exists in this column => this line
338 // has more cells than the previous line
339 // (the last cell per line is ignored because cells are
340 // tab-terminated; the last cell per line describes the
341 // text before the newline/formfeed and does not belong
342 // to a column)
344 // print unprinted lines until beginning of block
345 pos
= b
.writeLines(pos
, line0
, this
)
348 // column block begin
349 width
:= b
.minwidth
// minimal column width
350 discardable
:= true // true if all cells in this column are empty and "soft"
351 for ; this
< line1
; this
++ {
353 if column
< len(line
)-1 {
354 // cell exists in this column
357 if w
:= c
.width
+ b
.padding
; w
> width
{
360 // update discardable
361 if c
.width
> 0 || c
.htab
{
370 // discard empty columns if necessary
371 if discardable
&& b
.flags
&DiscardEmptyColumns
!= 0 {
375 // format and print all columns to the right of this column
376 // (we know the widths of this column and all columns to the left)
377 b
.widths
= append(b
.widths
, width
) // push width
378 pos
= b
.format(pos
, line0
, this
)
379 b
.widths
= b
.widths
[0 : len(b
.widths
)-1] // pop width
384 // print unprinted lines until end
385 return b
.writeLines(pos
, line0
, line1
)
388 // Append text to current cell.
389 func (b
*Writer
) append(text
[]byte) {
391 b
.cell
.size
+= len(text
)
394 // Update the cell width.
395 func (b
*Writer
) updateWidth() {
396 b
.cell
.width
+= utf8
.RuneCount(b
.buf
.Bytes()[b
.pos
:b
.buf
.Len()])
// Escape is the character used to bracket an escaped text segment: place
// one Escape before the segment and one after it. As an example, the tab
// in the string "Ignore this tab: \xff\t\xff" does not terminate a cell;
// for formatting purposes it counts as a single character of width one.
//
// 0xff was picked as the value because that byte cannot occur in a valid
// UTF-8 sequence.
const Escape = '\xff'
409 // Start escaped mode.
410 func (b
*Writer
) startEscape(ch
byte) {
421 // Terminate escaped mode. If the escaped text was an HTML tag, its width
422 // is assumed to be zero for formatting purposes; if it was an HTML entity,
423 // its width is assumed to be one. In all other cases, the width is the
424 // unicode width of the text.
426 func (b
*Writer
) endEscape() {
430 if b
.flags
&StripEscape
== 0 {
431 b
.cell
.width
-= 2 // don't count the Escape chars
433 case '>': // tag of zero width
435 b
.cell
.width
++ // entity, count as one rune
441 // Terminate the current cell by adding it to the list of cells of the
442 // current line. Returns the number of cells in that line.
444 func (b
*Writer
) terminateCell(htab
bool) int {
446 line
:= &b
.lines
[len(b
.lines
)-1]
447 *line
= append(*line
, b
.cell
)
452 func handlePanic(err
*error
, op
string) {
453 if e
:= recover(); e
!= nil {
454 if nerr
, ok
:= e
.(osError
); ok
{
458 panic("tabwriter: panic during " + op
)
462 // Flush should be called after the last call to Write to ensure
463 // that any data buffered in the Writer is written to output. Any
464 // incomplete escape sequence at the end is considered
465 // complete for formatting purposes.
466 func (b
*Writer
) Flush() error
{
470 func (b
*Writer
) flush() (err error
) {
471 defer b
.reset() // even in the presence of errors
472 defer handlePanic(&err
, "Flush")
474 // add current cell if not empty
477 // inside escape - terminate it even if incomplete
480 b
.terminateCell(false)
483 // format contents of buffer
484 b
.format(0, 0, len(b
.lines
))
// hbar is the byte sequence "---" followed by a newline.
// NOTE(review): presumably the section-break marker written after a formfeed
// when the Debug flag is set; the write site is not visible here, so confirm there.
var hbar = []byte("---\n")
490 // Write writes buf to the writer b.
491 // The only errors returned are ones encountered
492 // while writing to the underlying output stream.
494 func (b
*Writer
) Write(buf
[]byte) (n
int, err error
) {
495 defer handlePanic(&err
, "Write")
497 // split text into cells
499 for i
, ch
:= range buf
{
503 case '\t', '\v', '\n', '\f':
507 n
= i
+ 1 // ch consumed
508 ncells
:= b
.terminateCell(ch
== '\t')
509 if ch
== '\n' || ch
== '\f' {
512 if ch
== '\f' || ncells
== 1 {
513 // A '\f' always forces a flush. Otherwise, if the previous
514 // line has only one cell which does not have an impact on
515 // the formatting of the following lines (the last cell per
516 // line is ignored by format()), thus we can flush the
517 // Writer contents.
518 if err
= b
.Flush(); err
!= nil {
521 if ch
== '\f' && b
.flags
&Debug
!= 0 {
522 // indicate section break
529 // start of escaped sequence
533 if b
.flags
&StripEscape
!= 0 {
536 b
.startEscape(Escape
)
539 // possibly an html tag/entity
540 if b
.flags
&FilterHTML
!= 0 {
541 // begin of tag/entity
554 if ch
== Escape
&& b
.flags
&StripEscape
!= 0 {
555 j
= i
// strip Escape
558 n
= i
+ 1 // ch consumed
564 // append leftover text
570 // NewWriter allocates and initializes a new tabwriter.Writer.
571 // The parameters are the same as for the Init function.
573 func NewWriter(output io
.Writer
, minwidth
, tabwidth
, padding
int, padchar
byte, flags
uint) *Writer
{
574 return new(Writer
).Init(output
, minwidth
, tabwidth
, padding
, padchar
, flags
)