# Do not use .Xo/.Xc to work around ancient roff limits.
# [netbsd-mini2440.git] / share / misc / nanpa.sed
# blob 2c59a655a356494fec9cf6936a452838831abc98
# $NetBSD: nanpa.sed,v 1.1 2003/03/03 01:13:36 jhawk Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each html table row (TR),
# print the <TD> elements separated by colons.
#
# This could break on HTML comments.
#
# How it works:
#   1. Physical lines are glued together until the pattern space holds
#      one complete table row.  "</TR>" is rewritten to a literal "$",
#      which serves as the end-of-row marker while joining.
#   2. Anything that does not contain a <TR> tag is discarded.
#   3. Each <TD> becomes a ":" separator, the remaining tags are
#      stripped, and whitespace is normalised.
#
# Tag names are matched case-insensitively via bracket pairs such as
# [Tt][Rr], so both <TR>/<TD> and <tr>/<td> are recognised.

:top
#                               Strip ^Ms
# NOTE(review): "\r" in a regex is not specified by POSIX sed; confirm
# that the sed in use interprets it as a carriage return.
s/\r//g
#                               Join all lines with unterminated HTML tags
#                               (the newline is kept; it disappears later
#                               when the tag itself is stripped)
/<[^>]*$/{
        N
        b top
}
#                               Replace all </TR> with EOL tag
s;</[Tt][Rr]>;$;g
#                               Join lines that end with a bare <TR>.
/<[Tt][Rr][^>]*>$/{
        N
        s/\n//g
        b top
}
#                               Also, join lines that have a <TR> with no
#                               EOL marker after it (row not yet complete).
/<[Tt][Rr][^>]*>[^$]*$/{
        N
        s/\n//g
        b top
}
#                               Remove the trailing EOL marker
s/\$$//
#                               Remove lines that do not contain a <TR>
/<[Tt][Rr][^>]*>/!d
#                               Replace all <TD> with colon
s/[[:blank:]]*<[Tt][Dd][^>]*> */:/g
#                               Strip all HTML tags
s/<[^>]*>//g
#                               Handle HTML characters
s/&nbsp;/ /g
#                               Compress spaces/tabs
s/[[:blank:]][[:blank:]]*/ /g
#                               Strip leading colons
s/^://
#                               Strip leading/trailing whitespace
s/^ //
s/ $//