1 // Read a tabular cross-reference file generated by ctags, then read a list of
2 // html files generated by Vim's TOhtml command on C++ code. Link words
3 // in the html files to cross-references from ctags.
6 // linkify [tags file] [html files]...
8 // Still plenty of holes:
9 // - unnecessarily linking definition location to itself
10 // - except SubX definitions, which start at start of line
11 // - can't detect strings in spite of attempt to support them below, because
12 // Vim's generated html turns quotes into html entities
13 // - distinguishing function and variable names
14 // - distinguishing Mu code in C++ files
15 // - distinguishing between function overloads
16 // - if there are duplicate tags we aren't smart enough to distinguish between
17 // them yet, so we simply don't add any link at all
18 // - but even that's not perfect, because sometimes the tags file has a
19 // single definition but there are still multiple overloads (say I defined
20 // 'clear()' on some type, and it's already defined on STL classes)
21 // - ctags misses some symbols in layered code
37 using std::istringstream
;
38 using std::ostringstream
;
45 using std::isspace
; // unicode-aware
// Default constructor: a freshly created syminfo starts with line_num set to 0.
50 syminfo() :line_num(0) {}
// Stream-status helper used as the loop condition when reading input.
// Returns false when 'in' reports end-of-file.
// NOTE(review): the remainder of the body is not visible in this excerpt --
// confirm what happens on the non-EOF path before relying on this comment.
53 bool has_data(istream
& in
) {
55 if (in
.eof()) return false;
// Does 's' begin with 'pat'?
//   s:   string to inspect
//   pat: prefix to look for
// Returns true when every character of 'pat' matches the corresponding
// leading character of 's'. An empty 'pat' matches any string, including an
// empty one.
bool starts_with(const std::string& s, const std::string& pat) {
  // a string shorter than the pattern can never start with it
  if (s.size() < pat.size()) return false;
  return s.compare(0, pat.size(), pat) == 0;
}
// Does 's' end with 'pat'?
//   s:   string to inspect
//   pat: suffix to look for
// Returns true when every character of 'pat' matches the corresponding
// trailing character of 's'. An empty 'pat' matches any string, including an
// empty one.
bool ends_with(const std::string& s, const std::string& pat) {
  // a string shorter than the pattern can never end with it
  if (s.size() < pat.size()) return false;
  return s.compare(s.size() - pat.size(), pat.size(), pat) == 0;
}
// Rewrite 's' in place so that it can be compared against text taken from an
// html file: every '<' becomes "&lt;" and every '>' becomes "&gt;".
// No other characters are touched.
void encode_some_html_entities(std::string& s) {
  std::string::size_type pos = 0;
  while (true) {
    pos = s.find_first_of("<>", pos);
    if (pos == std::string::npos) break;
    // The replacement must be the entity itself. Substituting the raw
    // character would leave the string unchanged.
    std::string replacement;
    switch (s.at(pos)) {
      case '<': replacement = "&lt;"; break;
      case '>': replacement = "&gt;"; break;
    }
    s.replace(pos, 1, replacement);
    // resume scanning after the inserted entity
    pos += replacement.size();
  }
}
// Load cross-reference records from the file 'filename' into 'info', keyed by
// symbol name.
//   filename: path of the tags file to read
//   info:     map that receives one syminfo (line_num, filename) per symbol
// For each record the symbol name is read first and passed through
// encode_some_html_entities(). Records whose symbol is "operator" are skipped.
// A symbol that is already present in 'info' is treated as ambiguous: its
// line_num is set to -1 and its filename is cleared.
// NOTE(review): several lines of this function (including the declaration of
// 'dummy' and the exact branch structure) are not visible in this excerpt.
89 void read_tags(const string
& filename
, map
<string
, syminfo
>& info
) {
90 ifstream
in(filename
.c_str());
91 //? cerr << "reading " << filename << '\n';
93 while (has_data(in
)) {
94 string symbol
; in
>> symbol
;
95 if (symbol
== "operator") {
97 getline(in
, dummy
); // skip
// encode the symbol before it is used as a key in 'info'
100 encode_some_html_entities(symbol
);
101 //? cerr << symbol << '\n';
// symbol already seen: mark the entry as ambiguous (line_num -1, no filename)
102 if (info
.find(symbol
) != info
.end()) {
103 info
[symbol
].line_num
= -1;
104 info
[symbol
].filename
.clear();
// record where the symbol is defined
108 in
>> info
[symbol
].line_num
;
109 in
>> info
[symbol
].filename
;
111 getline(in
, dummy
); // skip rest of line
112 //? cerr << symbol << ": " << info[symbol].filename << ':' << info[symbol].line_num << '\n';
// Copy the html file 'filename' to 'filename' + ".out", turning words that
// appear in 'info' into links.
//   filename: path of the html file to read
//   info:     symbol -> (line_num, filename) table to link against
// Lines are handled one at a time. Text up to and including the first
// "</span>" of a line is copied verbatim; the rest is scanned character by
// character with whitespace skipping turned off. A word that reaches the link
// step is written as <a href='FILE.html#LNUM'>word</a>.
// The words "equal", "index", "put-index" and "length", words missing from
// 'info', words whose recorded filename is empty, and words for which
// 'at_start_of_line' is still set each take a separate branch before the link
// step.
// NOTE(review): many lines of this function are not visible in this excerpt,
// so what those branches emit cannot be confirmed from here.
117 void replace_tags_in_file(const string
& filename
, const map
<string
, syminfo
>& info
) {
118 //? cerr << info.size() << " symbols\n";
119 ifstream
in(filename
.c_str());
// output goes next to the input, with ".out" appended to the name
120 ofstream
out((filename
+".out").c_str());
121 while (has_data(in
)) {
122 // send lines that don't start with '<span' straight through
125 if (!starts_with(line
, "<span ")) {
// NOTE(review): if a line has no "</span>", find() returns npos and the sum
// below wraps around to a small offset -- confirm every line reaching this
// point contains a closing span.
129 static int span_size
= string("</span>").size();
130 int skip_first_span
= line
.find("</span>") + span_size
;
// copy everything up to and including the first "</span>" unchanged
131 out
<< line
.substr(0, skip_first_span
);
// scan the remainder of the line through its own stream
132 istringstream
in2(line
.substr(skip_first_span
));
// whitespace must be read as ordinary characters so it can be copied out
133 in2
>> std::noskipws
;
134 // only in .subx files, refuse to linkify the first word on a line
135 bool at_start_of_line
= ends_with(filename
, ".subx.html");
136 //? cerr << filename << ": " << at_start_of_line << '\n';
137 while (has_data(in2
)) {
138 if (isspace(in2
.peek())) {
139 //? cerr << "space\n";
142 at_start_of_line
= false;
144 // within a line, send straight through all characters inside '<..>'
145 else if (in2
.peek() == '<') {
149 //? cerr << "span: " << c << '\n';
153 // don't include initial tag when computing 'at_start_of_line'
154 //? cerr << "end tag\n";
157 // send straight through all characters inside strings (handling escapes)
160 //? cerr << "string\n";
171 at_start_of_line
= false;
173 else if (c
== '\'') {
174 //? cerr << "character\n";
181 else if (c
== '\'') {
185 at_start_of_line
= false;
187 // send straight through any characters after '#' (comments)
189 //? cerr << "comment\n";
191 while (in2
>> c
) out
<< c
;
192 at_start_of_line
= false;
194 // send straight through any characters after '//' (comments)
195 else if (c
== '/' && in2
.peek() == '/') {
196 //? cerr << "comment\n";
198 while (in2
>> c
) out
<< c
;
199 at_start_of_line
= false;
201 // send through open parens at start of line
204 at_start_of_line
= false;
208 at_start_of_line
= false;
211 //? cerr << "rest\n";
212 if (c
== ',' || c
== ':') {
214 at_start_of_line
= false;
// these delimiter characters are tested while a word is being collected
220 if (isspace(c
) || c
== '<' || c
== '"' || c
== '\'' || c
== '/' || c
== ',' || c
== ':' || c
== '(' || c
== ')') { // keep sync'd with other clauses above
// the word collected so far is the candidate symbol
226 string symbol
= out2
.str();
// a fixed set of names gets its own branch
227 if (symbol
== "equal" || symbol
== "index" || symbol
== "put-index" || symbol
== "length") {
228 //? cerr << " blacklisted\n";
231 else if (info
.find(symbol
) == info
.end()) {
232 //? cerr << " no info\n";
236 const syminfo
& s
= info
.find(symbol
)->second
;
// read_tags() clears the filename of symbols that occur more than once
237 if (s
.filename
.empty()) {
238 //? cerr << " empty info\n";
242 if (at_start_of_line
) {
243 //? cerr << " at start of line; refusing to linkify " << symbol << "\n";
247 //? cerr << " link\n";
// link target is "<filename>.html#L<line_num>"
248 out
<< "<a href='" << s
.filename
<< ".html#L" << s
.line_num
<< "'>" << symbol
<< "</a>";
254 } // done parsing line
258 in
.close(); out
.close();
261 int main(int argc
, const char* argv
[]) {
262 map
<string
, syminfo
> info
;
263 read_tags(argv
[1], info
);
264 for (int i
= 2; i
< argc
; ++i
)
265 replace_tags_in_file(argv
[i
], info
);