Convert link titles to the correct codepage before displaying them on screen.
[elinks.git] / src / protocol / gopher / gopher.c
blob12b779e6b7773766bc804e993c39b521ce8e5b5c
1 /* Gopher access protocol (RFC 1436) */
3 /* Based on version of HTGopher.c in the lynx tree.
5 * Author tags:
6 * TBL Tim Berners-Lee
7 * FM Foteos Macrides
9 * A little history:
10 * 26 Sep 90 Adapted from other accesses (News, HTTP) TBL
11 * 29 Nov 91 Downgraded to C, for portable implementation.
12 * 10 Mar 96 Foteos Macrides (macrides@sci.wfbr.edu). Added a
13 * form-based CSO/PH gateway. Can be invoked via a
14 * "cso://host[:port]/" or "gopher://host:105/2"
15 * URL. If a gopher URL is used with a query token
16 * ('?'), the old ISINDEX procedure will be used
17 * instead of the form-based gateway.
18 * 15 Mar 96 Foteos Macrides (macrides@sci.wfbr.edu). Pass
19 * port 79, gtype 0 gopher URLs to the finger
20 * gateway.
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
31 #include "elinks.h"
33 #include "cache/cache.h"
34 #include "intl/gettext/libintl.h"
35 #include "main/module.h"
36 #include "network/connection.h"
37 #include "network/socket.h"
38 #include "protocol/common.h"
39 #include "protocol/gopher/gopher.h"
40 #include "protocol/protocol.h"
41 #include "protocol/uri.h"
42 #include "util/conv.h"
43 #include "util/memory.h"
44 #include "util/string.h"
46 struct module gopher_protocol_module = struct_module(
47 /* name: */ N_("Gopher"),
48 /* options: */ NULL,
49 /* hooks: */ NULL,
50 /* submodules: */ NULL,
51 /* data: */ NULL,
52 /* init: */ NULL,
53 /* done: */ NULL
/* Gopher entity types.
 *
 * Every value except GOPHER_UNKNOWN is the single type character that leads
 * a menu line or the data part of a Gopher URI. */
enum gopher_entity {
	/* Special fall-back entity */
	GOPHER_UNKNOWN		= 0,

	GOPHER_FILE		= '0',
	GOPHER_DIRECTORY	= '1',
	GOPHER_CSO		= '2',
	GOPHER_ERROR		= '3',
	GOPHER_MACBINHEX	= '4',
	GOPHER_PCBINARY		= '5',
	GOPHER_UUENCODED	= '6',
	GOPHER_INDEX		= '7',
	GOPHER_TELNET		= '8',
	GOPHER_BINARY		= '9',
	GOPHER_GIF		= 'g',
	GOPHER_HTML		= 'h',	/* HTML */
	GOPHER_CHTML		= 'H',	/* HTML */
	GOPHER_MIME		= 'm',
	GOPHER_SOUND		= 's',
	GOPHER_WWW		= 'w',	/* W3 address */
	GOPHER_IMAGE		= 'I',
	GOPHER_TN3270		= 'T',
	GOPHER_INFO		= 'i',	/* Information or separator line */
	GOPHER_DUPLICATE	= '+',

	/* Additions from Gopher Plus */
	GOPHER_PLUS_IMAGE	= ':',
	GOPHER_PLUS_MOVIE	= ';',
	GOPHER_PLUS_SOUND	= '<',
	GOPHER_PLUS_PDF		= 'P',
};
/* Entity assumed when the URI does not name one: a directory listing. */
#define DEFAULT_GOPHER_ENTITY	GOPHER_DIRECTORY

/* Non-zero unless @entity is one of the types that point away from Gopher
 * itself (telnet session, tn3270 session or W3 address). */
#define entity_needs_gopher_access(entity) \
	(!((entity) == GOPHER_TELNET \
	   || (entity) == GOPHER_TN3270 \
	   || (entity) == GOPHER_WWW))
94 struct gopher_entity_info {
95 enum gopher_entity type;
96 unsigned char *description;
97 unsigned char *content_type;
100 /* This table provides some hard-coded associations between entity type
101 * and MIME type. A NULL MIME type in this table indicates
102 * that the MIME type should be deduced from the extension.
104 * - Lynx uses "text/plain" for GOPHER_FILE, but it can be anything.
105 * - Lynx uses "image/gif" for GOPHER_IMAGE and GOPHER_PLUS_IMAGE,
106 * but they really can be anything.
107 * - GOPHER_BINARY can be, for example, a tar ball, so using
108 * "application/octet-stream" is a bad idea.
110 static struct gopher_entity_info gopher_entity_info[] = {
111 { GOPHER_BINARY, " (BINARY)", NULL },
112 { GOPHER_CHTML, " (CHTML)", "text/html" },
113 { GOPHER_CSO, " (CSO)", "text/html" },
114 { GOPHER_DIRECTORY, " (DIRECTORY)", "text/html" },
115 { GOPHER_FILE, " (FILE)", NULL /* "text/plain" */ },
116 { GOPHER_GIF, " (GIF IMAGE)", "image/gif" },
117 { GOPHER_HTML, " (HTML)", "text/html" },
118 { GOPHER_IMAGE, " (IMAGE)", NULL /* "image/gif" */ },
119 { GOPHER_INDEX, " (INDEX)", "text/html" },
120 { GOPHER_MACBINHEX, "(BINARY HEX)", "application/octet-stream" },
121 { GOPHER_MIME, " (MIME)", "application/octet-stream" },
122 { GOPHER_PCBINARY, " (PCBINARY)", "application/octet-stream" },
123 { GOPHER_PLUS_IMAGE, " (IMAGE+)", NULL /* "image/gif" */ },
124 { GOPHER_PLUS_MOVIE, " (MOVIE)", "video/mpeg" },
125 { GOPHER_PLUS_PDF, " (PDF)", "application/pdf" },
126 { GOPHER_PLUS_SOUND, " (SOUND+)", "audio/basic" },
127 { GOPHER_SOUND, " (SOUND)", "audio/basic" },
128 { GOPHER_TELNET, " (TELNET)", NULL },
129 { GOPHER_TN3270, " (TN3270)", NULL },
130 { GOPHER_UUENCODED, " (UUENCODED)", "application/octet-stream" },
131 { GOPHER_WWW, "(W3 ADDRESS)", NULL },
133 { GOPHER_INFO, " ", NULL },
134 { GOPHER_ERROR, NULL, NULL },
135 /* XXX: Keep GOPHER_UNKNOWN last so it is easy to access. */
136 { GOPHER_UNKNOWN, " ", "application/octet-stream" },
139 static struct gopher_entity_info *
140 get_gopher_entity_info(enum gopher_entity type)
142 int entry;
144 for (entry = 0; entry < sizeof_array(gopher_entity_info) - 1; entry++)
145 if (gopher_entity_info[entry].type == type)
146 return &gopher_entity_info[entry];
148 assert(gopher_entity_info[entry].type == GOPHER_UNKNOWN);
150 return &gopher_entity_info[entry];
153 static unsigned char *
154 get_gopher_entity_description(enum gopher_entity type)
156 struct gopher_entity_info *info = get_gopher_entity_info(type);
158 return info ? info->description : NULL;
/* Per-connection Gopher state, stored in the connection's info pointer. */
struct gopher_connection_info {
	/* Table row of the entity being requested. */
	struct gopher_entity_info *entity;

	/* Number of bytes of @command to send. */
	int commandlen;

	/* The request itself. The struct is allocated with room for
	 * @commandlen extra bytes, so this extends past the declared size. */
	unsigned char command[1];
};
169 /* De-escape a selector into a command. */
170 /* The % hex escapes are converted. Otherwise, the string is copied. */
171 static void
172 add_uri_decoded(struct string *command, unsigned char *string, int length,
173 int replace_plus)
175 int oldlen = command->length;
177 assert(string);
179 if (!length) return;
181 if (replace_plus) {
182 /* Remove plus signs 921006 */
183 if (!add_string_replace(command, string, length, '+', ' '))
184 return;
186 } else if (!add_bytes_to_string(command, string, length)) {
187 return;
190 assert(command->length > oldlen);
191 /* FIXME: Decoding the whole command string should not be a problem,
192 * and I don't remember why I didn't do that in the first place.
193 * --jonas */
194 decode_uri(command->source + oldlen);
196 /* Evil decode_uri_string() modifies the string */
197 command->length = strlen(command->source);
200 static enum connection_state init_gopher_index_cache_entry(struct connection *conn);
202 static enum connection_state
203 add_gopher_command(struct connection *conn, struct string *command,
204 enum gopher_entity entity,
205 unsigned char *selector, int selectorlen)
207 unsigned char *query;
208 int querylen;
210 if (!init_string(command))
211 return S_OUT_OF_MEM;
213 /* Look for search string */
214 query = memchr(selector, '?', selectorlen);
216 /* Check if no search is required */
217 if (!query || !query[1]) {
218 /* Exclude '?' */
219 if (query) selectorlen -= 1;
220 query = NULL;
221 querylen = 0;
222 } else {
223 query += 1;
224 querylen = selector + selectorlen - query;
225 /* Exclude '?' */
226 selectorlen -= querylen + 1;
227 if (querylen >= 7 && !strncasecmp(query, "search=", 7)) {
228 query += 7;
229 querylen -= 7;
233 switch (entity) {
234 case GOPHER_INDEX:
235 /* No search required? */
236 if (!query) {
237 done_string(command);
238 return init_gopher_index_cache_entry(conn);
241 add_uri_decoded(command, selector, selectorlen, 0);
242 add_char_to_string(command, '\t');
243 add_uri_decoded(command, query, querylen, 1);
244 break;
246 case GOPHER_CSO:
247 /* No search required */
248 if (!query) {
249 done_string(command);
250 /* Display "cover page" */
251 #if 0
252 return init_gopher_cso_cache_entry(conn);
253 #endif
254 return S_GOPHER_CSO_ERROR;
257 add_uri_decoded(command, selector, selectorlen, 0);
258 add_to_string(command, "query ");
259 add_uri_decoded(command, query, querylen, 1);
260 break;
262 default:
263 /* Not index */
264 add_uri_decoded(command, selector, selectorlen, 0);
267 add_crlf_to_string(command);
269 return S_CONN;
272 static enum connection_state
273 init_gopher_connection_info(struct connection *conn)
275 struct gopher_connection_info *gopher;
276 enum connection_state state;
277 struct string command;
278 enum gopher_entity entity = DEFAULT_GOPHER_ENTITY;
279 unsigned char *selector = conn->uri->data;
280 int selectorlen = conn->uri->datalen;
281 struct gopher_entity_info *entity_info;
282 size_t size;
284 /* Get entity type, and selector string. */
285 /* Pick up gopher_entity */
286 if (selectorlen > 1 && selector[1] == '/') {
287 entity = *selector++;
288 selectorlen--;
291 /* This is probably a hack. It serves as a work around when no entity is
292 * available in the Gopher URI. Instead of segfaulting later the content
293 * will be served as application/octet-stream. However, it could
294 * possible break handling Gopher URIs with entities which are really
295 * unknown because parts of the real Gopher entity character is added to
296 * the selector. A possible work around is to always expect a '/'
297 * _after_ the Gopher entity. If the <entity-char> '/' combo is not
298 * found assume that the whole URI data part is the selector. */
299 entity_info = get_gopher_entity_info(entity);
300 if (entity_info->type == GOPHER_UNKNOWN && entity != GOPHER_UNKNOWN) {
301 selector--;
302 selectorlen++;
305 state = add_gopher_command(conn, &command, entity, selector, selectorlen);
306 if (state != S_CONN)
307 return state;
309 /* Atleast the command should contain \r\n to ask the server
310 * wazzup! */
311 assert(command.length >= 2);
313 size = sizeof(*gopher) + command.length;
314 gopher = mem_calloc(1, size);
315 if (!gopher) {
316 done_string(&command);
317 return S_OUT_OF_MEM;
320 gopher->entity = entity_info;
321 gopher->commandlen = command.length;
323 memcpy(gopher->command, command.source, command.length);
324 done_string(&command);
326 conn->info = gopher;
328 return S_CONN;
/* Add a link to @buffer. The title of the destination is set, as there is
 * no way of knowing what the title is when we arrive.
 *
 * @text is the NUL-terminated text to show for the link.
 * @addr is the NUL-terminated address the link refers to.
 *
 * NOTE(review): both strings are inserted into the markup as they are; if
 * they can come from a server response they probably want HTML escaping. */
static void
add_gopher_link(struct string *buffer, const unsigned char *text,
		const unsigned char *addr)
{
	add_format_to_string(buffer,
		"<a href=\"%s\">%s</a>",
		addr, text);
}
/* Add a one-row search form to @buffer.
 *
 * @addr becomes the form's action and @text the label shown in front of
 * the input field, which is named "search". Both are inserted as they
 * are. */
static void
add_gopher_search_field(struct string *buffer, const unsigned char *text,
			const unsigned char *addr)
{
	add_format_to_string(buffer,
		"<form action=\"%s\">"
		"<table>"
		"<td> </td>"
		"<td>%s:</td>"
		"<td><input maxlength=\"256\" name=\"search\" value=\"\"></td>"
		"<td><input type=submit value=\"Search\"></td>"
		"</table>"
		"</form>",
		addr, text);
}
363 static void
364 add_gopher_description(struct string *buffer, enum gopher_entity entity)
366 unsigned char *description = get_gopher_entity_description(entity);
368 if (!description)
369 return;
371 add_to_string(buffer, "<b>");
372 add_to_string(buffer, description);
373 add_to_string(buffer, "</b> ");
/* URI-encode the NUL-terminated @selector and append it to @buffer.
 *
 * Rather hackishly, each "//" pair is passed to encode_uri_string() with a
 * different last argument (1) than the text around it (0). @selector is
 * modified temporarily while it is being split, but is restored before
 * returning. */
static void
encode_selector_string(struct string *buffer, unsigned char *selector)
{
	for (;;) {
		unsigned char *pair = strstr(selector, "//");

		if (!pair)
			break;

		/* Cut the string at the pair so that only the text in front
		 * of it is encoded by the first call. */
		*pair = 0;
		encode_uri_string(buffer, selector, -1, 0);
		encode_uri_string(buffer, "//", 2, 1);
		*pair = '/';

		selector = pair + 2;
	}

	/* Whatever follows the last pair (or the whole string). */
	encode_uri_string(buffer, selector, -1, 0);
}
394 static void
395 add_gopher_menu_line(struct string *buffer, unsigned char *line)
397 /* Gopher menu fields */
398 unsigned char *name = line;
399 unsigned char *selector = NULL;
400 unsigned char *host = NULL;
401 unsigned char *port = NULL;
402 enum gopher_entity entity = *name++;
404 if (!entity) {
405 add_char_to_string(buffer, '\n');
406 return;
409 if (*name) {
410 selector = strchr(name, ASCII_TAB);
411 if (selector) {
412 /* Terminate name */
413 *selector++ = '\0';
415 /* Gopher+ Type=0+ objects can be binary, and will have
416 * 9 or 5 beginning their selector. Make sure we don't
417 * trash the terminal by treating them as text. - FM */
418 if (entity == GOPHER_FILE
419 && (*selector == GOPHER_BINARY ||
420 *selector == GOPHER_PCBINARY))
421 entity = *selector;
424 host = selector ? strchr(selector, ASCII_TAB) : NULL;
425 if (host) {
426 /* Terminate selector */
427 *host++ = '\0';
430 port = host ? strchr(host, ASCII_TAB) : NULL;
431 if (port) {
432 unsigned char *end;
433 int portno;
435 errno = 0;
436 portno = strtol(port + 1, (char **) &end, 10);
437 if (errno || !uri_port_is_valid(portno)) {
438 port = NULL;
440 } else {
441 /* Try to wipe out the default gopher port
442 * number from being appended to links. */
443 if (portno == 70
444 && entity_needs_gopher_access(entity))
445 portno = 0;
447 /* If the port number is 0 it means no port
448 * number is needed in which case it can be
449 * wiped out completely. Else append it to the
450 * host string a la W3. */
451 if (portno == 0) {
452 *port = '\0';
453 } else {
454 *port = ':';
456 /* Chop port if there is junk after the
457 * number */
458 *end = '\0';
464 /* Nameless files are separator lines */
465 if (entity == GOPHER_FILE) {
466 int i = strlen(name) - 1;
468 while (name[i] == ' ' && i >= 0)
469 name[i--] = '\0';
471 if (i < 0)
472 entity = GOPHER_INFO;
475 if (entity != GOPHER_INDEX) {
476 add_gopher_description(buffer, entity);
479 switch (entity) {
480 case GOPHER_WWW:
481 /* Gopher pointer to W3 */
482 if (selector) {
483 add_gopher_link(buffer, name, selector);
484 break;
487 /* Fall through if no selector is defined so the
488 * text is just displayed. */
490 case GOPHER_INFO:
491 /* Information or separator line */
492 add_to_string(buffer, name);
493 break;
495 default:
497 struct string address;
498 unsigned char *format = selector && *selector
499 ? "%s://%s@%s/" : "%s://%s%s/";
501 /* If port is defined it means that both @selector and @host
502 * was correctly parsed. */
503 if (!port || !init_string(&address)) {
504 /* Parse error: Bad menu item */
505 add_to_string(buffer, name);
506 break;
509 assert(selector && host);
511 if (entity == GOPHER_TELNET) {
512 add_format_to_string(&address, format,
513 "telnet", selector, host);
515 } else if (entity == GOPHER_TN3270) {
516 add_format_to_string(&address, format,
517 "tn3270", selector, host);
519 } else {
520 add_format_to_string(&address, "gopher://%s/%c",
521 host, entity);
523 /* Encode selector string */
524 encode_selector_string(&address, selector);
527 /* Error response from Gopher doesn't deserve to
528 * be a hyperlink. */
529 if (entity == GOPHER_INDEX) {
530 add_gopher_search_field(buffer, name, address.source);
532 } else if (address.length > 0
533 && strlcmp(address.source, address.length - 1,
534 "gopher://error.host:1/", -1)) {
535 add_gopher_link(buffer, name, address.source);
537 } else {
538 add_to_string(buffer, name);
541 done_string(&address);
545 add_char_to_string(buffer, '\n');
549 /* Search for line ending \r\n pair */
550 static unsigned char *
551 get_gopher_line_end(unsigned char *data, int datalen)
553 for (; datalen > 1; data++, datalen--)
554 if (data[0] == ASCII_CR && data[1] == ASCII_LF)
555 return data + 2;
557 return NULL;
560 static inline unsigned char *
561 check_gopher_last_line(unsigned char *line, unsigned char *end)
563 assert(line < end);
565 /* Just to be safe NUL terminate the line */
566 end[-2] = 0;
568 return line[0] == '.' && !line[1] ? NULL : line;
571 /* Parse a Gopher Menu document */
572 static enum connection_state
573 read_gopher_directory_data(struct connection *conn, struct read_buffer *rb)
575 enum connection_state state = S_TRANS;
576 struct string buffer;
577 unsigned char *end;
579 if (conn->from == 0) {
580 enum connection_state state;
582 state = init_directory_listing(&buffer, conn->uri);
583 if (state != S_OK)
584 return state;
586 } else if (!init_string(&buffer)) {
587 return S_OUT_OF_MEM;
590 while ((end = get_gopher_line_end(rb->data, rb->length))) {
591 unsigned char *line = check_gopher_last_line(rb->data, end);
593 /* Break on line with a dot by itself */
594 if (!line) {
595 state = S_OK;
596 break;
599 add_gopher_menu_line(&buffer, line);
600 conn->received += end - rb->data;
601 kill_buffer_data(rb, end - rb->data);
604 if (state != S_TRANS || conn->socket->state == SOCKET_CLOSED)
605 add_to_string(&buffer,
606 "</pre>\n"
607 "</body>\n"
608 "</html>\n");
610 add_fragment(conn->cached, conn->from, buffer.source, buffer.length);
611 conn->from += buffer.length;
613 done_string(&buffer);
615 return state;
619 static struct cache_entry *
620 init_gopher_cache_entry(struct connection *conn)
622 struct gopher_connection_info *gopher = conn->info;
623 struct cache_entry *cached = get_cache_entry(conn->uri);
625 if (!cached) return NULL;
627 conn->cached = cached;
629 if (!cached->content_type
630 && gopher
631 && gopher->entity
632 && gopher->entity->content_type) {
633 cached->content_type = stracpy(gopher->entity->content_type);
634 if (!cached->content_type) return NULL;
637 return cached;
640 /* Display a Gopher Index document. */
641 static enum connection_state
642 init_gopher_index_cache_entry(struct connection *conn)
644 unsigned char *where;
645 struct string buffer;
647 if (!init_gopher_cache_entry(conn)
648 || !init_string(&buffer))
649 return S_OUT_OF_MEM;
651 where = get_uri_string(conn->uri, URI_PUBLIC);
653 /* TODO: Use different function when using UTF-8
654 * in terminal (decode_uri_for_display replaces
655 * bytes of UTF-8 characters width '*'). */
656 if (where) decode_uri_for_display(where);
658 add_format_to_string(&buffer,
659 "<html>\n"
660 "<head>\n"
661 "<title>Searchable gopher index at %s</title>\n"
662 "</head>\n"
663 "<body>\n"
664 "<h1>Searchable gopher index at %s</h1>\n",
665 empty_string_or_(where), empty_string_or_(where));
667 if (where) {
668 add_gopher_search_field(&buffer, "Please enter search keywords",
669 where);
672 mem_free_if(where);
674 /* FIXME: I think this needs a form or something */
676 add_fragment(conn->cached, conn->from, buffer.source, buffer.length);
677 conn->from += buffer.length;
678 done_string(&buffer);
680 conn->cached->content_type = stracpy("text/html");
682 return conn->cached->content_type ? S_OK : S_OUT_OF_MEM;
686 static void
687 read_gopher_response_data(struct socket *socket, struct read_buffer *rb)
689 struct connection *conn = socket->conn;
690 struct gopher_connection_info *gopher = conn->info;
691 enum connection_state state = S_TRANS;
693 assert(gopher && gopher->entity);
695 if (!conn->cached && !init_gopher_cache_entry(conn)) {
696 abort_connection(conn, S_OUT_OF_MEM);
697 return;
700 /* Now read the data from the socket */
701 switch (gopher->entity->type) {
702 case GOPHER_DIRECTORY:
703 case GOPHER_INDEX:
704 state = read_gopher_directory_data(conn, rb);
705 break;
707 case GOPHER_CSO:
708 #if 0
709 /* FIXME: Merge CSO support */
710 state = read_gopher_cso_data(conn, rb);
711 #endif
712 state = S_GOPHER_CSO_ERROR;
713 break;
715 case GOPHER_SOUND:
716 case GOPHER_PLUS_SOUND:
717 case GOPHER_PLUS_MOVIE:
718 case GOPHER_PLUS_PDF:
719 case GOPHER_MACBINHEX:
720 case GOPHER_PCBINARY:
721 case GOPHER_UUENCODED:
722 case GOPHER_BINARY:
723 case GOPHER_FILE:
724 case GOPHER_HTML:
725 case GOPHER_CHTML:
726 case GOPHER_GIF:
727 case GOPHER_IMAGE:
728 case GOPHER_PLUS_IMAGE:
729 default:
730 /* Add the received data as a new cache entry fragment and do
731 * the connection data accounting. */
732 add_fragment(conn->cached, conn->from, rb->data, rb->length);
734 conn->received += rb->length;
735 conn->from += rb->length;
737 kill_buffer_data(rb, rb->length);
740 /* Has the transport layer forced a shut down? */
741 if (socket->state == SOCKET_CLOSED) {
742 state = S_OK;
745 if (state != S_TRANS) {
746 abort_connection(conn, state);
747 return;
750 read_from_socket(conn->socket, rb, S_TRANS, read_gopher_response_data);
754 static void
755 send_gopher_command(struct socket *socket)
757 struct connection *conn = socket->conn;
758 struct gopher_connection_info *gopher = conn->info;
760 request_from_socket(socket, gopher->command, gopher->commandlen,
761 S_SENT, SOCKET_END_ONCLOSE, read_gopher_response_data);
765 /* FIXME: No decoding of strange data types as yet. */
766 void
767 gopher_protocol_handler(struct connection *conn)
769 struct uri *uri = conn->uri;
770 enum connection_state state = S_CONN;
772 switch (get_uri_port(uri)) {
773 case 105:
774 /* If it's a port 105 GOPHER_CSO gopher_entity with no ISINDEX
775 * token ('?'), use the form-based CSO gateway (otherwise,
776 * return an ISINDEX cover page or do the ISINDEX search).
777 * - FM */
778 if (uri->datalen == 1 && *uri->data == GOPHER_CSO) {
779 /* FIXME: redirect_cache() */
780 abort_connection(conn, S_GOPHER_CSO_ERROR);
782 break;
784 case 79:
785 #if 0
786 /* This is outcommented because it apparently means that the
787 * finger protocol handler needs to be extended for handling
788 * this the way Lynx does. --jonas */
789 /* If it's a port 79/0[/...] URL, use the finger gateway.
790 * - FM */
791 if (uri->datalen >= 1 && *uri->data == GOPHER_FILE) {
792 /* FIXME: redirect_cache() */
793 abort_connection(conn, S_OK);
795 #endif
796 break;
799 state = init_gopher_connection_info(conn);
800 if (state != S_CONN) {
801 /* FIXME: Handle bad selector ... */
802 abort_connection(conn, state);
803 return;
806 /* Set up a socket to the server for the data */
807 conn->from = 0;
808 make_connection(conn->socket, conn->uri, send_gopher_command,
809 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);