1 /* Gopher access protocol (RFC 1436) */
/* Based on version of HTGopher.c in the lynx tree.
 *
 * 26 Sep 90	Adapted from other accesses (News, HTTP) TBL
 * 29 Nov 91	Downgraded to C, for portable implementation.
 * 10 Mar 96	Foteos Macrides (macrides@sci.wfbr.edu). Added a
 *		form-based CSO/PH gateway. Can be invoked via a
 *		"cso://host[:port]/" or "gopher://host:105/2"
 *		URL. If a gopher URL is used with a query token
 *		('?'), the old ISINDEX procedure will be used
 *		instead of the form-based gateway.
 * 15 Mar 96	Foteos Macrides (macrides@sci.wfbr.edu). Pass
 *		port 79, gtype 0 gopher URLs to the finger
 *		gateway.
 */
33 #include "cache/cache.h"
34 #include "intl/gettext/libintl.h"
35 #include "main/module.h"
36 #include "network/connection.h"
37 #include "network/socket.h"
38 #include "protocol/common.h"
39 #include "protocol/gopher/gopher.h"
40 #include "protocol/protocol.h"
41 #include "protocol/uri.h"
42 #include "util/conv.h"
43 #include "util/memory.h"
44 #include "util/string.h"
46 struct module gopher_protocol_module
= struct_module(
47 /* name: */ N_("Gopher"),
50 /* submodules: */ NULL
,
57 /* Gopher entity types */
59 GOPHER_UNKNOWN
= 0 , /* Special fall-back entity */
61 GOPHER_DIRECTORY
= '1',
64 GOPHER_MACBINHEX
= '4',
65 GOPHER_PCBINARY
= '5',
66 GOPHER_UUENCODED
= '6',
71 GOPHER_HTML
= 'h', /* HTML */
72 GOPHER_CHTML
= 'H', /* HTML */
75 GOPHER_WWW
= 'w', /* W3 address */
78 GOPHER_INFO
= 'i', /* Information or separator line */
79 GOPHER_DUPLICATE
= '+',
80 GOPHER_PLUS_IMAGE
= ':', /* Addition from Gopher Plus */
81 GOPHER_PLUS_MOVIE
= ';',
82 GOPHER_PLUS_SOUND
= '<',
83 GOPHER_PLUS_PDF
= 'P',
86 /* Default Gopher Node type is directory listing */
87 #define DEFAULT_GOPHER_ENTITY GOPHER_DIRECTORY
/* Evaluates to non-zero unless @entity is GOPHER_TELNET, GOPHER_TN3270 or
 * GOPHER_WWW. add_gopher_menu_line() only strips the default gopher port
 * from menu items for which this holds.
 * NOTE: @entity may be expanded up to three times; pass an expression
 * without side effects. */
#define entity_needs_gopher_access(entity) \
	(!((entity) == GOPHER_TELNET \
	   || (entity) == GOPHER_TN3270 \
	   || (entity) == GOPHER_WWW))
94 struct gopher_entity_info
{
95 enum gopher_entity type
;
96 unsigned char *description
;
97 unsigned char *content_type
;
100 /* This table provides some hard-coded associations between entity type
101 * and MIME type. A NULL MIME type in this table indicates
102 * that the MIME type should be deduced from the extension.
104 * - Lynx uses "text/plain" for GOPHER_FILE, but it can be anything.
105 * - Lynx uses "image/gif" for GOPHER_IMAGE and GOPHER_PLUS_IMAGE,
106 * but they really can be anything.
107 * - GOPHER_BINARY can be, for example, a tar ball, so using
108 * "application/octet-stream" is a bad idea.
110 static struct gopher_entity_info gopher_entity_info
[] = {
111 { GOPHER_BINARY
, " (BINARY)", NULL
},
112 { GOPHER_CHTML
, " (CHTML)", "text/html" },
113 { GOPHER_CSO
, " (CSO)", "text/html" },
114 { GOPHER_DIRECTORY
, " (DIRECTORY)", "text/html" },
115 { GOPHER_FILE
, " (FILE)", NULL
/* "text/plain" */ },
116 { GOPHER_GIF
, " (GIF IMAGE)", "image/gif" },
117 { GOPHER_HTML
, " (HTML)", "text/html" },
118 { GOPHER_IMAGE
, " (IMAGE)", NULL
/* "image/gif" */ },
119 { GOPHER_INDEX
, " (INDEX)", "text/html" },
120 { GOPHER_MACBINHEX
, "(BINARY HEX)", "application/octet-stream" },
121 { GOPHER_MIME
, " (MIME)", "application/octet-stream" },
122 { GOPHER_PCBINARY
, " (PCBINARY)", "application/octet-stream" },
123 { GOPHER_PLUS_IMAGE
, " (IMAGE+)", NULL
/* "image/gif" */ },
124 { GOPHER_PLUS_MOVIE
, " (MOVIE)", "video/mpeg" },
125 { GOPHER_PLUS_PDF
, " (PDF)", "application/pdf" },
126 { GOPHER_PLUS_SOUND
, " (SOUND+)", "audio/basic" },
127 { GOPHER_SOUND
, " (SOUND)", "audio/basic" },
128 { GOPHER_TELNET
, " (TELNET)", NULL
},
129 { GOPHER_TN3270
, " (TN3270)", NULL
},
130 { GOPHER_UUENCODED
, " (UUENCODED)", "application/octet-stream" },
131 { GOPHER_WWW
, "(W3 ADDRESS)", NULL
},
133 { GOPHER_INFO
, " ", NULL
},
134 { GOPHER_ERROR
, NULL
, NULL
},
135 /* XXX: Keep GOPHER_UNKNOWN last so it is easy to access. */
136 { GOPHER_UNKNOWN
, " ", "application/octet-stream" },
139 static struct gopher_entity_info
*
140 get_gopher_entity_info(enum gopher_entity type
)
144 for (entry
= 0; entry
< sizeof_array(gopher_entity_info
) - 1; entry
++)
145 if (gopher_entity_info
[entry
].type
== type
)
146 return &gopher_entity_info
[entry
];
148 assert(gopher_entity_info
[entry
].type
== GOPHER_UNKNOWN
);
150 return &gopher_entity_info
[entry
];
153 static unsigned char *
154 get_gopher_entity_description(enum gopher_entity type
)
156 struct gopher_entity_info
*info
= get_gopher_entity_info(type
);
158 return info
? info
->description
: NULL
;
/* Per-connection state of a gopher transfer. */
struct gopher_connection_info {
	/* Record of the entity type being fetched. */
	struct gopher_entity_info *entity;

	/* Number of bytes stored in @command. */
	int commandlen;

	/* The request sent to the server. It is declared with a single
	 * element and the structure is over-allocated to hold the whole
	 * command (see the size computation in
	 * init_gopher_connection_info()), so the array length must stay 1. */
	unsigned char command[1];
};
169 /* De-escape a selector into a command. */
170 /* The % hex escapes are converted. Otherwise, the string is copied. */
172 add_uri_decoded(struct string
*command
, unsigned char *string
, int length
,
175 int oldlen
= command
->length
;
182 /* Remove plus signs 921006 */
183 if (!add_string_replace(command
, string
, length
, '+', ' '))
186 } else if (!add_bytes_to_string(command
, string
, length
)) {
190 assert(command
->length
> oldlen
);
191 /* FIXME: Decoding the whole command string should not be a problem,
192 * and I don't remember why I didn't do that in the first place.
194 decode_uri(command
->source
+ oldlen
);
196 /* Evil decode_uri_string() modifies the string */
197 command
->length
= strlen(command
->source
);
200 static enum connection_state
init_gopher_index_cache_entry(struct connection
*conn
);
/* Build the request for @entity from the URI data (@selector,
 * @selectorlen) into @command, which is initialized here.
 * A '?' in the selector starts the search string, and a leading
 * "search=" (compared case-insensitively) is recognized at its start.
 * The selector is appended %-decoded. The search string is appended with
 * '+' replacement enabled, either after a tab or after the text "query ".
 * The finished command is terminated with CRLF.
 * Some paths release @command again and return the result of
 * init_gopher_index_cache_entry() or S_GOPHER_CSO_ERROR instead.
 * NOTE(review): the conditions selecting between these paths are not
 * visible in this excerpt -- presumably they depend on @entity and on
 * whether a search string is present; confirm before relying on this. */
202 static enum connection_state
203 add_gopher_command(struct connection
*conn
, struct string
*command
,
204 enum gopher_entity entity
,
205 unsigned char *selector
, int selectorlen
)
207 unsigned char *query
;
210 if (!init_string(command
))
213 /* Look for search string */
214 query
= memchr(selector
, '?', selectorlen
);
216 /* Check if no search is required */
217 if (!query
|| !query
[1]) {
219 if (query
) selectorlen
-= 1;
224 querylen
= selector
+ selectorlen
- query
;
226 selectorlen
-= querylen
+ 1;
227 if (querylen
>= 7 && !strncasecmp(query
, "search=", 7)) {
235 /* No search required? */
237 done_string(command
);
238 return init_gopher_index_cache_entry(conn
);
241 add_uri_decoded(command
, selector
, selectorlen
, 0);
242 add_char_to_string(command
, '\t');
243 add_uri_decoded(command
, query
, querylen
, 1);
247 /* No search required */
249 done_string(command
);
250 /* Display "cover page" */
252 return init_gopher_cso_cache_entry(conn
);
254 return S_GOPHER_CSO_ERROR
;
257 add_uri_decoded(command
, selector
, selectorlen
, 0);
258 add_to_string(command
, "query ");
259 add_uri_decoded(command
, query
, querylen
, 1);
264 add_uri_decoded(command
, selector
, selectorlen
, 0);
267 add_crlf_to_string(command
);
/* Set up the gopher state for @conn from its URI.
 * The URI data is taken as the selector. When its second byte is '/',
 * the first byte is read as the entity type character and skipped;
 * otherwise DEFAULT_GOPHER_ENTITY applies. The type is resolved with
 * get_gopher_entity_info(), the request is assembled by
 * add_gopher_command(), and the command bytes are copied into a
 * gopher_connection_info that is allocated with room for them.
 * NOTE(review): the error handling, the return statements and the place
 * where the new state is attached to @conn are not visible in this
 * excerpt. */
272 static enum connection_state
273 init_gopher_connection_info(struct connection
*conn
)
275 struct gopher_connection_info
*gopher
;
276 enum connection_state state
;
277 struct string command
;
278 enum gopher_entity entity
= DEFAULT_GOPHER_ENTITY
;
279 unsigned char *selector
= conn
->uri
->data
;
280 int selectorlen
= conn
->uri
->datalen
;
281 struct gopher_entity_info
*entity_info
;
284 /* Get entity type, and selector string. */
285 /* Pick up gopher_entity */
286 if (selectorlen
> 1 && selector
[1] == '/') {
287 entity
= *selector
++;
291 /* This is probably a hack. It serves as a work around when no entity is
292 * available in the Gopher URI. Instead of segfaulting later the content
293 * will be served as application/octet-stream. However, it could
294 * possibly break handling Gopher URIs with entities which are really
295 * unknown because parts of the real Gopher entity character is added to
296 * the selector. A possible work around is to always expect a '/'
297 * _after_ the Gopher entity. If the <entity-char> '/' combo is not
298 * found assume that the whole URI data part is the selector. */
299 entity_info
= get_gopher_entity_info(entity
);
300 if (entity_info
->type
== GOPHER_UNKNOWN
&& entity
!= GOPHER_UNKNOWN
) {
305 state
= add_gopher_command(conn
, &command
, entity
, selector
, selectorlen
);
309 /* At least the command should contain \r\n to ask the server
311 assert(command
.length
>= 2);
313 size
= sizeof(*gopher
) + command
.length
;
314 gopher
= mem_calloc(1, size
);
316 done_string(&command
);
320 gopher
->entity
= entity_info
;
321 gopher
->commandlen
= command
.length
;
323 memcpy(gopher
->command
, command
.source
, command
.length
);
324 done_string(&command
);
/* Add a link. The title of the destination is set, as there is no way of
 * knowing what the title is when we arrive.
 *
 *	text	points to the text to be put into the file, 0 terminated.
 *	addr	points to the hypertext reference address 0 terminated.
 *
 * NOTE(review): both strings are inserted into the markup as they are;
 * confirm that callers never pass text that needs HTML escaping.
 */
static void
add_gopher_link(struct string *buffer, const unsigned char *text,
		const unsigned char *addr)
{
	add_format_to_string(buffer, "<a href=\"%s\">%s</a>", addr, text);
}
/* Append an HTML search form to @buffer.
 * The visible parts of the format string show a form whose action is a
 * "%s" argument, a text input named "search" (maxlength 256) and a
 * submit button labelled "Search".
 * NOTE(review): the rest of the format string and its argument list are
 * not visible in this excerpt -- presumably @addr fills the form action
 * and @text is shown as the prompt; confirm against the full source. */
348 add_gopher_search_field(struct string
*buffer
, const unsigned char *text
,
349 const unsigned char *addr
)
351 add_format_to_string(buffer
,
352 "<form action=\"%s\">"
356 "<td><input maxlength=\"256\" name=\"search\" value=\"\"></td>"
357 "<td><input type=submit value=\"Search\"></td>"
364 add_gopher_description(struct string
*buffer
, enum gopher_entity entity
)
366 unsigned char *description
= get_gopher_entity_description(entity
);
371 add_to_string(buffer
, "<b>");
372 add_to_string(buffer
, description
);
373 add_to_string(buffer
, "</b> ");
/* Append @selector, a NUL terminated string, to @buffer in URI-encoded
 * form. @selector itself is not modified.
 * Every "//" pair is passed to encode_uri_string() with its last argument
 * set to 1 (presumably the slash conversion flag); all other text is
 * passed with 0. */
static void
encode_selector_string(struct string *buffer, unsigned char *selector)
{
	unsigned char *slashes;

	/* Rather hackishly only convert slashes if there are
	 * two successive ones. */
	while ((slashes = strstr(selector, "//"))) {
		/* Encode just the text in front of the pair. The explicit
		 * length keeps the remainder of the selector out of this
		 * call, which a length of -1 would pull in. */
		if (slashes > selector)
			encode_uri_string(buffer, selector,
					  (int) (slashes - selector), 0);
		encode_uri_string(buffer, "//", 2, 1);

		selector = slashes + 2;
	}

	/* Whatever follows the last pair (or the whole selector). */
	encode_uri_string(buffer, selector, -1, 0);
}
395 add_gopher_menu_line(struct string
*buffer
, unsigned char *line
)
397 /* Gopher menu fields */
398 unsigned char *name
= line
;
399 unsigned char *selector
= NULL
;
400 unsigned char *host
= NULL
;
401 unsigned char *port
= NULL
;
402 enum gopher_entity entity
= *name
++;
405 add_char_to_string(buffer
, '\n');
410 selector
= strchr(name
, ASCII_TAB
);
415 /* Gopher+ Type=0+ objects can be binary, and will have
416 * 9 or 5 beginning their selector. Make sure we don't
417 * trash the terminal by treating them as text. - FM */
418 if (entity
== GOPHER_FILE
419 && (*selector
== GOPHER_BINARY
||
420 *selector
== GOPHER_PCBINARY
))
424 host
= selector
? strchr(selector
, ASCII_TAB
) : NULL
;
426 /* Terminate selector */
430 port
= host
? strchr(host
, ASCII_TAB
) : NULL
;
436 portno
= strtol(port
+ 1, (char **) &end
, 10);
437 if (errno
|| !uri_port_is_valid(portno
)) {
441 /* Try to wipe out the default gopher port
442 * number from being appended to links. */
444 && entity_needs_gopher_access(entity
))
447 /* If the port number is 0 it means no port
448 * number is needed in which case it can be
449 * wiped out completely. Else append it to the
450 * host string a la W3. */
456 /* Chop port if there is junk after the
464 /* Nameless files are separator lines */
465 if (entity
== GOPHER_FILE
) {
466 int i
= strlen(name
) - 1;
468 while (name
[i
] == ' ' && i
>= 0)
472 entity
= GOPHER_INFO
;
475 if (entity
!= GOPHER_INDEX
) {
476 add_gopher_description(buffer
, entity
);
481 /* Gopher pointer to W3 */
483 add_gopher_link(buffer
, name
, selector
);
487 /* Fall through if no selector is defined so the
488 * text is just displayed. */
491 /* Information or separator line */
492 add_to_string(buffer
, name
);
497 struct string address
;
498 unsigned char *format
= selector
&& *selector
499 ? "%s://%s@%s/" : "%s://%s%s/";
501 /* If port is defined it means that both @selector and @host
502 * was correctly parsed. */
503 if (!port
|| !init_string(&address
)) {
504 /* Parse error: Bad menu item */
505 add_to_string(buffer
, name
);
509 assert(selector
&& host
);
511 if (entity
== GOPHER_TELNET
) {
512 add_format_to_string(&address
, format
,
513 "telnet", selector
, host
);
515 } else if (entity
== GOPHER_TN3270
) {
516 add_format_to_string(&address
, format
,
517 "tn3270", selector
, host
);
520 add_format_to_string(&address
, "gopher://%s/%c",
523 /* Encode selector string */
524 encode_selector_string(&address
, selector
);
527 /* Error response from Gopher doesn't deserve to
529 if (entity
== GOPHER_INDEX
) {
530 add_gopher_search_field(buffer
, name
, address
.source
);
532 } else if (address
.length
> 0
533 && strlcmp(address
.source
, address
.length
- 1,
534 "gopher://error.host:1/", -1)) {
535 add_gopher_link(buffer
, name
, address
.source
);
538 add_to_string(buffer
, name
);
541 done_string(&address
);
545 add_char_to_string(buffer
, '\n');
549 /* Search for line ending \r\n pair */
550 static unsigned char *
551 get_gopher_line_end(unsigned char *data
, int datalen
)
553 for (; datalen
> 1; data
++, datalen
--)
554 if (data
[0] == ASCII_CR
&& data
[1] == ASCII_LF
)
/* Terminate the menu line that starts at @line and test it for the
 * end-of-menu marker.
 * @end must point just past the CR LF pair that ends the line; the CR is
 * overwritten with NUL.
 * Returns NULL when the line consists of a single '.', else @line. */
static inline unsigned char *
check_gopher_last_line(unsigned char *line, unsigned char *end)
{
	/* The CR LF pair in front of @end has to belong to this line. */
	assert(end - line >= 2);

	/* Just to be safe NUL terminate the line */
	end[-2] = 0;

	if (line[0] == '.' && !line[1])
		return NULL;

	return line;
}
/* Convert the menu lines currently held in @rb to HTML.
 * Every complete CR LF terminated line is passed to
 * add_gopher_menu_line(), counted in conn->received and removed from @rb
 * with kill_buffer_data(). The generated text is stored as a cache
 * fragment at offset conn->from, which is then advanced by its length.
 * On the first call for a connection (conn->from == 0) the output buffer
 * is set up by init_directory_listing() instead of init_string().
 * NOTE(review): the `state' declared inside the `conn->from == 0' branch
 * shadows the function-level `state'. The return statements and the
 * text appended when the listing ends are not visible in this excerpt. */
571 /* Parse a Gopher Menu document */
572 static enum connection_state
573 read_gopher_directory_data(struct connection
*conn
, struct read_buffer
*rb
)
575 enum connection_state state
= S_TRANS
;
576 struct string buffer
;
579 if (conn
->from
== 0) {
580 enum connection_state state
;
582 state
= init_directory_listing(&buffer
, conn
->uri
);
586 } else if (!init_string(&buffer
)) {
590 while ((end
= get_gopher_line_end(rb
->data
, rb
->length
))) {
591 unsigned char *line
= check_gopher_last_line(rb
->data
, end
);
593 /* Break on line with a dot by itself */
599 add_gopher_menu_line(&buffer
, line
);
600 conn
->received
+= end
- rb
->data
;
601 kill_buffer_data(rb
, end
- rb
->data
);
604 if (state
!= S_TRANS
|| conn
->socket
->state
== SOCKET_CLOSED
)
605 add_to_string(&buffer
,
610 add_fragment(conn
->cached
, conn
->from
, buffer
.source
, buffer
.length
);
611 conn
->from
+= buffer
.length
;
613 done_string(&buffer
);
619 static struct cache_entry
*
620 init_gopher_cache_entry(struct connection
*conn
)
622 struct gopher_connection_info
*gopher
= conn
->info
;
623 struct cache_entry
*cached
= get_cache_entry(conn
->uri
);
625 if (!cached
) return NULL
;
627 conn
->cached
= cached
;
629 if (!cached
->content_type
632 && gopher
->entity
->content_type
) {
633 cached
->content_type
= stracpy(gopher
->entity
->content_type
);
634 if (!cached
->content_type
) return NULL
;
/* Store an HTML page with a search form for the connection's URI in the
 * cache.
 * Sets up the cache entry through init_gopher_cache_entry(), formats the
 * title and the heading with the displayable form of the URI (passed
 * through empty_string_or_()), adds the form with
 * add_gopher_search_field(), stores the page as a fragment at conn->from
 * and sets the content type of the entry to "text/html".
 * Returns S_OK, or S_OUT_OF_MEM when the content type string cannot be
 * allocated.
 * NOTE(review): the return value of the early failure path and the
 * release of `where' are not visible in this excerpt. */
640 /* Display a Gopher Index document. */
641 static enum connection_state
642 init_gopher_index_cache_entry(struct connection
*conn
)
644 unsigned char *where
;
645 struct string buffer
;
647 if (!init_gopher_cache_entry(conn
)
648 || !init_string(&buffer
))
651 where
= get_uri_string(conn
->uri
, URI_PUBLIC
);
653 /* TODO: Use different function when using UTF-8
654 * in terminal (decode_uri_for_display replaces
655 * bytes of UTF-8 characters with '*'). */
656 if (where
) decode_uri_for_display(where
);
658 add_format_to_string(&buffer
,
661 "<title>Searchable gopher index at %s</title>\n"
664 "<h1>Searchable gopher index at %s</h1>\n",
665 empty_string_or_(where
), empty_string_or_(where
));
668 add_gopher_search_field(&buffer
, "Please enter search keywords",
674 /* FIXME: I think this needs a form or something */
676 add_fragment(conn
->cached
, conn
->from
, buffer
.source
, buffer
.length
);
677 conn
->from
+= buffer
.length
;
678 done_string(&buffer
);
680 conn
->cached
->content_type
= stracpy("text/html");
682 return conn
->cached
->content_type
? S_OK
: S_OUT_OF_MEM
;
/* Socket read handler for the reply of the gopher server.
 * Makes sure the connection has a cache entry (aborting with
 * S_OUT_OF_MEM otherwise) and then handles the data in @rb according to
 * the entity type of the connection: GOPHER_DIRECTORY data goes through
 * read_gopher_directory_data(), while the raw path stores the bytes of
 * @rb as a cache fragment at conn->from, adds their count to
 * conn->received and conn->from, and empties @rb.
 * A state other than S_TRANS aborts the connection with that state;
 * otherwise another read with this same handler is requested.
 * NOTE(review): the excerpt does not show which case labels lead to the
 * CSO branch, nor what is done once the socket reports SOCKET_CLOSED. */
687 read_gopher_response_data(struct socket
*socket
, struct read_buffer
*rb
)
689 struct connection
*conn
= socket
->conn
;
690 struct gopher_connection_info
*gopher
= conn
->info
;
691 enum connection_state state
= S_TRANS
;
693 assert(gopher
&& gopher
->entity
);
695 if (!conn
->cached
&& !init_gopher_cache_entry(conn
)) {
696 abort_connection(conn
, S_OUT_OF_MEM
);
700 /* Now read the data from the socket */
701 switch (gopher
->entity
->type
) {
702 case GOPHER_DIRECTORY
:
704 state
= read_gopher_directory_data(conn
, rb
);
709 /* FIXME: Merge CSO support */
710 state
= read_gopher_cso_data(conn
, rb
);
712 state
= S_GOPHER_CSO_ERROR
;
716 case GOPHER_PLUS_SOUND
:
717 case GOPHER_PLUS_MOVIE
:
718 case GOPHER_PLUS_PDF
:
719 case GOPHER_MACBINHEX
:
720 case GOPHER_PCBINARY
:
721 case GOPHER_UUENCODED
:
728 case GOPHER_PLUS_IMAGE
:
730 /* Add the received data as a new cache entry fragment and do
731 * the connection data accounting. */
732 add_fragment(conn
->cached
, conn
->from
, rb
->data
, rb
->length
);
734 conn
->received
+= rb
->length
;
735 conn
->from
+= rb
->length
;
737 kill_buffer_data(rb
, rb
->length
);
740 /* Has the transport layer forced a shut down? */
741 if (socket
->state
== SOCKET_CLOSED
) {
745 if (state
!= S_TRANS
) {
746 abort_connection(conn
, state
);
750 read_from_socket(conn
->socket
, rb
, S_TRANS
, read_gopher_response_data
);
755 send_gopher_command(struct socket
*socket
)
757 struct connection
*conn
= socket
->conn
;
758 struct gopher_connection_info
*gopher
= conn
->info
;
760 request_from_socket(socket
, gopher
->command
, gopher
->commandlen
,
761 S_SENT
, SOCKET_END_ONCLOSE
, read_gopher_response_data
);
/* Entry point of the gopher protocol handler for @conn.
 * Switches on the port of the URI (the comments single out the CSO port
 * 105 and the finger port 79), then builds the gopher state with
 * init_gopher_connection_info(); any result other than S_CONN aborts
 * the connection with that state. Otherwise a connection to the server
 * is made with send_gopher_command() as the callback; the last argument
 * is non-zero when conn->cache_mode is at least CACHE_MODE_FORCE_RELOAD.
 * NOTE(review): the case labels of the port switch and the return type
 * are not visible in this excerpt. */
765 /* FIXME: No decoding of strange data types as yet. */
767 gopher_protocol_handler(struct connection
*conn
)
769 struct uri
*uri
= conn
->uri
;
770 enum connection_state state
= S_CONN
;
772 switch (get_uri_port(uri
)) {
774 /* If it's a port 105 GOPHER_CSO gopher_entity with no ISINDEX
775 * token ('?'), use the form-based CSO gateway (otherwise,
776 * return an ISINDEX cover page or do the ISINDEX search).
778 if (uri
->datalen
== 1 && *uri
->data
== GOPHER_CSO
) {
779 /* FIXME: redirect_cache() */
780 abort_connection(conn
, S_GOPHER_CSO_ERROR
);
786 /* This is commented out because it apparently means that the
787 * finger protocol handler needs to be extended for handling
788 * this the way Lynx does. --jonas */
789 /* If it's a port 79/0[/...] URL, use the finger gateway.
791 if (uri
->datalen
>= 1 && *uri
->data
== GOPHER_FILE
) {
792 /* FIXME: redirect_cache() */
793 abort_connection(conn
, S_OK
);
799 state
= init_gopher_connection_info(conn
);
800 if (state
!= S_CONN
) {
801 /* FIXME: Handle bad selector ... */
802 abort_connection(conn
, state
);
806 /* Set up a socket to the server for the data */
808 make_connection(conn
->socket
, conn
->uri
, send_gopher_command
,
809 conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
);