Retry only for https protocol
[elinks.git] / src / protocol / uri.h
blobdc70ad21801295f37edc19f60c33c9f209d4c90c
1 #ifndef EL__PROTOCOL_URI_H
2 #define EL__PROTOCOL_URI_H
4 #include "main/object.h"
6 struct string;
/* Separator byte stored between the URI proper and the POST data that is
 * kept in the same string (see the layout described for uri.string). */
#define POST_CHAR 1
/* The same separator as a string literal. */
#define POST_CHAR_S "\001"
/* Byte that brackets a file name inside the POST data (see uri.post). */
#define FILE_CHAR '\002'
12 /* The uri structure is used to store the start position and length of commonly
13 * used uri fields. It is initialized by parse_uri(). It is possible that the
14 * start of a field is set but that the length is zero so instead of testing
15 * *uri-><fieldname> always use uri-><fieldname>len. */
/* XXX: Lots of places in the code assume that the string members point into
 * the same string. That means if you need to use a NUL terminated uri field
 * either temporarily modify the string, allocate a copy or change the function
 * used to take a length parameter (in the reverse precedence - modifying the
 * string should not be done since you never know what kind of memory actually
 * contains the string --pasky). */
22 /* TODO: We should probably add path+query members instead of data. */
24 struct uri {
25 /** The start of the URI (and thus start of the protocol %string).
26 * The format of the whole %string is like:
27 * "http6://elinks.cz/dir/file?query#frag" ::POST_CHAR post_data "\0"
29 * The post_data is not really %part of the URI but ELinks keeps it
30 * in the same %string and can then distinguish between cache entries
31 * for different POST requests. See uri.post for its syntax. */
32 unsigned char *string;
34 /* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. */
35 int protocol; /* enum protocol */
37 /* A special ELinks extension allows i.e. 'http4' or 'ftp6' protocols,
38 * forcing the given IP family. 0 means the IP family is not forced. */
39 int ip_family;
41 unsigned char *user;
42 unsigned char *password;
43 unsigned char *host;
44 unsigned char *port;
45 /* @data can contain both the path and query uri fields.
46 * It can never be NULL but can have zero length. */
47 unsigned char *data;
48 unsigned char *fragment;
50 /** POST data attached to the URI. If uri.string contains a
51 * ::POST_CHAR, then @c post points to the following
52 * character. Otherwise NULL. The syntax of the POST data
53 * is:
55 * [content-type '\\n']
56 * (hexadecimal-byte | ::FILE_CHAR file-name ::FILE_CHAR)*
58 * - If content-type is present, ELinks sends "Content-Type: ",
59 * content-type, and CRLF in the head of the POST request.
61 * - Each hexadecimal-byte is a byte for the body of the POST
62 * request. It is encoded as two lower-case hexadecimal
63 * digits, most significant first. For example, "0a" for
64 * ::ASCII_LF.
66 * - file-name is the name of a file that ELinks should send
67 * to the server. It is in the charset accepted by open(),
68 * and some characters (especially ::FILE_CHAR) are
69 * percent-encoded. */
70 unsigned char *post;
72 /* @protocollen should only be usable if @protocol is either
73 * PROTOCOL_USER or an uri string should be composed. */
74 unsigned int protocollen:16;
75 unsigned int userlen:16;
76 unsigned int passwordlen:16;
77 unsigned int hostlen:16;
78 unsigned int portlen:8;
79 unsigned int datalen:16;
80 unsigned int fragmentlen:16;
82 /* Flags */
83 unsigned int ipv6:1; /* URI contains IPv6 host */
84 unsigned int form:1; /* URI originated from form */
86 /* Usage count object. */
87 struct object object;
/* Result codes used as the return type of parse_uri(). */
enum uri_errno {
	URI_ERRNO_OK,			/* Parsing went well */
	URI_ERRNO_EMPTY,		/* The URI string was empty */
	URI_ERRNO_INVALID_PROTOCOL,	/* No protocol was found */
	URI_ERRNO_NO_SLASHES,		/* Slashes after protocol missing */
	URI_ERRNO_TOO_MANY_SLASHES,	/* Too many slashes after protocol */
	URI_ERRNO_TRAILING_DOTS,	/* '.' after host */
	URI_ERRNO_NO_HOST,		/* Host part is missing */
	URI_ERRNO_NO_PORT_COLON,	/* ':' after host without port */
	URI_ERRNO_NO_HOST_SLASH,	/* Slash after host missing */
	URI_ERRNO_IPV6_SECURITY,	/* IPv6 security bug detected */
	URI_ERRNO_INVALID_PORT,		/* Port number is bad */
	URI_ERRNO_INVALID_PORT_RANGE,	/* Port number is not within 0-65535 */
};
/* Initializes the members of the uri struct, as they are encountered.
 * If an uri component is recognized both its length and starting point are
 * set. */
/* Returns what error was encountered or URI_ERRNO_OK if parsing went well. */
enum uri_errno parse_uri(struct uri *uri, unsigned char *uristring);
/* Returns the raw zero-terminated URI string the (struct uri) is associated
 * with. Thus, chances are high that it is the original URI received, not any
 * cheap reconstruction. Expands to the uri's string member. */
#define struri(uri) ((uri)->string)
/* Bitmask selecting which parts of a URI an operation should use. The raw
 * component bits come first, then modifier flags, then predefined
 * combinations for common use cases. */
enum uri_component {
	/**** The "raw" URI components */

	URI_PROTOCOL		= (1 << 0),
	URI_IP_FAMILY		= (1 << 1),
	URI_USER		= (1 << 2),
	URI_PASSWORD		= (1 << 3),
	URI_HOST		= (1 << 4),
	URI_PORT		= (1 << 5),
	URI_DEFAULT_PORT	= (1 << 6),
	URI_DATA		= (1 << 7),
	URI_FRAGMENT		= (1 << 8),
	URI_POST		= (1 << 9),
	URI_POST_INFO		= (1 << 10),


	/**** Flags affecting appearance of the components above, or special
	 * mutations of mixups of some of the raw components. */

	/* Control for ``encoding'' URIs into Internationalized Domain Names.
	 * Hopefully only a few lowlevel places should have to use it and it
	 * should never be exposed to the user. */
	URI_IDN			= (1 << 11),

	/* Add stuff from uri->data and up, prefixed with a '/' */
	URI_PATH		= (1 << 12),

	/* Add filename from last directory separator in uri->data to end of
	 * path. */
	URI_FILENAME		= (1 << 13),

	/* Add query part from uri->data not including the '?' */
	URI_QUERY		= (1 << 14),


	/**** Some predefined classes for formatting of URIs */

	/* Special flags */
	URI_SPECIAL		= URI_DEFAULT_PORT | URI_PATH | URI_FILENAME | URI_QUERY,

	/* The usual suspects */
	URI_RARE		= URI_SPECIAL | URI_POST | URI_POST_INFO | URI_IDN,

	/* Used _only_ for displaying URIs in dialogs or document titles. */
	URI_PUBLIC		= ~(URI_PASSWORD | URI_RARE) | URI_POST_INFO,

	/* Used for getting the original URI with no internal post encoding */
	URI_ORIGINAL		= ~URI_RARE,

	/* Used for getting the URI with no #fragment */
	URI_BASE		= ~(URI_RARE | URI_FRAGMENT) | URI_POST,

	/* Used for getting data-less URI (stuff only up to the slash). */
	URI_SERVER		= ~(URI_RARE | URI_DATA | URI_FRAGMENT),

	/* Used in the HTTP Auth code */
	URI_HTTP_AUTH		= ~(URI_RARE | URI_USER | URI_PASSWORD | URI_DATA | URI_FRAGMENT),

	/* Used for the value of HTTP "Host" header info */
	URI_HTTP_HOST		= URI_HOST | URI_PORT | URI_IDN,

	/* Used for the host part of HTTP referrer. Stripped from user info. */
	URI_HTTP_REFERRER_HOST	= URI_PROTOCOL | URI_HOST | URI_PORT,

	/* Used for the whole HTTP referrer. Contains no user/passwd info. */
	URI_HTTP_REFERRER	= URI_HTTP_REFERRER_HOST | URI_DATA,

	/* Used for HTTP CONNECT method info */
	URI_HTTP_CONNECT	= URI_HOST | URI_PORT | URI_DEFAULT_PORT,

	/* Used for adding directory listing HTML header. */
	URI_DIR_LOCATION	= URI_PROTOCOL | URI_HOST | URI_PORT | URI_IDN,

	/* Used for getting the host of a DNS query. As a hidden bonus we get
	 * IPv6 hostnames without the brackets because we don't ask for
	 * URI_PORT. */
	URI_DNS_HOST		= URI_HOST | URI_IDN,

	/* Used for adding the unproxied URI and encode it using IDN to string */
	URI_PROXY		= ~(URI_RARE | URI_FRAGMENT) | URI_IDN,

	/* Used for comparing keepalive connection URIs */
	/* (We don't need to bother by explicit IP family, we don't care
	 * whether the actual query goes over IPv4 or IPv6 but only about
	 * new connections. Of course another thing is what the user expects
	 * us to care about... ;-) --pasky) */
	URI_KEEPALIVE		= URI_PROTOCOL | URI_USER | URI_PASSWORD | URI_HOST | URI_PORT,

	/* Used for the form action URI using the GET method */
	URI_FORM_GET		= URI_SERVER | URI_PATH,
};
/* List for maintaining multiple URIs. Free it with mem_free() */
struct uri_list {
	int size;		/* Upper bound used by foreach_uri() when walking @uris */
	struct uri **uris;	/* The entries; foreach_uri() skips NULL ones */
};
/* Iterates over @list (a struct uri_list pointer), assigning each non-NULL
 * entry to @uri and its position to @index, and runs the statement that
 * follows the macro for each of them. NULL entries are skipped.
 *
 * The expansion ends in "if (...) continue; else" rather than a bare "if",
 * so that an "else" written by the caller after the loop statement cannot
 * accidentally attach to the macro's internal test. */
#define foreach_uri(uri, index, list) \
	for ((index) = 0; (index) < (list)->size; (index)++) \
		if (!((uri) = (list)->uris[(index)])) continue; else
/* Adds @uri to the URI list */
/* NOTE(review): the meaning of the returned pointer is not documented here;
 * presumably @uri on success and NULL on failure -- confirm in uri.c. */
struct uri *add_to_uri_list(struct uri_list *list, struct uri *uri);

/* Free all entries in the URI list */
void free_uri_list(struct uri_list *list);
/* A small URI struct cache to increase reusability. */
/* XXX: Now there are a few rules to abide.
 *
 * Any URI string that should be registered in the cache has to have lowercased
 * both the protocol and hostname parts. This is strictly checked and will
 * otherwise cause an assertion failure.
 *
 * However this will not be a problem if you either first call join_urls()
 * which you want to do anyway to resolve relative references or use the
 * get_translated_uri() interface.
 *
 * The remaining support for RFC 2396 section 3.1 is done through get_protocol()
 * and get_user_program() which will treat upper case letters
 * as equivalent to lower case in protocol names. */

/* Register a new URI in the cache where @components controls which parts are
 * added to the returned URI. */
struct uri *get_uri(unsigned char *string, enum uri_component components);

/* Dereference an URI from the cache */
void done_uri(struct uri *uri);
/* Take a reference of an URI already registered in the cache.
 * Calls object_lock() on @uri and hands the same pointer back, so the call
 * can be used inline in an expression or assignment.
 * NOTE(review): object_lock() presumably comes from main/object.h and bumps
 * the usage count in uri->object -- confirm there. */
static inline struct uri *
get_uri_reference(struct uri *uri)
{
	object_lock(uri);
	return uri;
}
/* Get URI using the string returned by get_uri_string(@uri, @components) */
struct uri *get_composed_uri(struct uri *uri, enum uri_component components);

/* Resolves an URI relative to a current working directory (CWD) and possibly
 * extracts the fragment. It is possible to just use it to extract fragment
 * and get the resulting URI from the cache.
 * @uristring is the URI to resolve or translate.
 * @cwd if non NULL @uristring will be translated using this CWD. */
struct uri *get_translated_uri(unsigned char *uristring, unsigned char *cwd);

/* Normalizes the directory structure given in uristring. XXX: The function
 * modifies the uristring and returns it. The uri argument should be NULL
 * if the uri is not the parsed uristring. */
unsigned char *normalize_uri(struct uri *uri, unsigned char *uristring);

/* Check if two URIs are equal. If @components are 0 simply compare the whole
 * URI else only compare the specific parts. */
int compare_uri(const struct uri *uri1, const struct uri *uri2,
		enum uri_component components);

/* These functions recreate the URI string part by part. */
/* The @components bitmask describes the set of URI components used for
 * construction of the URI string. */

/* Adds the components to an already initialized string. */
struct string *add_uri_to_string(struct string *string, const struct uri *uri,
				 enum uri_component components);

/* Takes an uri string, parses it and adds the desired components. Useful if
 * there is no struct uri around. */
struct string *add_string_uri_to_string(struct string *string, unsigned char *uristring, enum uri_component components);

/* Returns the new URI string or NULL upon an error. */
unsigned char *get_uri_string(const struct uri *uri,
			      enum uri_component components);

/* Returns either the uri's port number if available or the protocol's
 * default port. It is zero for user protocols. */
int get_uri_port(const struct uri *uri);
/* Inclusive bounds of the TCP port number space. */
#define LOWEST_PORT	0
#define HIGHEST_PORT	65535

/* Non-zero when @port lies within [LOWEST_PORT, HIGHEST_PORT].
 * @port is expanded twice, so pass an expression without side effects. */
#define uri_port_is_valid(port) \
	((port) >= LOWEST_PORT && (port) <= HIGHEST_PORT)
/* Encode and add @namelen bytes from @name to @string. If @namelen is -1 it is
 * set to strlen(@name). If the boolean convert_slashes is zero '/'-chars will
 * not be encoded. */
void encode_uri_string(struct string *string, const unsigned char *name, int namelen,
		       int convert_slashes);

/* special version for Windows directory listing */
void encode_win32_uri_string(struct string *string, unsigned char *name, int namelen);

/* NOTE(review): undocumented here; presumably these decode the encoding
 * produced above, in place -- confirm in uri.c. */
void decode_uri_string(struct string *string);
void decode_uri(unsigned char *uristring);

/* Decodes and replaces illicit screen chars with '*'. */
void decode_uri_string_for_display(struct string *string);
void decode_uri_for_display(unsigned char *uristring);

/* Returns allocated string containing the biggest possible extension.
 * If url is 'jabadaba.1.foo.gz' the returned extension is '1.foo.gz' */
unsigned char *get_extension_from_uri(struct uri *uri);

/* Resolves a @relative URI to absolute form using @base URI.
 * Example: if @base is http://elinks.cz/ and @relative is #news
 * the outcome would be http://elinks.cz/#news */
unsigned char *join_urls(struct uri *base, unsigned char *relative);

/* Return position if end of string @s matches a known tld or -1 if not.
 * If @slen < 0, then string length will be obtained by a strlen() call,
 * else @slen is used as @s length. */
int end_with_known_tld(const unsigned char *s, int slen);
338 static inline int
339 get_real_uri_length(struct uri *uri)
341 return uri->post ? uri->post - struri(uri) - 1 : strlen(struri(uri));
/* Checks if @address contains a valid IP address. */
int is_ip_address(const unsigned char *address, int addresslen);

/* Check whether domain is matching server
 * Ie.
 * example.com matches www.example.com/
 * example.com doesn't match www.example.com.org/
 * example.com doesn't match www.example.comm/
 * example.com doesn't match example.co
 */
int is_in_domain(unsigned char *domain, unsigned char *server, int server_len);
356 #endif