Doxygen: @relates
[elinks/kon.git] / src / util / conv.c
blobe35ecdce4a98ccce756a04bab40138faf0dbae4d
1 /** Conversion functions
2 * @file */
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
8 #include <ctype.h>
9 #include <errno.h>
10 #ifdef HAVE_LIMITS_H
11 #include <limits.h>
12 #endif
13 #include <stdlib.h>
14 #include <string.h>
16 #include "elinks.h"
18 #include "intl/charsets.h" /* NBSP_CHAR */
19 #include "util/conv.h"
20 #include "util/error.h"
21 #include "util/string.h"
22 #include "util/time.h"
/** Write @a number into @a s as text in the given @a base.
 *
 * The digits are stored starting at offset [*@a slen] (offset 0 when
 * @a slen is NULL) and a NUL char is always written right after them.
 *
 * - If the number needs more than @a width chars it is truncated and only
 *   its _last_ @a width digits are stored.
 * - If it needs fewer than @a width chars and @a fillchar is non-zero, it
 *   is padded from the left with @a fillchar up to @a width chars. With a
 *   zero @a fillchar no padding is done and fewer chars are written.
 *
 * @param s        destination buffer; it must have room for at least
 *                 *@a slen + @a width + 1 bytes.
 * @param slen     optional in/out offset; on success it is advanced by the
 *                 number of chars written (the NUL is not counted).
 * @param number   the value to format.
 * @param width    maximal (and, when padding, exact) number of chars; must
 *                 be at least 1.
 * @param fillchar padding char, or 0 for no padding.
 * @param base     numeric base, 2 to 16 inclusive.
 * @param upper    non-zero selects "ABCDEF", zero selects "abcdef".
 *
 * Examples:
 *
 * @code
 * elinks_ulongcat(s, NULL, 12345, 4, 0, 10, 0) : s = "2345"
 * elinks_ulongcat(s, NULL, 255, 4, '*', 16, 1) : s = "**FF"
 * elinks_ulongcat(s, NULL, 123, 5, '0', 10, 0) : s = "00123"
 * @endcode
 *
 * This exists as a lightweight alternative to sprintf(); do not use it
 * where it would break i18n.
 *
 * The function is deliberately not declared @c inline: a non-static
 * @c inline definition is not an external definition in C99/C11 unless
 * some other declaration in the translation unit says so, which makes
 * calls from other files (or unoptimized builds) fail to link.
 *
 * @returns 0 if OK, or the width that would have been needed for the whole
 * number if it had to be truncated. -1 signals invalid arguments (nothing
 * is written in that case). */
int
elinks_ulongcat(unsigned char *s, unsigned int *slen,
		unsigned long number, unsigned int width,
		unsigned char fillchar, unsigned int base,
		unsigned int upper)
{
	static const unsigned char unum[] = "0123456789ABCDEF";
	static const unsigned char lnum[] = "0123456789abcdef";
	const unsigned char *to_num = (upper ? unum : lnum);
	unsigned int start = slen ? *slen : 0;
	unsigned int nlen = 1; /* '0' is one char, we can't have less. */
	unsigned int pos = start; /* starting position of the number */
	unsigned long q = number;
	int ret = 0;

	if (width < 1 || !s || base < 2 || base > 16) return -1;

	/* Count the length of the number in chars. */
	while (q > (base - 1)) {
		nlen++;
		q /= base;
	}

	/* If max. width attained, truncate. */
	if (nlen > width) {
		ret = nlen;
		nlen = width;
	}

	if (slen) *slen += nlen;

	/* Fill left space with fillchar. */
	if (fillchar) {
		/* ie. width = 4 nlen = 2 -> pad = 2 */
		unsigned int pad = width - nlen;

		if (pad > 0) {
			/* Relocate the start of number. */
			if (slen) *slen += pad;
			pos += pad;

			/* Pad. */
			while (pad > 0) s[--pad + start] = fillchar;
		}
	}

	s[pos + nlen] = '\0';

	/* Now write number starting from end; when truncated this keeps
	 * the least significant digits. */
	while (nlen > 0) {
		s[--nlen + pos] = to_num[(number % base)];
		number /= base;
	}

	return ret;
}
/** Similar to elinks_ulongcat() but for @c long number.
 *
 * For a negative @a number (and a non-zero @a width) a '-' is stored
 * first and uses up one char of @a width; the magnitude is then formatted
 * by elinks_ulongcat() with the remaining width. When @a slen is given the
 * sign is stored at [*@a slen] and *@a slen is advanced past it; otherwise
 * the sign goes to @a s[0] and the digits follow it.
 *
 * The magnitude is computed in unsigned arithmetic so that @c LONG_MIN
 * does not trigger signed overflow.
 *
 * @returns whatever elinks_ulongcat() returns, or -1 when @a s is NULL.
 * Note that a negative number with @a width 1 stores the '-' and then
 * fails with -1, because no width is left for the digits. */
int
elinks_longcat(unsigned char *s, unsigned int *slen,
	       long number, unsigned int width,
	       unsigned char fillchar, unsigned int base,
	       unsigned int upper)
{
	unsigned char *p = s;
	unsigned long magnitude = (unsigned long) number;

	/* Reject a NULL buffer before the sign is written through it. */
	if (!s) return -1;

	if (number < 0 && width > 0) {
		if (slen) p[(*slen)++] = '-';
		else *(p++) = '-';
		/* Unsigned negation is well defined for every value. */
		magnitude = 0UL - magnitude;
		width--;
	}

	return elinks_ulongcat(p, slen, magnitude, width, fillchar, base, upper);
}
130 /** @relates string */
131 struct string *
132 add_long_to_string(struct string *string, long number)
134 unsigned char buffer[32];
135 int length = 0;
136 int width;
138 assert(string);
139 if_assert_failed { return NULL; }
141 width = longcat(buffer, &length, number, sizeof(buffer) - 1, 0);
142 if (width < 0 || !length) return NULL;
144 return add_bytes_to_string(string, buffer, length);
147 /** @relates string */
148 struct string *
149 add_knum_to_string(struct string *string, long num)
151 int ret;
152 unsigned char t[32];
153 int tlen = 0;
155 if (num && (num / (1024 * 1024)) * (1024 * 1024) == num) {
156 ret = longcat(&t, &tlen, num / (1024 * 1024), sizeof(t) - 2, 0);
157 t[tlen++] = 'M';
158 t[tlen] = '\0';
159 } else if (num && (num / 1024) * 1024 == num) {
160 ret = longcat(&t, &tlen, num / 1024, sizeof(t) - 2, 0);
161 t[tlen++] = 'k';
162 t[tlen] = '\0';
163 } else {
164 ret = longcat(&t, &tlen, num, sizeof(t) - 1, 0);
167 if (ret < 0 || !tlen) return NULL;
169 add_bytes_to_string(string, t, tlen);
171 return string;
/** Append @a xnum to @a string as a byte count with a binary prefix,
 * i.e. "<n> B", "<n> KiB" or "<n> MiB".
 *
 * Values of at least 1024*1024 are scaled to MiB, values of at least 1024
 * to KiB. When the scaled integer part is a single digit (less than 10),
 * one decimal digit is appended after a '.'.
 *
 * Results of the individual append calls are not checked.
 *
 * @returns @a string.
 * @relates string */
struct string *
add_xnum_to_string(struct string *string, off_t xnum)
{
	unsigned char suff[3] = "\0i";
	off_t d = -1;	/* first decimal digit; -1 means "not scaled" */
	int unit = 0;	/* divisor of the chosen prefix; 0 means none */

	if (xnum >= 1024 * 1024) {
		/* Mebi (Mi), 2^20 */
		suff[0] = 'M';
		unit = 1024 * 1024;
	} else if (xnum >= 1024) {
		/* Kibi (Ki), 2^10 */
		suff[0] = 'K';
		unit = 1024;
	}

	if (unit) {
		/* XXX: I don't completely like the computation of d here.
		 * --pasky */
		d = (xnum * 10 / unit) % 10;
		xnum /= unit;
	}

	/* add_long_to_string() takes a long; make sure nothing is lost. */
	assert(xnum == (long) xnum);
	add_long_to_string(string, xnum);

	if (d != -1 && xnum < 10) {
		add_char_to_string(string, '.');
		add_long_to_string(string, d);
	}
	add_char_to_string(string, ' ');

	/* suff is "Mi" or "Ki" when scaled, empty otherwise. */
	if (suff[0])
		add_to_string(string, suff);
	add_char_to_string(string, 'B');

	return string;
}
/** Append a human readable form of a duration of @a seconds to @a string.
 *
 * Negative durations are treated as zero. The text is built in a local
 * buffer with ulongcat() (presumably the base-10 wrapper around
 * elinks_ulongcat(); the macro is not defined in this file):
 *
 * - an optional "<days>d " part when the duration is a day or more,
 * - "<hours>:<MM>" when it is an hour or more, else just the minutes,
 * - always followed by ":<SS>".
 *
 * The result of the final append is not checked.
 *
 * @returns @a string.
 * @relates string */
struct string *
add_duration_to_string(struct string *string, long seconds)
{
	const int secs_per_hour = 3600;
	const int secs_per_day = 24 * 3600;
	unsigned char text[64];
	int used = 0;

	if (seconds < 0)
		seconds = 0;

	/* Days */
	if (seconds >= secs_per_day) {
		ulongcat(text, &used, (seconds / secs_per_day), 5, 0);
		text[used++] = 'd';
		text[used++] = ' ';
	}

	if (seconds < secs_per_hour) {
		/* Only minutes */
		ulongcat(text, &used, (seconds / 60), 2, 0);
	} else {
		/* Hours and minutes; whole days were already printed. */
		seconds %= secs_per_day;
		ulongcat(text, &used, (seconds / secs_per_hour), 4, 0);
		text[used++] = ':';
		ulongcat(text, &used, ((seconds / 60) % 60), 2, '0');
	}

	/* Seconds; this last call also leaves the buffer NUL terminated. */
	text[used++] = ':';
	ulongcat(text, &used, (seconds % 60), 2, '0');

	add_to_string(string, text);

	return string;
}
243 /** @relates string */
244 struct string *
245 add_timeval_to_string(struct string *string, timeval_T *timeval)
247 return add_duration_to_string(string, timeval_to_seconds(timeval));
250 #ifdef HAVE_STRFTIME
251 struct string *
252 add_date_to_string(struct string *string, const unsigned char *fmt,
253 const time_t *date)
255 unsigned char buffer[MAX_STR_LEN];
256 time_t when_time = date ? *date : time(NULL);
257 struct tm *when_local = localtime(&when_time);
259 if (strftime(buffer, sizeof(buffer), fmt, when_local) <= 0)
260 return NULL;
262 return add_to_string(string, buffer);
264 #endif
266 /* Encoders and string changers */
268 struct string *
269 add_string_replace(struct string *string, unsigned char *src, int len,
270 unsigned char replaceable, unsigned char replacement)
272 int oldlength = string->length;
274 if (!add_bytes_to_string(string, src, len))
275 return NULL;
277 for (src = string->source + oldlength; len; len--, src++)
278 if (*src == replaceable)
279 *src = replacement;
281 return string;
284 struct string *
285 add_html_to_string(struct string *string, const unsigned char *src, int len)
287 for (; len; len--, src++) {
288 if (*src < 0x20
289 || *src == '<' || *src == '>' || *src == '&'
290 || *src == '\"' || *src == '\'') {
291 int rollback_length = string->length;
293 if (!add_bytes_to_string(string, "&#", 2)
294 || !add_long_to_string(string, (long) *src)
295 || !add_char_to_string(string, ';')) {
296 string->length = rollback_length;
297 string->source[rollback_length] = '\0';
298 return NULL;
300 } else {
301 if (!add_char_to_string(string, *src))
302 return NULL;
306 return string;
309 struct string *
310 add_cp_html_to_string(struct string *string, int src_codepage,
311 const unsigned char *src, int len)
313 const unsigned char *const end = src + len;
314 unicode_val_T unicode;
316 while (src != end) {
317 if (is_cp_utf8(src_codepage)) {
318 #ifdef CONFIG_UTF8
319 unicode = utf8_to_unicode((unsigned char **) &src,
320 end);
321 if (unicode == UCS_NO_CHAR)
322 break;
323 #else /* !CONFIG_UTF8 */
324 /* Cannot parse UTF-8 without CONFIG_UTF8.
325 * Pretend the input is ISO-8859-1 instead. */
326 unicode = *src++;
327 #endif /* !CONFIG_UTF8 */
328 } else {
329 unicode = cp2u(src_codepage, *src++);
332 if (unicode < 0x20 || unicode >= 0x7F
333 || unicode == '<' || unicode == '>' || unicode == '&'
334 || unicode == '\"' || unicode == '\'') {
335 int rollback_length = string->length;
337 if (!add_bytes_to_string(string, "&#", 2)
338 || !add_long_to_string(string, unicode)
339 || !add_char_to_string(string, ';')) {
340 string->length = rollback_length;
341 string->source[rollback_length] = '\0';
342 return NULL;
344 } else {
345 if (!add_char_to_string(string, unicode))
346 return NULL;
350 return string;
/** Append @a len bytes of @a src to @a string, putting a backslash in
 * front of every backslash and every character for which isquote() is
 * true.
 *
 * Results of the individual append calls are not checked.
 *
 * @returns @a string. */
/* TODO Optimize later --pasky */
struct string *
add_quoted_to_string(struct string *string, const unsigned char *src, int len)
{
	while (len) {
		const int escape = (isquote(*src) || *src == '\\');

		if (escape)
			add_char_to_string(string, '\\');
		add_char_to_string(string, *src);

		src++;
		len--;
	}

	return string;
}
/** Append @a len bytes of @a src to @a string wrapped in single quotes.
 *
 * Each single quote inside the data is written as the four characters
 * '\'' (close the quoted part, an escaped quote, reopen it).
 *
 * Results of the individual append calls are not checked.
 *
 * @returns @a string. */
struct string *
add_shell_quoted_to_string(struct string *string, unsigned char *src, int len)
{
	add_char_to_string(string, '\'');

	while (len) {
		if (*src != '\'')
			add_char_to_string(string, *src);
		else
			add_to_string(string, "'\\''");

		++src;
		len--;
	}

	add_char_to_string(string, '\'');

	return string;
}
/** Append @a cmdlen bytes of @a cmd to @a string, replacing every byte
 * that is not considered shell safe by "%XX", where the two digits are
 * produced by hx() from the high and the low nibble.
 *
 * A byte is copied unchanged when is_safe_in_shell() accepts it. A '-'
 * that directly follows an accepted byte is also copied unchanged without
 * being tested; any other '-' is decided by is_safe_in_shell() like every
 * other byte.
 *
 * Results of the individual append calls are not checked.
 *
 * @returns @a string. */
struct string *
add_shell_safe_to_string(struct string *string, unsigned char *cmd, int cmdlen)
{
	/* Result of the most recent is_safe_in_shell() test. */
	int prev_safe = 0;

	while (cmdlen) {
		int verbatim;

		if (*cmd == '-' && prev_safe) {
			/* prev_safe is intentionally left untouched here. */
			verbatim = 1;
		} else {
			prev_safe = is_safe_in_shell(*cmd);
			verbatim = prev_safe;
		}

		if (verbatim) {
			add_char_to_string(string, *cmd);
		} else {
			/* XXX: Not all programs we might exec are capable of
			 * decoding these. For some, we should just report
			 * an error rather than exec with an encoded string. */
			add_char_to_string(string, '%');
			add_char_to_string(string, hx((*cmd & 0xf0) >> 4));
			add_char_to_string(string, hx(*cmd & 0x0f));
		}

		cmd++;
		cmdlen--;
	}

	return string;
}
/** Parse a decimal number with an optional 'k'/'K' (x1024) or 'm'/'M'
 * (x1024*1024) postfix.
 *
 * @param str the text to parse, handed to strtol() with base 10.
 * @param end if not NULL, receives a pointer to the first char that was
 *            not consumed (just past the postfix, if one was recognized).
 *
 * @returns the parsed value multiplied by the postfix factor. A scaled
 * value outside of -INT_MAX..INT_MAX is clamped to the nearer of those two
 * limits. Returns 0 if strtol() reports an error through errno. */
long
strtolx(unsigned char *str, unsigned char **end)
{
	char *stop = NULL;
	long scale;
	long num;

	errno = 0;
	num = strtol((const char *) str, &stop, 10);

	if (errno) {
		if (end) *end = (unsigned char *) stop;
		return 0;
	}

	/* strtol() always leaves stop pointing into str, so it is safe to
	 * look at the char there; a NUL simply matches no postfix. */
	switch (toupper((unsigned char) *stop)) {
	case 'K':
		scale = 1024;
		break;
	case 'M':
		scale = 1024 * 1024;
		break;
	default:
		if (end) *end = (unsigned char *) stop;
		return num;
	}

	/* Consume the postfix. */
	stop++;
	if (end) *end = (unsigned char *) stop;

	/* Clamp before multiplying so the product cannot overflow. */
	if (num < -INT_MAX / scale) return -INT_MAX;
	if (num > INT_MAX / scale) return INT_MAX;

	return num * scale;
}
/** Convert an English three letter month abbreviation to a month number.
 *
 * Only the first three chars of @a str are examined and the comparison
 * ignores ASCII case, so "Jan", "JAN" and "january" all match. Chars are
 * read one by one and reading stops at the first one that does not match,
 * so a string shorter than three chars is never read past its end.
 *
 * @returns 0 for January up to 11 for December, or -1 if @a str does not
 * start with a month abbreviation. */
int
month2num(const unsigned char *str)
{
	static const unsigned char names[12][4] = {
		"jan", "feb", "mar", "apr", "may", "jun",
		"jul", "aug", "sep", "oct", "nov", "dec",
	};
	int month;

	for (month = 0; month < 12; month++) {
		int i = 0;

		/* OR-ing with 32 folds ASCII upper case letters to lower
		 * case. A NUL becomes ' ' and thus never matches, which
		 * ends the scan before the end of a short string. */
		while (i < 3 && (str[i] | 32) == names[month][i])
			i++;

		if (i == 3)
			return month;
	}

	return -1;
}
481 /** This function drops control chars, nbsp char and limit the number
482 * of consecutive space chars to one. It modifies its argument. */
483 void
484 clr_spaces(unsigned char *str)
486 unsigned char *s;
487 unsigned char *dest = str;
489 assert(str);
491 for (s = str; *s; s++)
492 if (*s < ' ' || *s == NBSP_CHAR) *s = ' ';
494 for (s = str; *s; s++) {
495 if (*s == ' ' && (dest == str || s[1] == ' ' || !s[1]))
496 continue;
498 *dest++ = *s;
501 *dest = '\0';
504 /** Replace invalid chars in @a title with ' ' and trim all starting/ending
505 * spaces. */
506 void
507 sanitize_title(unsigned char *title)
509 int len = strlen(title);
511 if (!len) return;
513 while (len--) {
514 if (title[len] < ' ' || title[len] == NBSP_CHAR)
515 title[len] = ' ';
517 trim_chars(title, ' ', NULL);
/** Check @a url for control chars and trim it.
 *
 * If any char of @a url is below ' ' the URL is rejected and left
 * unchanged. Otherwise trim_chars() is called to strip the spaces at both
 * ends in place. An empty URL is accepted as is.
 *
 * @returns 0 if @a url contains invalid chars, 1 if ok. */
int
sanitize_url(unsigned char *url)
{
	const int len = strlen(url);
	int i;

	if (len == 0)
		return 1;

	for (i = 0; i < len; i++) {
		if (url[i] < ' ')
			return 0;
	}

	trim_chars(url, ' ', NULL);

	return 1;
}