Merge with git+ssh://pasky.or.cz/srv/git/elinks.git
[elinks.git] / src / util / conv.c
blob5beb001f3112679bb216474b5aa511ef8b2a3027
1 /* Conversion functions */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <errno.h>
9 #ifdef HAVE_LIMITS_H
10 #include <limits.h>
11 #endif
12 #include <stdlib.h>
13 #include <string.h>
15 #include "elinks.h"
17 #include "intl/charsets.h" /* NBSP_CHAR */
18 #include "util/conv.h"
19 #include "util/error.h"
20 #include "util/string.h"
21 #include "util/time.h"
/* Write @number, rendered in @base, into string @s starting at position
 * [*@slen] (or at position 0 when @slen is NULL).
 *
 * @width    is the field width. If the number needs more than @width digits
 *           it is truncated and only its _last_ @width digits are written.
 * @fillchar if non-zero, pads a shorter number from the left up to @width;
 *           if zero, no padding is written and fewer than @width chars result.
 * @base     is the numeric base, 2 to 16 inclusive.
 * @upper    selects uppercase (non-zero) or lowercase (zero) hex digits.
 *
 * A NUL char is always written after the last digit. @s must point to a
 * sufficiently large memory space, at least *@slen + @width + 1 bytes.
 * When @slen is non-NULL it is advanced by the number of chars written
 * (not counting the NUL).
 *
 * Examples:
 *
 * elinks_ulongcat(s, NULL, 12345, 4,   0, 10, 0) : s = "2345"
 * elinks_ulongcat(s, NULL,   255, 4, '*', 16, 1) : s = "**FF"
 * elinks_ulongcat(s, NULL,   123, 5, '0', 10, 0) : s = "00123"
 *
 * Note that this function exists to provide a fast and efficient, however
 * still quite powerful alternative to sprintf(). But do not get too
 * enthusiastic, do not use it in cases where it would break i18n.
 *
 * The function returns 0 if OK or the width needed for the whole number to
 * fit, if it had to be truncated. A negative value signals an error
 * (@s is NULL, @width is 0, or @base is out of range).
 *
 * The definition deliberately carries no `inline' specifier: with C99
 * semantics a bare `inline' external definition emits no symbol unless a
 * non-inline declaration is also visible, which breaks callers in other
 * translation units. */
int
elinks_ulongcat(unsigned char *s, unsigned int *slen,
		unsigned long number, unsigned int width,
		unsigned char fillchar, unsigned int base,
		unsigned int upper)
{
	static const unsigned char unum[] = "0123456789ABCDEF";
	static const unsigned char lnum[] = "0123456789abcdef";
	const unsigned char *to_num = (upper ? unum : lnum);
	unsigned int start = slen ? *slen : 0;
	unsigned int nlen = 1; /* '0' is one char, we can't have less. */
	unsigned int pos = start; /* starting position of the number */
	unsigned long q = number;
	int ret = 0;

	if (width < 1 || !s || base < 2 || base > 16) return -1;

	/* Count the length of the number in chars. */
	while (q > (base - 1)) {
		nlen++;
		q /= base;
	}

	/* If max. width attained, truncate. */
	if (nlen > width) {
		ret = nlen;
		nlen = width;
	}

	if (slen) *slen += nlen;

	/* Fill left space with fillchar. */
	if (fillchar) {
		/* ie. width = 4 nlen = 2 -> pad = 2 */
		unsigned int pad = width - nlen;

		if (pad > 0) {
			/* Relocate the start of number. */
			if (slen) *slen += pad;
			pos += pad;

			/* Pad. */
			while (pad > 0) s[--pad + start] = fillchar;
		}
	}

	s[pos + nlen] = '\0';

	/* Now write number starting from end. */
	while (nlen > 0) {
		s[--nlen + pos] = to_num[(number % base)];
		number /= base;
	}

	return ret;
}
/* Similar to elinks_ulongcat() but for a signed long @number.
 *
 * For a negative @number (and non-zero @width) a '-' is written first, at
 * [*@slen] when @slen is given or at the start of @s otherwise, and the
 * remaining digits get @width - 1 columns. The digits, padding, NUL
 * terminator and return value are those of elinks_ulongcat().
 *
 * Returns -1 when @s is NULL. */
int
elinks_longcat(unsigned char *s, unsigned int *slen,
	       long number, unsigned int width,
	       unsigned char fillchar, unsigned int base,
	       unsigned int upper)
{
	unsigned char *p = s;
	unsigned long magnitude = (unsigned long) number;

	/* Refuse a NULL buffer before the sign is stored through it. */
	if (!s) return -1;

	if (number < 0 && width > 0) {
		if (slen) p[(*slen)++] = '-';
		else *(p++) = '-';

		/* Negate in unsigned arithmetic: `-number' overflows (undefined
		 * behaviour) when @number is LONG_MIN, while 0UL - x is well
		 * defined and yields the correct magnitude. */
		magnitude = 0UL - magnitude;
		width--;
	}

	return elinks_ulongcat(p, slen, magnitude, width, fillchar, base, upper);
}
126 struct string *
127 add_long_to_string(struct string *string, long number)
129 unsigned char buffer[32];
130 int length = 0;
131 int width;
133 assert(string);
134 if_assert_failed { return NULL; }
136 width = longcat(buffer, &length, number, sizeof(buffer) - 1, 0);
137 if (width < 0 || !length) return NULL;
139 return add_bytes_to_string(string, buffer, length);
142 struct string *
143 add_knum_to_string(struct string *string, long num)
145 int ret;
146 unsigned char t[32];
147 int tlen = 0;
149 if (num && (num / (1024 * 1024)) * (1024 * 1024) == num) {
150 ret = longcat(&t, &tlen, num / (1024 * 1024), sizeof(t) - 2, 0);
151 t[tlen++] = 'M';
152 t[tlen] = '\0';
153 } else if (num && (num / 1024) * 1024 == num) {
154 ret = longcat(&t, &tlen, num / 1024, sizeof(t) - 2, 0);
155 t[tlen++] = 'k';
156 t[tlen] = '\0';
157 } else {
158 ret = longcat(&t, &tlen, num, sizeof(t) - 1, 0);
161 if (ret < 0 || !tlen) return NULL;
163 add_bytes_to_string(string, t, tlen);
165 return string;
/* Append @xnum to @string as a byte count with a binary-prefix unit:
 * values of at least 2^20 are scaled to "MiB", values of at least 2^10 to
 * "KiB", smaller values are written unscaled followed by " B". When the
 * scaled value is below 10, one decimal digit is written after a '.'.
 * Always returns @string. */
struct string *
add_xnum_to_string(struct string *string, off_t xnum)
{
	/* First byte is the prefix letter; it stays NUL when unscaled. */
	unsigned char prefix[3] = "\0i";
	off_t tenths = -1; /* -1 means "no scaling was applied" */
	off_t unit = 0;

	/* XXX: I don't completely like the computation of the tenths digit
	 * here. --pasky */
	if (xnum >= 1024 * 1024) {
		/* Mebi (Mi), 2^20 */
		prefix[0] = 'M';
		unit = 1024 * 1024;
	} else if (xnum >= 1024) {
		/* Kibi (Ki), 2^10 */
		prefix[0] = 'K';
		unit = 1024;
	}

	if (unit) {
		tenths = (xnum * 10 / unit) % 10;
		xnum /= unit;
	}

	assert(xnum == (long) xnum);
	add_long_to_string(string, xnum);

	if (tenths != -1 && xnum < 10) {
		add_char_to_string(string, '.');
		add_long_to_string(string, tenths);
	}

	add_char_to_string(string, ' ');
	if (prefix[0])
		add_to_string(string, prefix);
	add_char_to_string(string, 'B');

	return string;
}
201 struct string *
202 add_duration_to_string(struct string *string, long seconds)
204 unsigned char q[64];
205 int qlen = 0;
207 if (seconds < 0) seconds = 0;
209 /* Days */
210 if (seconds >= (24 * 3600)) {
211 ulongcat(q, &qlen, (seconds / (24 * 3600)), 5, 0);
212 q[qlen++] = 'd';
213 q[qlen++] = ' ';
216 /* Hours and minutes */
217 if (seconds >= 3600) {
218 seconds %= (24 * 3600);
219 ulongcat(q, &qlen, (seconds / 3600), 4, 0);
220 q[qlen++] = ':';
221 ulongcat(q, &qlen, ((seconds / 60) % 60), 2, '0');
222 } else {
223 /* Only minutes */
224 ulongcat(q, &qlen, (seconds / 60), 2, 0);
227 /* Seconds */
228 q[qlen++] = ':';
229 ulongcat(q, &qlen, (seconds % 60), 2, '0');
231 add_to_string(string, q);
232 return string;
235 struct string *
236 add_timeval_to_string(struct string *string, timeval_T *timeval)
238 return add_duration_to_string(string, timeval_to_seconds(timeval));
#ifdef HAVE_STRFTIME
/* Append a date to @string, formatted with strftime() according to @fmt.
 * The date is *@date, or the current time when @date is NULL, expressed in
 * local time.
 *
 * Returns NULL if the time cannot be converted to local time or if
 * strftime() produced no output (empty result or buffer too small);
 * otherwise returns whatever add_to_string() returns. */
struct string *
add_date_to_string(struct string *string, unsigned char *fmt, time_t *date)
{
	unsigned char buffer[MAX_STR_LEN];
	time_t when_time = date ? *date : time(NULL);
	struct tm *when_local = localtime(&when_time);

	/* localtime() returns NULL on failure; handing that to strftime()
	 * would be undefined behaviour. */
	if (!when_local)
		return NULL;

	/* strftime() returns a size_t, so "no output" is exactly zero. */
	if (strftime((char *) buffer, sizeof(buffer), (const char *) fmt,
		     when_local) == 0)
		return NULL;

	return add_to_string(string, buffer);
}
#endif
256 /* Encoders and string changers */
258 struct string *
259 add_string_replace(struct string *string, unsigned char *src, int len,
260 unsigned char replaceable, unsigned char replacement)
262 int oldlength = string->length;
264 if (!add_bytes_to_string(string, src, len))
265 return NULL;
267 for (src = string->source + oldlength; len; len--, src++)
268 if (*src == replaceable)
269 *src = replacement;
271 return string;
274 struct string *
275 add_html_to_string(struct string *string, const unsigned char *src, int len)
277 for (; len; len--, src++) {
278 if (*src < 0x20 || *src >= 0x7F
279 || *src == '<' || *src == '>' || *src == '&'
280 || *src == '\"' || *src == '\'') {
281 int rollback_length = string->length;
283 if (!add_bytes_to_string(string, "&#", 2)
284 || !add_long_to_string(string, (long) *src)
285 || !add_char_to_string(string, ';')) {
286 string->length = rollback_length;
287 string->source[rollback_length] = '\0';
288 return NULL;
291 } else {
292 if (!add_char_to_string(string, *src))
293 return NULL;
297 return string;
/* Append @len bytes of @src to @string, putting a backslash in front of
 * every byte for which isquote() holds and in front of every backslash.
 * Always returns @string. */
/* TODO Optimize later --pasky */
struct string *
add_quoted_to_string(struct string *string, const unsigned char *src, int len)
{
	while (len) {
		const unsigned char ch = *src;

		if (isquote(ch) || ch == '\\')
			add_char_to_string(string, '\\');
		add_char_to_string(string, ch);

		src++;
		len--;
	}

	return string;
}
/* Append @len bytes of @src to @string wrapped in single quotes. Each
 * single quote inside @src is written as the four-character sequence
 * '\'' (close quote, escaped quote, reopen quote). Always returns
 * @string. */
struct string *
add_shell_quoted_to_string(struct string *string, unsigned char *src, int len)
{
	add_char_to_string(string, '\'');

	while (len) {
		if (*src != '\'') {
			add_char_to_string(string, *src);
		} else {
			add_to_string(string, "'\\''");
		}
		++src;
		len--;
	}

	add_char_to_string(string, '\'');

	return string;
}
/* Append @cmdlen bytes of @cmd to @string, copying bytes accepted by
 * is_safe_in_shell() verbatim and writing every other byte as '%'
 * followed by its two hex digits (via hx()).
 *
 * A '-' is copied verbatim when the previous byte was safe; in that case
 * the "previous byte was safe" state is left untouched. Otherwise '-' is
 * classified like any other byte. Always returns @string. */
struct string *
add_shell_safe_to_string(struct string *string, unsigned char *cmd, int cmdlen)
{
	int last_safe = 0;

	while (cmdlen) {
		const unsigned char ch = *cmd;
		int verbatim;

		if (ch == '-' && last_safe) {
			/* Dash following a safe byte; state is not updated. */
			verbatim = 1;
		} else {
			last_safe = is_safe_in_shell(ch);
			verbatim = last_safe;
		}

		if (verbatim) {
			add_char_to_string(string, ch);
		} else {
			/* XXX: Not all programs we might exec are capable of
			 * decoding these. For some, we should just report
			 * an error rather than exec with an encoded string. */
			add_char_to_string(string, '%');
			add_char_to_string(string, hx((ch & 0xf0) >> 4));
			add_char_to_string(string, hx(ch & 0x0f));
		}

		cmd++;
		cmdlen--;
	}

	return string;
}
/* Multiply @num by @unit, saturating at +/-INT_MAX instead of exceeding
 * the int range. */
static long
strtolx_scale(long num, long unit)
{
	if (num < -INT_MAX / unit) return -INT_MAX;
	if (num > INT_MAX / unit) return INT_MAX;
	return num * unit;
}

/* Parse a decimal number from @str, optionally followed by a 'K' or 'M'
 * postfix (either case) that multiplies it by 1024 or 1024*1024.
 *
 * *@end is set to the first character that was not consumed (past the
 * postfix, if one was recognized). @end must not be NULL.
 *
 * Returns 0 if strtol() reports an error through errno. A scaled result
 * that would not fit in an int is clamped to INT_MAX or -INT_MAX. */
long
strtolx(unsigned char *str, unsigned char **end)
{
	long num;
	unsigned char postfix;

	errno = 0;
	num = strtol((const char *) str, (char **) end, 10);
	if (errno) return 0;

	/* Nothing follows the digits: there is no postfix to look at. (The
	 * pointer itself is never NULL after strtol(); the test that matters
	 * is on the character it points to.) */
	if (!*end || !**end) return num;

	postfix = toupper(**end);

	if (postfix == 'K') {
		(*end)++;
		return strtolx_scale(num, 1024);
	}

	if (postfix == 'M') {
		(*end)++;
		return strtolx_scale(num, 1024 * 1024);
	}

	return num;
}
/* Map a three-letter English month abbreviation at the start of @str
 * ("jan", "Feb", "MAR", ...; case-insensitive) to its month number,
 * 0 for January through 11 for December.
 *
 * Only the first three bytes of @str are examined, and reading stops at
 * the first NUL, so strings shorter than three characters are safe.
 * Returns -1 if @str is NULL, too short, or not a month abbreviation. */
int
month2num(const unsigned char *str)
{
	static const unsigned char names[12][4] = {
		"jan", "feb", "mar", "apr", "may", "jun",
		"jul", "aug", "sep", "oct", "nov", "dec",
	};
	unsigned char month[3];
	int i;

	if (!str) return -1;

	for (i = 0; i < 3; i++) {
		/* Never look past the terminator of a short string. */
		if (!str[i]) return -1;

		/* Setting bit 5 folds ASCII uppercase letters to lowercase. */
		month[i] = str[i] | 32;
	}

	for (i = 0; i < 12; i++) {
		if (!memcmp(month, names[i], 3))
			return i;
	}

	return -1;
}
428 /* This function drops control chars, nbsp char and limit the number of consecutive
429 * space chars to one. It modifies its argument. */
430 void
431 clr_spaces(unsigned char *str)
433 unsigned char *s;
434 unsigned char *dest = str;
436 assert(str);
438 for (s = str; *s; s++)
439 if (*s < ' ' || *s == NBSP_CHAR) *s = ' ';
441 for (s = str; *s; s++) {
442 if (*s == ' ' && (dest == str || s[1] == ' ' || !s[1]))
443 continue;
445 *dest++ = *s;
448 *dest = '\0';
451 /* Replace invalid chars in @title with ' ' and trim all starting/ending
452 * spaces. */
453 void
454 sanitize_title(unsigned char *title)
456 int len = strlen(title);
458 if (!len) return;
460 while (len--) {
461 if (title[len] < ' ' || title[len] == NBSP_CHAR)
462 title[len] = ' ';
464 trim_chars(title, ' ', NULL);
/* Returns 0 if @url contains invalid chars (anything below ' '), 1 if ok.
 * On success it trims starting/ending spaces via trim_chars(). An empty
 * @url is accepted as is; a rejected @url is left unmodified. */
int
sanitize_url(unsigned char *url)
{
	int len = strlen(url);
	int i;

	if (len == 0)
		return 1;

	for (i = 0; i < len; i++) {
		if (url[i] < ' ')
			return 0;
	}

	trim_chars(url, ' ', NULL);

	return 1;
}