Remember the fragment of a split character and decode it next time. Idea by Jonas.
[elinks.git] / src / util / conv.c
blob248f52ddeeee298bd81c2e785f3d1357e2b12831
1 /* Conversion functions */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <errno.h>
9 #ifdef HAVE_LIMITS_H
10 #include <limits.h>
11 #endif
12 #include <stdlib.h>
13 #include <string.h>
15 #include "elinks.h"
17 #include "intl/charsets.h" /* NBSP_CHAR */
18 #include "util/conv.h"
19 #include "util/error.h"
20 #include "util/string.h"
21 #include "util/time.h"
/* This function takes string @s and stores the @number (of a result width
 * @width) in string format there, starting at position [*@slen] (or at
 * position 0 when @slen is NULL). If the number would take more space than
 * @width, it is truncated and only the _last_ digits of it are inserted to
 * the string. If the number takes less space than @width and @fillchar is
 * non-zero, it is padded by @fillchar from the left; with a zero @fillchar
 * no padding is written and fewer than @width chars are produced.
 * @base defines which base should be used (2 to 16).
 * @upper selects either uppercase or lowercase hexadecimal digits.
 * When @slen is non-NULL it is advanced by the number of chars written (the
 * terminating NUL is not counted).
 *
 * A NUL char is always added at the end of the string. @s must point to a
 * sufficiently large memory space, at least *@slen + @width + 1.
 *
 * Examples:
 *
 * elinks_ulongcat(s, NULL, 12345, 4, 0, 10, 0) : s = "2345"
 * elinks_ulongcat(s, NULL, 255, 4, '*', 16, 1) : s = "**FF"
 * elinks_ulongcat(s, NULL, 123, 5, '0', 10, 0) : s = "00123"
 *
 * Note that this function exists to provide a fast and efficient, however
 * still quite powerful alternative to sprintf(). But do not get too
 * enthusiastic, do not use it in cases where it would break i18n.
 *
 * The function returns 0 if OK or the width needed for the whole number to
 * fit there, if it had to be truncated. A negative value signals an error
 * (@s is NULL, @width is 0, or @base is outside 2..16). */
/* No bare `inline' here: under C99/C11 rules an `inline' definition without
 * `static' or `extern' does not provide an external definition, so callers
 * in other translation units (or non-inlined calls) would fail to link. */
int
elinks_ulongcat(unsigned char *s, unsigned int *slen,
		unsigned long number, unsigned int width,
		unsigned char fillchar, unsigned int base,
		unsigned int upper)
{
	static const unsigned char unum[] = "0123456789ABCDEF";
	static const unsigned char lnum[] = "0123456789abcdef";
	const unsigned char *to_num = upper ? unum : lnum;
	unsigned int start = slen ? *slen : 0;
	unsigned int nlen = 1; /* '0' is one char, we can't have less. */
	unsigned int pos = start; /* starting position of the number */
	unsigned long q = number;
	int ret = 0;

	if (width < 1 || !s || base < 2 || base > 16) return -1;

	/* Count the length of the number in chars. */
	while (q > (base - 1)) {
		nlen++;
		q /= base;
	}

	/* If max. width attained, truncate. */
	if (nlen > width) {
		ret = nlen;
		nlen = width;
	}

	if (slen) *slen += nlen;

	/* Fill left space with fillchar. */
	if (fillchar) {
		/* ie. width = 4 nlen = 2 -> pad = 2 */
		unsigned int pad = width - nlen;

		if (pad > 0) {
			/* Relocate the start of number. */
			if (slen) *slen += pad;
			pos += pad;

			/* Pad. */
			while (pad > 0) s[--pad + start] = fillchar;
		}
	}

	s[pos + nlen] = '\0';

	/* Now write number starting from end. */
	while (nlen > 0) {
		s[--nlen + pos] = to_num[(number % base)];
		number /= base;
	}

	return ret;
}
/* Similar to elinks_ulongcat() but for a signed long @number.
 *
 * When @number is negative and @width is non-zero, a '-' is written first
 * (at [*@slen] when @slen is given, otherwise at the start of @s), one
 * column of @width is consumed by the sign, and the magnitude is then
 * formatted by elinks_ulongcat(). The return value is the one of
 * elinks_ulongcat(); -1 is returned right away when @s is NULL.
 *
 * The magnitude is computed in unsigned arithmetic so that LONG_MIN, whose
 * negation does not fit in a long, is handled without signed overflow. */
int
elinks_longcat(unsigned char *s, unsigned int *slen,
	       long number, unsigned int width,
	       unsigned char fillchar, unsigned int base,
	       unsigned int upper)
{
	unsigned char *p = s;
	unsigned long magnitude = (unsigned long) number;

	/* Writing the sign would dereference @s before the callee gets a
	 * chance to reject it. */
	if (!s) return -1;

	if (number < 0 && width > 0) {
		if (slen) p[(*slen)++] = '-';
		else *(p++) = '-';
		magnitude = 0UL - magnitude;
		width--;
	}

	return elinks_ulongcat(p, slen, magnitude, width, fillchar, base, upper);
}
126 struct string *
127 add_long_to_string(struct string *string, long number)
129 unsigned char buffer[32];
130 int length = 0;
131 int width;
133 assert(string);
134 if_assert_failed { return NULL; }
136 width = longcat(buffer, &length, number, sizeof(buffer) - 1, 0);
137 if (width < 0 || !length) return NULL;
139 return add_bytes_to_string(string, buffer, length);
142 struct string *
143 add_knum_to_string(struct string *string, long num)
145 int ret;
146 unsigned char t[32];
147 int tlen = 0;
149 if (num && (num / (1024 * 1024)) * (1024 * 1024) == num) {
150 ret = longcat(&t, &tlen, num / (1024 * 1024), sizeof(t) - 2, 0);
151 t[tlen++] = 'M';
152 t[tlen] = '\0';
153 } else if (num && (num / 1024) * 1024 == num) {
154 ret = longcat(&t, &tlen, num / 1024, sizeof(t) - 2, 0);
155 t[tlen++] = 'k';
156 t[tlen] = '\0';
157 } else {
158 ret = longcat(&t, &tlen, num, sizeof(t) - 1, 0);
161 if (ret < 0 || !tlen) return NULL;
163 add_bytes_to_string(string, t, tlen);
165 return string;
/* Add a byte count @xnum to @string with a binary unit suffix.
 * Values of at least 2^20 are scaled to MiB, values of at least 2^10 to KiB,
 * smaller ones are left in bytes. When a unit was applied and the scaled
 * integral part is below 10, one decimal digit is added after a '.'.
 * The number is followed by a space, the optional "Mi"/"Ki" and 'B'. */
struct string *
add_xnum_to_string(struct string *string, off_t xnum)
{
	unsigned char suff[3] = "\0i";
	off_t tenth = -1;	/* First decimal digit; -1 means "none". */
	int divisor = 0;

	/* XXX: I don't completely like the computation of the decimal digit
	 * here. --pasky */
	if (xnum >= 1024 * 1024) {
		/* Mebi (Mi), 2^20 */
		suff[0] = 'M';
		divisor = 1024 * 1024;
	} else if (xnum >= 1024) {
		/* Kibi (Ki), 2^10 */
		suff[0] = 'K';
		divisor = 1024;
	}

	if (divisor) {
		tenth = (xnum * 10 / divisor) % 10;
		xnum /= divisor;
	}

	assert(xnum == (long) xnum);
	add_long_to_string(string, xnum);

	if (tenth != -1 && xnum < 10) {
		add_char_to_string(string, '.');
		add_long_to_string(string, tenth);
	}
	add_char_to_string(string, ' ');

	if (suff[0])
		add_to_string(string, suff);
	add_char_to_string(string, 'B');

	return string;
}
/* Add a duration of @seconds to @string. Negative durations are treated as
 * zero. The text is built in a local buffer with ulongcat() as
 * "[<days>d ]<hours>:<MM>:<SS>" when the duration is at least one hour and
 * as "<minutes>:<SS>" otherwise; MM and SS are zero-padded to two digits. */
struct string *
add_duration_to_string(struct string *string, long seconds)
{
	unsigned char q[64];
	int qlen = 0;
	long days, hours, minutes, secs;
	int show_hours;

	if (seconds < 0) seconds = 0;

	/* Split the duration up front; the hours are counted within the
	 * current day only since whole days are printed separately. */
	show_hours = (seconds >= 3600);
	days = seconds / (24 * 3600);
	hours = (seconds % (24 * 3600)) / 3600;
	minutes = (seconds / 60) % 60;
	secs = seconds % 60;

	/* Days */
	if (days > 0) {
		ulongcat(q, &qlen, days, 5, 0);
		q[qlen++] = 'd';
		q[qlen++] = ' ';
	}

	if (show_hours) {
		/* Hours and minutes */
		ulongcat(q, &qlen, hours, 4, 0);
		q[qlen++] = ':';
		ulongcat(q, &qlen, minutes, 2, '0');
	} else {
		/* Only minutes */
		ulongcat(q, &qlen, minutes, 2, 0);
	}

	/* Seconds */
	q[qlen++] = ':';
	ulongcat(q, &qlen, secs, 2, '0');

	add_to_string(string, q);
	return string;
}
235 struct string *
236 add_timeval_to_string(struct string *string, timeval_T *timeval)
238 return add_duration_to_string(string, timeval_to_seconds(timeval));
#ifdef HAVE_STRFTIME
/* Format a date according to the strftime() format @fmt and add it to
 * @string. @date points to the time to format; when it is NULL the current
 * time is used. The time is converted with localtime().
 * Returns NULL when @fmt is NULL, when localtime() fails, or when
 * strftime() produces nothing (empty result or a result that does not fit
 * in MAX_STR_LEN bytes); otherwise returns the result of add_to_string(). */
struct string *
add_date_to_string(struct string *string, unsigned char *fmt, time_t *date)
{
	unsigned char buffer[MAX_STR_LEN];
	time_t when_time = date ? *date : time(NULL);
	struct tm *when_local;

	if (!fmt) return NULL;

	/* localtime() may return NULL (e.g. for an unrepresentable time);
	 * passing that on to strftime() would be undefined behaviour. */
	when_local = localtime(&when_time);
	if (!when_local) return NULL;

	/* strftime() returns a size_t: 0 is the only failure value. */
	if (strftime((char *) buffer, sizeof(buffer),
		     (const char *) fmt, when_local) == 0)
		return NULL;

	return add_to_string(string, buffer);
}
#endif
256 /* Encoders and string changers */
258 struct string *
259 add_string_replace(struct string *string, unsigned char *src, int len,
260 unsigned char replaceable, unsigned char replacement)
262 int oldlength = string->length;
264 if (!add_bytes_to_string(string, src, len))
265 return NULL;
267 for (src = string->source + oldlength; len; len--, src++)
268 if (*src == replaceable)
269 *src = replacement;
271 return string;
/* Add @len bytes of @src to @string, escaped for HTML.
 * Alphanumeric chars, '-', '_', ' ', '.', ':' and ';' are added verbatim;
 * every other byte is added as a decimal numeric character reference
 * ("&#<value>;"). Always returns @string. */
struct string *
add_html_to_string(struct string *string, unsigned char *src, int len)
{

#define isalphanum(q) (isalnum(q) || (q) == '-' || (q) == '_')

	while (len) {
		int verbatim = isalphanum(*src) || *src == ' '
			       || *src == '.' || *src == ':' || *src == ';';

		if (!verbatim) {
			add_bytes_to_string(string, "&#", 2);
			add_long_to_string(string, (long) *src);
			add_char_to_string(string, ';');
		} else {
			add_bytes_to_string(string, src, 1);
		}

		src++;
		len--;
	}

#undef isalphanum

	return string;
}
/* Add @len bytes of @src to @string, putting a backslash in front of every
 * char for which isquote() is true and in front of every backslash.
 * Always returns @string. */
/* TODO Optimize later --pasky */
struct string *
add_quoted_to_string(struct string *string, unsigned char *src, int len)
{
	while (len) {
		int needs_escape = isquote(*src) || *src == '\\';

		if (needs_escape)
			add_char_to_string(string, '\\');
		add_char_to_string(string, *src);

		src++;
		len--;
	}

	return string;
}
/* Add @len bytes of @src to @string wrapped in single quotes. Each single
 * quote inside @src is written as '\'' (close the quoted part, add an
 * escaped quote, reopen the quoted part). Always returns @string. */
struct string *
add_shell_quoted_to_string(struct string *string, unsigned char *src, int len)
{
	add_char_to_string(string, '\'');

	while (len) {
		if (*src != '\'')
			add_char_to_string(string, *src);
		else
			add_to_string(string, "'\\''");

		++src;
		len--;
	}

	add_char_to_string(string, '\'');

	return string;
}
/* Add @cmdlen bytes of @cmd to @string. A char is added verbatim when
 * is_safe_in_shell() accepts it, or when it is a '-' directly following a
 * char that was accepted. Every other char is added as '%' followed by its
 * two hexadecimal digits, each produced by hx(). Always returns @string. */
struct string *
add_shell_safe_to_string(struct string *string, unsigned char *cmd, int cmdlen)
{
	/* Whether the most recently classified char was safe. A '-' that is
	 * let through because of it does not update this flag. */
	int prev_safe = 0;

	for (; cmdlen; cmdlen--, cmd++) {
		int literal;

		if (*cmd == '-' && prev_safe) {
			literal = 1;
		} else {
			prev_safe = is_safe_in_shell(*cmd);
			literal = prev_safe;
		}

		if (literal) {
			add_char_to_string(string, *cmd);
			continue;
		}

		/* XXX: Not all programs we might exec are capable of
		 * decoding these. For some, we should just report
		 * an error rather than exec with an encoded string. */
		add_char_to_string(string, '%');
		add_char_to_string(string, hx((*cmd & 0xf0) >> 4));
		add_char_to_string(string, hx(*cmd & 0x0f));
	}

	return string;
}
/* Parse a base-10 number from @str like strtol(), additionally accepting a
 * one-letter size postfix: 'k'/'K' multiplies the value by 1024 and
 * 'm'/'M' by 1024 * 1024.
 *
 * @end, when not NULL, receives the position of the first char that was not
 * consumed (the postfix letter counts as consumed). It may be NULL if the
 * caller is not interested in it.
 *
 * Returns 0 when strtol() sets errno. When a postfix is applied the result
 * is clamped to the range -INT_MAX..INT_MAX instead of overflowing. */
long
strtolx(unsigned char *str, unsigned char **end)
{
	char *stop = NULL;
	long num;
	long scale;
	int postfix;

	errno = 0;
	num = strtol((const char *) str, &stop, 10);
	if (end) *end = (unsigned char *) stop;
	if (errno) return 0;

	/* Nothing follows the number, so there is no postfix to look at. */
	if (!stop || !*stop) return num;

	postfix = toupper((unsigned char) *stop);
	if (postfix == 'K') {
		scale = 1024;
	} else if (postfix == 'M') {
		scale = 1024 * 1024;
	} else {
		return num;
	}

	/* The postfix letter belongs to the number. */
	if (end) (*end)++;

	if (num < -INT_MAX / scale) return -INT_MAX;
	if (num > INT_MAX / scale) return INT_MAX;
	return num * scale;
}
/* Convert a three-letter English month abbreviation to a month number.
 * Only the first three chars of @str are looked at and letter case is
 * ignored, so "Jan", "JAN" and "january" all match.
 * Returns 0 (jan) to 11 (dec), or -1 when @str is NULL, shorter than three
 * chars or not a known month abbreviation. */
int
month2num(const unsigned char *str)
{
	static const unsigned char names[] =
		"jan" "feb" "mar" "apr" "may" "jun"
		"jul" "aug" "sep" "oct" "nov" "dec";
	unsigned char month[3];
	int i;

	/* Do not read past the terminating NUL of a short string. */
	if (!str || !str[0] || !str[1] || !str[2]) return -1;

	/* Setting bit 5 turns ASCII uppercase letters into lowercase. */
	month[0] = str[0] | 32;
	month[1] = str[1] | 32;
	month[2] = str[2] | 32;

	for (i = 0; i < 12; i++) {
		if (!memcmp(month, names + 3 * i, 3))
			return i;
	}

	return -1;
}
424 /* This function drops control chars, nbsp char and limit the number of consecutive
425 * space chars to one. It modifies its argument. */
426 void
427 clr_spaces(unsigned char *str)
429 unsigned char *s;
430 unsigned char *dest = str;
432 assert(str);
434 for (s = str; *s; s++)
435 if (*s < ' ' || *s == NBSP_CHAR) *s = ' ';
437 for (s = str; *s; s++) {
438 if (*s == ' ' && (dest == str || s[1] == ' ' || !s[1]))
439 continue;
441 *dest++ = *s;
444 *dest = '\0';
447 /* Replace invalid chars in @title with ' ' and trim all starting/ending
448 * spaces. */
449 void
450 sanitize_title(unsigned char *title)
452 int len = strlen(title);
454 if (!len) return;
456 while (len--) {
457 if (title[len] < ' ' || title[len] == NBSP_CHAR)
458 title[len] = ' ';
460 trim_chars(title, ' ', NULL);
/* Returns 0 if @url contains invalid chars (anything below ' '), 1 if ok.
 * A valid @url gets its starting/ending spaces trimmed by trim_chars();
 * an invalid one is left unmodified. An empty @url is considered ok. */
int
sanitize_url(unsigned char *url)
{
	int len = strlen((const char *) url);

	if (!len) return 1;

	while (len--) {
		if (url[len] < ' ')
			return 0;
	}

	trim_chars(url, ' ', NULL);
	return 1;
}