/** Conversion functions */
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "intl/charsets.h" /* NBSP_CHAR */
#include "util/conv.h"
#include "util/error.h"
#include "util/string.h"
#include "util/time.h"
/** Append an unsigned number, formatted in a given base, to a buffer.
 *
 * The digits of @a number are stored in @a s starting at position
 * [*@a slen] (position 0 when @a slen is NULL). If the number would take
 * more space than @a width, it is truncated and only the _last_
 * @a width digits of it are stored. If the number takes less space than
 * @a width and @a fillchar is not NUL, it is padded with @a fillchar from
 * the left; with a NUL @a fillchar no padding is written.
 *
 * @a base defines which base should be used (2 to 16 inclusive).
 * @a upper selects uppercase (non-zero) or lowercase (zero) hex digits.
 *
 * A NUL char is always added at the end of the string, and *@a slen (when
 * given) is advanced past every character written. @a s must point to a
 * sufficiently large memory space, at least *@a slen + @a width + 1.
 *
 * Examples:
 *
 * elinks_ulongcat(s, NULL, 12345, 4, 0, 10, 0)   : s = "2345"
 * elinks_ulongcat(s, NULL, 255, 4, '*', 16, 1)   : s = "**FF"
 * elinks_ulongcat(s, NULL, 123, 5, '0', 10, 0)   : s = "00123"
 *
 * Note that this function exists to provide a fast and efficient, however
 * still quite powerful alternative to sprintf(). If you can use it, use
 * it ;-). But do not get too enthusiastic, do not use it in cases where
 * it would break i18n.
 *
 * @returns 0 if OK, or the width needed for the whole number to fit
 * if it had to be truncated. A negative value signals an error (NULL
 * @a s, zero @a width, or @a base outside 2..16). */
int
elinks_ulongcat(unsigned char *s, unsigned int *slen,
		unsigned long number, unsigned int width,
		unsigned char fillchar, unsigned int base,
		unsigned int upper)
{
	static const unsigned char unum[] = "0123456789ABCDEF";
	static const unsigned char lnum[] = "0123456789abcdef";
	const unsigned char *to_num = (upper ? unum : lnum);
	unsigned int start = slen ? *slen : 0;
	unsigned int nlen = 1; /* '0' is one char, we can't have less. */
	unsigned int pos = start; /* starting position of the number */
	unsigned long q = number;
	int ret = 0;

	if (width < 1 || !s || base < 2 || base > 16) return -1;

	/* Count the length of the number in chars. */
	while (q > (base - 1)) {
		nlen++;
		q /= base;
	}

	/* If max. width attained, truncate and report the needed width. */
	if (nlen > width) {
		ret = nlen;
		nlen = width;
	}

	if (slen) *slen += nlen;

	/* Fill left space with fillchar. */
	if (fillchar) {
		/* ie. width = 4 nlen = 2 -> pad = 2 */
		unsigned int pad = width - nlen;

		if (pad > 0) {
			/* Relocate the start of number. */
			if (slen) *slen += pad;
			pos += pad;

			/* Pad, writing from the right end of the gap. */
			while (pad > 0) s[--pad + start] = fillchar;
		}
	}

	s[pos + nlen] = '\0';

	/* Now write number starting from end; on truncation this keeps
	 * the least significant digits. */
	while (nlen > 0) {
		s[--nlen + pos] = to_num[(number % base)];
		number /= base;
	}

	return ret;
}
/** Similar to elinks_ulongcat() but for @c long number.
 *
 * For a negative @a number (and non-zero @a width) a '-' is written first
 * and takes one position out of @a width; the magnitude is then formatted
 * by elinks_ulongcat() with the remaining width.
 *
 * @returns whatever elinks_ulongcat() returns for the magnitude, or a
 * negative value if @a s is NULL. */
int
elinks_longcat(unsigned char *s, unsigned int *slen,
	       long number, unsigned int width,
	       unsigned char fillchar, unsigned int base,
	       unsigned int upper)
{
	unsigned char *p = s;
	unsigned long magnitude = (unsigned long) number;

	/* Refuse a NULL buffer before the sign is stored through it. */
	if (!s) return -1;

	if (number < 0 && width > 0) {
		if (slen) p[(*slen)++] = '-';
		else *(p++) = '-';

		/* Negate in unsigned arithmetic: -LONG_MIN overflows a
		 * signed long, while 0UL - x is well defined. */
		magnitude = 0UL - magnitude;
		width--;
	}

	return elinks_ulongcat(p, slen, magnitude, width, fillchar, base, upper);
}
130 /** @relates string */
132 add_long_to_string(struct string
*string
, long number
)
134 unsigned char buffer
[32];
139 if_assert_failed
{ return NULL
; }
141 width
= longcat(buffer
, &length
, number
, sizeof(buffer
) - 1, 0);
142 if (width
< 0 || !length
) return NULL
;
144 return add_bytes_to_string(string
, buffer
, length
);
147 /** @relates string */
149 add_knum_to_string(struct string
*string
, long num
)
155 if (num
&& (num
/ (1024 * 1024)) * (1024 * 1024) == num
) {
156 ret
= longcat(&t
, &tlen
, num
/ (1024 * 1024), sizeof(t
) - 2, 0);
159 } else if (num
&& (num
/ 1024) * 1024 == num
) {
160 ret
= longcat(&t
, &tlen
, num
/ 1024, sizeof(t
) - 2, 0);
164 ret
= longcat(&t
, &tlen
, num
, sizeof(t
) - 1, 0);
167 if (ret
< 0 || !tlen
) return NULL
;
169 add_bytes_to_string(string
, t
, tlen
);
174 /** @relates string */
176 add_xnum_to_string(struct string
*string
, off_t xnum
)
178 unsigned char suff
[3] = "\0i";
181 /* XXX: I don't completely like the computation of d here. --pasky */
182 /* Mebi (Mi), 2^20 */
183 if (xnum
>= 1024 * 1024) {
185 d
= (xnum
* (int) 10 / (int) ((int) (1024 * 1024))) % 10;
187 /* Kibi (Ki), 2^10 */
188 } else if (xnum
>= 1024) {
190 d
= (xnum
* (int) 10 / (int) 1024) % 10;
194 assert(xnum
== (long) xnum
);
195 add_long_to_string(string
, xnum
);
197 if (xnum
< 10 && d
!= -1) {
198 add_char_to_string(string
, '.');
199 add_long_to_string(string
, d
);
201 add_char_to_string(string
, ' ');
203 if (suff
[0]) add_to_string(string
, suff
);
204 add_char_to_string(string
, 'B');
208 /** @relates string */
210 add_duration_to_string(struct string
*string
, long seconds
)
215 if (seconds
< 0) seconds
= 0;
218 if (seconds
>= (24 * 3600)) {
219 ulongcat(q
, &qlen
, (seconds
/ (24 * 3600)), 5, 0);
224 /* Hours and minutes */
225 if (seconds
>= 3600) {
226 seconds
%= (24 * 3600);
227 ulongcat(q
, &qlen
, (seconds
/ 3600), 4, 0);
229 ulongcat(q
, &qlen
, ((seconds
/ 60) % 60), 2, '0');
232 ulongcat(q
, &qlen
, (seconds
/ 60), 2, 0);
237 ulongcat(q
, &qlen
, (seconds
% 60), 2, '0');
239 add_to_string(string
, q
);
243 /** @relates string */
245 add_timeval_to_string(struct string
*string
, timeval_T
*timeval
)
247 return add_duration_to_string(string
, timeval_to_seconds(timeval
));
252 add_date_to_string(struct string
*string
, const unsigned char *fmt
,
255 unsigned char buffer
[MAX_STR_LEN
];
256 time_t when_time
= date
? *date
: time(NULL
);
257 struct tm
*when_local
= localtime(&when_time
);
259 if (strftime(buffer
, sizeof(buffer
), fmt
, when_local
) <= 0)
262 return add_to_string(string
, buffer
);
266 /* Encoders and string changers */
269 add_string_replace(struct string
*string
, unsigned char *src
, int len
,
270 unsigned char replaceable
, unsigned char replacement
)
272 int oldlength
= string
->length
;
274 if (!add_bytes_to_string(string
, src
, len
))
277 for (src
= string
->source
+ oldlength
; len
; len
--, src
++)
278 if (*src
== replaceable
)
285 add_html_to_string(struct string
*string
, const unsigned char *src
, int len
)
287 for (; len
; len
--, src
++) {
289 || *src
== '<' || *src
== '>' || *src
== '&'
290 || *src
== '\"' || *src
== '\'') {
291 int rollback_length
= string
->length
;
293 if (!add_bytes_to_string(string
, "&#", 2)
294 || !add_long_to_string(string
, (long) *src
)
295 || !add_char_to_string(string
, ';')) {
296 string
->length
= rollback_length
;
297 string
->source
[rollback_length
] = '\0';
301 if (!add_char_to_string(string
, *src
))
310 add_cp_html_to_string(struct string
*string
, int src_codepage
,
311 const unsigned char *src
, int len
)
313 const unsigned char *const end
= src
+ len
;
314 unicode_val_T unicode
;
317 if (is_cp_utf8(src_codepage
)) {
319 unicode
= utf8_to_unicode((unsigned char **) &src
,
321 if (unicode
== UCS_NO_CHAR
)
323 #else /* !CONFIG_UTF8 */
324 /* Cannot parse UTF-8 without CONFIG_UTF8.
325 * Pretend the input is ISO-8859-1 instead. */
327 #endif /* !CONFIG_UTF8 */
329 unicode
= cp2u(src_codepage
, *src
++);
332 if (unicode
< 0x20 || unicode
>= 0x7F
333 || unicode
== '<' || unicode
== '>' || unicode
== '&'
334 || unicode
== '\"' || unicode
== '\'') {
335 int rollback_length
= string
->length
;
337 if (!add_bytes_to_string(string
, "&#", 2)
338 || !add_long_to_string(string
, unicode
)
339 || !add_char_to_string(string
, ';')) {
340 string
->length
= rollback_length
;
341 string
->source
[rollback_length
] = '\0';
345 if (!add_char_to_string(string
, unicode
))
/* TODO Optimize later --pasky */
/** Append @a len bytes of @a src to @a string, backslash-escaping quotes.
 *
 * A backslash is emitted before every byte for which isquote() is true
 * and before every backslash; all other bytes are copied as they are.
 *
 * @returns @a string. */
struct string *
add_quoted_to_string(struct string *string, const unsigned char *src, int len)
{
	for (; len; len--, src++) {
		if (isquote(*src) || *src == '\\')
			add_char_to_string(string, '\\');
		add_char_to_string(string, *src);
	}

	return string;
}
/** Append @a len bytes of @a src to @a string wrapped in single quotes.
 *
 * Each single quote inside @a src is written as the four characters
 * '\'' (close the quoting, emit an escaped quote, reopen the quoting);
 * every other byte is copied unchanged.
 *
 * @returns @a string. */
struct string *
add_shell_quoted_to_string(struct string *string, unsigned char *src, int len)
{
	add_char_to_string(string, '\'');

	for (; len; len--, ++src) {
		if (*src == '\'')
			add_to_string(string, "'\\''");
		else
			add_char_to_string(string, *src);
	}

	add_char_to_string(string, '\'');

	return string;
}
/** Append @a cmdlen bytes of @a cmd to @a string, %-encoding unsafe bytes.
 *
 * A byte is copied as it is when is_safe_in_shell() accepts it, or when it
 * is a '-' that directly follows a byte that was copied as it is. Any
 * other byte is written as '%' followed by its two hex digits.
 *
 * @returns @a string. */
struct string *
add_shell_safe_to_string(struct string *string, unsigned char *cmd, int cmdlen)
{
	int prev_safe = 0;

	for (; cmdlen; cmdlen--, cmd++) {
		/* A '-' after a safe byte is accepted without consulting
		 * is_safe_in_shell() and leaves prev_safe set; every other
		 * byte refreshes prev_safe. */
		if (!(*cmd == '-' && prev_safe))
			prev_safe = is_safe_in_shell(*cmd);

		if (prev_safe) {
			add_char_to_string(string, *cmd);
		} else {
			/* XXX: Not all programs we might exec are capable of
			 * decoding these.  For some, we should just report
			 * an error rather than exec with an encoded string. */
			add_char_to_string(string, '%');
			add_char_to_string(string, hx((*cmd & 0xf0) >> 4));
			add_char_to_string(string, hx(*cmd & 0x0f));
		}
	}

	return string;
}
/** Parse a decimal number with an optional K or M suffix.
 *
 * @a str is parsed by strtol() in base 10. If the first unparsed
 * character is 'K' or 'M' (either case), the value is multiplied by 1024
 * or 1024 * 1024 and the suffix is consumed.
 *
 * @a end, when not NULL, receives the address of the first character
 * that was not consumed.
 *
 * @returns the parsed value; INT_MAX or -INT_MAX when applying the suffix
 * would take it beyond that range; 0 when strtol() reports an error
 * through errno. */
long
strtolx(unsigned char *str, unsigned char **end)
{
	char *tail = NULL;
	long scale = 0;
	long num;

	errno = 0;
	num = strtol((const char *) str, &tail, 10);
	if (errno) {
		if (end) *end = (unsigned char *) tail;
		return 0;
	}

	switch (toupper((unsigned char) *tail)) {
	case 'K':
		scale = 1024;
		break;
	case 'M':
		scale = 1024 * 1024;
		break;
	default:
		break;
	}

	/* Consume the suffix before reporting the end position. */
	if (scale) tail++;
	if (end) *end = (unsigned char *) tail;

	if (!scale) return num;

	/* Clamp instead of overflowing the multiplication. */
	if (num < -INT_MAX / scale) return -INT_MAX;
	if (num > INT_MAX / scale) return INT_MAX;

	return num * scale;
}
/** Convert a three-letter English month abbreviation to a month index.
 *
 * Only the first three characters of @a str are examined, and they are
 * matched case-insensitively, so "Jan", "JAN" and "January" all match.
 *
 * @returns 0 for jan through 11 for dec, or -1 if @a str is NULL, is
 * shorter than three characters, or names no month. */
int
month2num(const unsigned char *str)
{
	static const char names[12][4] = {
		"jan", "feb", "mar", "apr", "may", "jun",
		"jul", "aug", "sep", "oct", "nov", "dec"
	};
	unsigned char month[3];
	int i;

	if (!str) return -1;

	/* Fold to lowercase with |32 while copying. Stop at a NUL so a
	 * short string is never read past its terminator. */
	for (i = 0; i < 3; i++) {
		if (!str[i]) return -1;
		month[i] = str[i] | 32;
	}

	for (i = 0; i < 12; i++) {
		if (month[0] == (unsigned char) names[i][0]
		    && month[1] == (unsigned char) names[i][1]
		    && month[2] == (unsigned char) names[i][2])
			return i;
	}

	return -1;
}
481 /** This function drops control chars, nbsp char and limit the number
482 * of consecutive space chars to one. It modifies its argument. */
484 clr_spaces(unsigned char *str
)
487 unsigned char *dest
= str
;
491 for (s
= str
; *s
; s
++)
492 if (*s
< ' ' || *s
== NBSP_CHAR
) *s
= ' ';
494 for (s
= str
; *s
; s
++) {
495 if (*s
== ' ' && (dest
== str
|| s
[1] == ' ' || !s
[1]))
504 /** Replace invalid chars in @a title with ' ' and trim all starting/ending
507 sanitize_title(unsigned char *title
)
509 int len
= strlen(title
);
514 if (title
[len
] < ' ' || title
[len
] == NBSP_CHAR
)
517 trim_chars(title
, ' ', NULL
);
520 /** Returns 0 if @a url contains invalid chars, 1 if ok.
521 * It trims starting/ending spaces. */
523 sanitize_url(unsigned char *url
)
525 int len
= strlen(url
);
533 trim_chars(url
, ' ', NULL
);