/*-------------------------------------------------------------------------
 *
 * encode.c
 *	  Various data encoding/decoding things.
 *
 * Copyright (c) 2001-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/encode.c
 *
 *-------------------------------------------------------------------------
 */
18 #include "mb/pg_wchar.h"
19 #include "utils/builtins.h"
20 #include "utils/memutils.h"
/*
 * Encoding conversion API.
 * encode_len() and decode_len() compute the amount of space needed, while
 * encode() and decode() perform the actual conversions.  It is okay for
 * the _len functions to return an overestimate, but not an underestimate.
 * (Having said that, large overestimates could cause unnecessary errors,
 * so it's better to get it right.)  The conversion routines write to the
 * buffer at *res and return the true length of their output.
 */
struct pg_encoding
{
	/* upper bound on the encoded size of data[0..dlen) */
	uint64		(*encode_len) (const char *data, size_t dlen);
	/* upper bound on the decoded size of data[0..dlen) */
	uint64		(*decode_len) (const char *data, size_t dlen);
	/* encode data into res; returns the number of bytes written */
	uint64		(*encode) (const char *data, size_t dlen, char *res);
	/* decode data into res; returns the number of bytes written */
	uint64		(*decode) (const char *data, size_t dlen, char *res);
};
/* Look up an encoding by name; defined at the end of this file. */
static const struct pg_encoding *pg_find_encoding(const char *name);
47 binary_encode(PG_FUNCTION_ARGS
)
49 bytea
*data
= PG_GETARG_BYTEA_PP(0);
50 Datum name
= PG_GETARG_DATUM(1);
57 const struct pg_encoding
*enc
;
59 namebuf
= TextDatumGetCString(name
);
61 enc
= pg_find_encoding(namebuf
);
64 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
65 errmsg("unrecognized encoding: \"%s\"", namebuf
)));
67 dataptr
= VARDATA_ANY(data
);
68 datalen
= VARSIZE_ANY_EXHDR(data
);
70 resultlen
= enc
->encode_len(dataptr
, datalen
);
73 * resultlen possibly overflows uint32, therefore on 32-bit machines it's
74 * unsafe to rely on palloc's internal check.
76 if (resultlen
> MaxAllocSize
- VARHDRSZ
)
78 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
79 errmsg("result of encoding conversion is too large")));
81 result
= palloc(VARHDRSZ
+ resultlen
);
83 res
= enc
->encode(dataptr
, datalen
, VARDATA(result
));
85 /* Make this FATAL 'cause we've trodden on memory ... */
87 elog(FATAL
, "overflow - encode estimate too small");
89 SET_VARSIZE(result
, VARHDRSZ
+ res
);
91 PG_RETURN_TEXT_P(result
);
95 binary_decode(PG_FUNCTION_ARGS
)
97 text
*data
= PG_GETARG_TEXT_PP(0);
98 Datum name
= PG_GETARG_DATUM(1);
105 const struct pg_encoding
*enc
;
107 namebuf
= TextDatumGetCString(name
);
109 enc
= pg_find_encoding(namebuf
);
112 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
113 errmsg("unrecognized encoding: \"%s\"", namebuf
)));
115 dataptr
= VARDATA_ANY(data
);
116 datalen
= VARSIZE_ANY_EXHDR(data
);
118 resultlen
= enc
->decode_len(dataptr
, datalen
);
121 * resultlen possibly overflows uint32, therefore on 32-bit machines it's
122 * unsafe to rely on palloc's internal check.
124 if (resultlen
> MaxAllocSize
- VARHDRSZ
)
126 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
127 errmsg("result of decoding conversion is too large")));
129 result
= palloc(VARHDRSZ
+ resultlen
);
131 res
= enc
->decode(dataptr
, datalen
, VARDATA(result
));
133 /* Make this FATAL 'cause we've trodden on memory ... */
135 elog(FATAL
, "overflow - decode estimate too small");
137 SET_VARSIZE(result
, VARHDRSZ
+ res
);
139 PG_RETURN_BYTEA_P(result
);
/* Maps a 4-bit value (0..15) to its lowercase hexadecimal digit. */
static const char hextbl[] = "0123456789abcdef";
/*
 * Maps a 7-bit character code to the value of that hexadecimal digit
 * (both upper and lower case letters are accepted), or -1 if the
 * character is not a hexadecimal digit.
 */
static const int8 hexlookup[128] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
161 hex_encode(const char *src
, size_t len
, char *dst
)
163 const char *end
= src
+ len
;
167 *dst
++ = hextbl
[(*src
>> 4) & 0xF];
168 *dst
++ = hextbl
[*src
& 0xF];
171 return (uint64
) len
* 2;
175 get_hex(const char *cp
)
177 unsigned char c
= (unsigned char) *cp
;
185 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
186 errmsg("invalid hexadecimal digit: \"%.*s\"",
193 hex_decode(const char *src
, size_t len
, char *dst
)
206 if (*s
== ' ' || *s
== '\n' || *s
== '\t' || *s
== '\r')
211 v1
= get_hex(s
) << 4;
215 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
216 errmsg("invalid hexadecimal data: odd number of digits")));
/*
 * hex_enc_len
 *
 * Space needed to hex-encode srclen bytes: two output characters per
 * input byte.  The data itself (src) is not examined.
 */
static uint64
hex_enc_len(const char *src, size_t srclen)
{
	/* widen first so the doubling cannot wrap a 32-bit size_t */
	return (uint64) srclen << 1;
}
/*
 * hex_dec_len
 *
 * Space needed to decode srclen characters of hex text: one output byte
 * per two input characters.  The data itself (src) is not examined.
 */
static uint64
hex_dec_len(const char *src, size_t srclen)
{
	return (uint64) srclen >> 1;
}
/* Maps a 6-bit value (0..63) to its base64 alphabet character. */
static const char _base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Maps a 7-bit character code to its 6-bit base64 value, or -1 if the
 * character is not part of the base64 alphabet.
 */
static const int8 b64lookup[128] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
	-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
257 pg_base64_encode(const char *src
, size_t len
, char *dst
)
271 buf
|= (unsigned char) *s
<< (pos
<< 3);
278 *p
++ = _base64
[(buf
>> 18) & 0x3f];
279 *p
++ = _base64
[(buf
>> 12) & 0x3f];
280 *p
++ = _base64
[(buf
>> 6) & 0x3f];
281 *p
++ = _base64
[buf
& 0x3f];
294 *p
++ = _base64
[(buf
>> 18) & 0x3f];
295 *p
++ = _base64
[(buf
>> 12) & 0x3f];
296 *p
++ = (pos
== 0) ? _base64
[(buf
>> 6) & 0x3f] : '=';
304 pg_base64_decode(const char *src
, size_t len
, char *dst
)
306 const char *srcend
= src
+ len
,
319 if (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r')
333 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
334 errmsg("unexpected \"=\" while decoding base64 sequence")));
341 if (c
> 0 && c
< 127)
342 b
= b64lookup
[(unsigned char) c
];
345 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
346 errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
347 pg_mblen(s
- 1), s
- 1)));
349 /* add it to buffer */
350 buf
= (buf
<< 6) + b
;
354 *p
++ = (buf
>> 16) & 255;
355 if (end
== 0 || end
> 1)
356 *p
++ = (buf
>> 8) & 255;
357 if (end
== 0 || end
> 2)
366 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
367 errmsg("invalid base64 end sequence"),
368 errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
/*
 * pg_base64_enc_len
 *
 * Upper bound on the space needed to base64-encode srclen bytes.  The
 * data itself (src) is not examined.
 */
static uint64
pg_base64_enc_len(const char *src, size_t srclen)
{
	/* 3 bytes will be converted to 4, linefeed after 76 chars */
	return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4);
}
/*
 * pg_base64_dec_len
 *
 * Upper bound on the space needed to decode srclen characters of base64
 * text: three output bytes per four input characters.  The data itself
 * (src) is not examined.
 */
static uint64
pg_base64_dec_len(const char *src, size_t srclen)
{
	return ((uint64) srclen * 3) >> 2;
}
/*
 * Minimally escape bytea to text.
 * De-escape text to bytea.
 *
 * We must escape zero bytes and high-bit-set bytes to avoid generating
 * text that might be invalid in the current encoding, or that might
 * change to something else if passed through an encoding conversion
 * (leading to failing to de-escape to the original bytea value).
 * Also of course backslash itself has to be escaped.
 *
 * De-escaping processes \\ and any \### octal
 */
/* VAL: numeric value of a digit character; DIG: digit character for a value. */
#define VAL(CH)			((CH) - '0')
#define DIG(VAL)		((VAL) + '0')
/*
 * esc_encode
 *
 * Escape srclen bytes at src into dst.  A zero byte or a byte with the
 * high bit set becomes a backslash followed by three octal digits.
 * Returns the number of bytes written.
 *
 * NOTE(review): only the loop variable, the first condition and one octal
 * digit assignment are visible in this view.  The backslash-doubling and
 * pass-through branches were restored to mirror what esc_decode accepts --
 * confirm against upstream.
 */
static uint64
esc_encode(const char *src, size_t srclen, char *dst)
{
	const char *end = src + srclen;
	char	   *rp = dst;
	uint64		len = 0;

	while (src < end)
	{
		unsigned char c = (unsigned char) *src;

		if (c == '\0' || IS_HIGHBIT_SET(c))
		{
			rp[0] = '\\';
			rp[1] = DIG(c >> 6);
			rp[2] = DIG((c >> 3) & 7);
			rp[3] = DIG(c & 7);
			rp += 4;
			len += 4;
		}
		else if (c == '\\')
		{
			rp[0] = '\\';
			rp[1] = '\\';
			rp += 2;
			len += 2;
		}
		else
		{
			*rp++ = c;
			len++;
		}

		src++;
	}

	return len;
}
444 esc_decode(const char *src
, size_t srclen
, char *dst
)
446 const char *end
= src
+ srclen
;
454 else if (src
+ 3 < end
&&
455 (src
[1] >= '0' && src
[1] <= '3') &&
456 (src
[2] >= '0' && src
[2] <= '7') &&
457 (src
[3] >= '0' && src
[3] <= '7'))
465 *rp
++ = val
+ VAL(src
[3]);
468 else if (src
+ 1 < end
&&
477 * One backslash, not followed by ### valid octal. Should never
478 * get here, since esc_dec_len does same check.
481 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION
),
482 errmsg("invalid input syntax for type %s", "bytea")));
/*
 * esc_enc_len
 *
 * Space needed to escape srclen bytes at src.
 *
 * NOTE(review): only the two conditions are visible in this view; the
 * per-branch increments (4 for an octal escape, 2 for a doubled backslash,
 * 1 otherwise) were restored to match the output sizes esc_decode expects
 * -- confirm against upstream.
 */
static uint64
esc_enc_len(const char *src, size_t srclen)
{
	const char *end = src + srclen;
	uint64		len = 0;

	while (src < end)
	{
		if (*src == '\0' || IS_HIGHBIT_SET(*src))
			len += 4;
		else if (*src == '\\')
			len += 2;
		else
			len++;

		src++;
	}

	return len;
}
513 esc_dec_len(const char *src
, size_t srclen
)
515 const char *end
= src
+ srclen
;
522 else if (src
+ 3 < end
&&
523 (src
[1] >= '0' && src
[1] <= '3') &&
524 (src
[2] >= '0' && src
[2] <= '7') &&
525 (src
[3] >= '0' && src
[3] <= '7'))
528 * backslash + valid octal
532 else if (src
+ 1 < end
&&
536 * two backslashes = backslash
543 * one backslash, not followed by ### valid octal
546 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION
),
547 errmsg("invalid input syntax for type %s", "bytea")));
562 struct pg_encoding enc
;
569 hex_enc_len
, hex_dec_len
, hex_encode
, hex_decode
575 pg_base64_enc_len
, pg_base64_dec_len
, pg_base64_encode
, pg_base64_decode
581 esc_enc_len
, esc_dec_len
, esc_encode
, esc_decode
587 NULL
, NULL
, NULL
, NULL
592 static const struct pg_encoding
*
593 pg_find_encoding(const char *name
)
597 for (i
= 0; enclist
[i
].name
; i
++)
598 if (pg_strcasecmp(enclist
[i
].name
, name
) == 0)
599 return &enclist
[i
].enc
;