Snapshot of upstream SQLite 3.42.0
[sqlcipher.git] / ext / misc / base85.c
blobe7ef0a04c97538bbc66faf56e3094804f7023dfd
1 /*
2 ** 2022-11-16
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
13 ** This is a utility for converting binary to base85 or vice-versa.
14 ** It can be built as a standalone program or an SQLite3 extension.
16 ** Much like base64 representations, base85 can be sent through a
17 ** sane USASCII channel unmolested. It also plays nicely in CSV or
18 ** written as TCL brace-enclosed literals or SQL string literals.
19 ** It is not suited for unmodified use in XML-like documents.
21 ** The encoding used resembles Ascii85, but was devised by the author
22 ** (Larry Brasfield) before Mozilla, Adobe, ZMODEM or other Ascii85
23 ** variant sources existed, in the 1984 timeframe on a VAX mainframe.
24 ** Further, this is an independent implementation of a base85 system.
25 ** Hence, the author has rightfully put this into the public domain.
27 ** Base85 numerals are taken from the set of 7-bit USASCII codes,
28 ** excluding control characters and Space ! " ' ( ) { | } ~ Del
29 ** in code order representing digit values 0 to 84 (base 10.)
31 ** Groups of 4 bytes, interpreted as big-endian 32-bit values,
32 ** are represented as 5-digit base85 numbers with MS to LS digit
33 ** order. Groups of 1-3 bytes are represented with 2-4 digits,
34 ** still big-endian but 8-24 bit values. (Using big-endian yields
35 ** the simplest transition to byte groups smaller than 4 bytes.
36 ** These byte groups can also be considered base-256 numbers.)
37 ** Groups of 0 bytes are represented with 0 digits and vice-versa.
38 ** No pad characters are used; Encoded base85 numeral sequence
39 ** (aka "group") length maps 1-to-1 to the decoded binary length.
41 ** Any character not in the base85 numeral set delimits groups.
42 ** When base85 is streamed or stored in containers of indefinite
43 ** size, newline is used to separate it into sub-sequences of no
44 ** more than 80 digits so that fgets() can be used to read it.
46 ** Length limitations are not imposed except that the runtime
47 ** SQLite string or blob length limits are respected. Otherwise,
48 ** any length binary sequence can be represented and recovered.
49 ** Base85 sequences can be concatenated by separating them with
50 ** a non-base85 character; the conversion to binary will then
51 ** be the concatenation of the represented binary sequences.
53 ** The standalone program either converts base85 on stdin to create
54 ** a binary file or converts a binary file to base85 on stdout.
55 ** Read or make it blurt its help for invocation details.
57 ** The SQLite3 extension creates a function, base85(x), which will
58 ** either convert text base85 to a blob or a blob to text base85
59 ** and return the result (or throw an error for other types.)
60 ** Unless built with OMIT_BASE85_CHECKER defined, it also creates a
61 ** function, is_base85(t), which returns 1 iff the text t contains
62 ** nothing other than base85 numerals and whitespace, or 0 otherwise.
64 ** To build the extension:
65 ** Set shell variable SQDIR=<your favorite SQLite checkout directory>
66 ** and variable OPTS to -DOMIT_BASE85_CHECKER if is_base85() unwanted.
67 ** *Nix: gcc -O2 -shared -I$SQDIR $OPTS -fPIC -o base85.so base85.c
68 ** OSX: gcc -O2 -dynamiclib -fPIC -I$SQDIR $OPTS -o base85.dylib base85.c
69 ** Win32: gcc -O2 -shared -I%SQDIR% %OPTS% -o base85.dll base85.c
70 ** Win32: cl /Os -I%SQDIR% %OPTS% base85.c -link -dll -out:base85.dll
** To build the standalone program, define PP symbol BASE85_STANDALONE. Eg.
**  *Nix or OSX: gcc -O2 -DBASE85_STANDALONE base85.c -o base85
**  Win32: gcc -O2 -DBASE85_STANDALONE -o base85.exe base85.c
**  Win32: cl /Os /MD -DBASE85_STANDALONE base85.c
*/
78 #include <stdio.h>
79 #include <memory.h>
80 #include <string.h>
81 #include <assert.h>
82 #ifndef OMIT_BASE85_CHECKER
83 # include <ctype.h>
84 #endif
86 #ifndef BASE85_STANDALONE
88 # include "sqlite3ext.h"
90 SQLITE_EXTENSION_INIT1;
92 #else
94 # ifdef _WIN32
95 # include <io.h>
96 # include <fcntl.h>
97 # else
98 # define setmode(fd,m)
99 # endif
/* Usage text for the standalone program; written out by sayHelp(). */
static const char zHelp[] =
  "Usage: base85 <dirFlag> <binFile>\n"
  " <dirFlag> is either -r to read or -w to write <binFile>,\n"
  " content to be converted to/from base85 on stdout/stdin.\n"
  " <binFile> names a binary file to be rendered or created.\n"
  " Or, the name '-' refers to the stdin or stdout stream.\n"
  ;
109 static void sayHelp(){
110 printf("%s", zHelp);
112 #endif
#ifndef U8_TYPEDEF
typedef unsigned char u8;
#define U8_TYPEDEF
#endif

/* Classify c according to interval within USASCII set w.r.t. base85
 * Values of 1 and 3 are base85 numerals. Values of 0, 2, or 4 are not.
 *   0: below '#'    1: '#'..'&'    2: '\''..')'    3: '*'..'z'    4: above 'z'
 * The argument is evaluated four times, so it must be free of side-effects.
 */
#define B85_CLASS( c ) (((c)>='#')+((c)>'&')+((c)>='*')+((c)>'z'))

/* Provide digitValue to b85Numeral offset as a function of above class.
 * Subtracting the offset from a numeral yields its digit value; the zero
 * entries mark the classes that hold no numerals. */
static const u8 b85_cOffset[] = { 0, '#', 0, '*'-4, 0 };
#define B85_DNOS( c ) b85_cOffset[B85_CLASS(c)]

/* Say whether c is a base85 numeral. */
#define IS_B85( c ) (B85_CLASS(c) & 1)
#if 0 /* Not used, */
/* Return the digit value (0..84) of base85 numeral c, or 0xff if c is
 * not a base85 numeral. */
static u8 base85DigitValue( char c ){
  u8 dv = (u8)(c - '#');
  if( dv>87 ) return 0xff;  /* below '#' (wraps to a large value) or above 'z' */
  if( dv<4 ) return dv;     /* '#'..'&' are digits 0..3 */
  if( dv<7 ) return 0xff;   /* the three codes between '&' and '*' are excluded */
  return dv-3;              /* '*'..'z' are digits 4..84 */
}
#endif
/* Width of base85 lines. Should be an integer multiple of 5. */
#define B85_DARK_MAX 80
/* Advance s past leading characters that are not base85 numerals,
** looking at no more than nc characters and stopping at a NUL.
** Returns a pointer to the first numeral found, to the NUL, or to
** s+nc when neither occurs within the first nc characters.
*/
static char * skipNonB85( char *s, int nc ){
  for( ; nc>0; --nc, ++s ){
    char c = *s;
    if( c==0 || IS_B85(c) ) break;
  }
  return s;
}
/* Convert small integer, known to be in 0..84 inclusive, to base85 numeral.
 * Digits 0..3 map onto '#'..'&' and digits 4..84 onto '*'..'z'.
 * Do not use the macro form with argument expression having a side-effect.*/
#if 0
static char base85Numeral( u8 b ){
  return (b < 4)? (char)(b + '#') : (char)(b - 4 + '*');
}
#else
# define base85Numeral( dn )\
  ((char)(((dn) < 4)? (char)((dn) + '#') : (char)((dn) - 4 + '*')))
#endif
/* Copy the characters of NUL-terminated string s to pc, leaving out the
** terminating NUL. Returns the position just past the last character
** written. The caller must ensure the destination has enough room.
*/
static char *putcs(char *pc, const char *s){
  while( *s!=0 ) *pc++ = *s++;
  return pc;
}
166 /* Encode a byte buffer into base85 text. If pSep!=0, it's a C string
167 ** to be appended to encoded groups to limit their length to B85_DARK_MAX
168 ** or to terminate the last group (to aid concatenation.)
170 static char* toBase85( u8 *pIn, int nbIn, char *pOut, char *pSep ){
171 int nCol = 0;
172 while( nbIn >= 4 ){
173 int nco = 5;
174 unsigned long qbv = (((unsigned long)pIn[0])<<24) |
175 (pIn[1]<<16) | (pIn[2]<<8) | pIn[3];
176 while( nco > 0 ){
177 unsigned nqv = (unsigned)(qbv/85UL);
178 unsigned char dv = qbv - 85UL*nqv;
179 qbv = nqv;
180 pOut[--nco] = base85Numeral(dv);
182 nbIn -= 4;
183 pIn += 4;
184 pOut += 5;
185 if( pSep && (nCol += 5)>=B85_DARK_MAX ){
186 pOut = putcs(pOut, pSep);
187 nCol = 0;
190 if( nbIn > 0 ){
191 int nco = nbIn + 1;
192 unsigned long qv = *pIn++;
193 int nbe = 1;
194 while( nbe++ < nbIn ){
195 qv = (qv<<8) | *pIn++;
197 nCol += nco;
198 while( nco > 0 ){
199 u8 dv = (u8)(qv % 85);
200 qv /= 85;
201 pOut[--nco] = base85Numeral(dv);
203 pOut += (nbIn+1);
205 if( pSep && nCol>0 ) pOut = putcs(pOut, pSep);
206 *pOut = 0;
207 return pOut;
210 /* Decode base85 text into a byte buffer. */
211 static u8* fromBase85( char *pIn, int ncIn, u8 *pOut ){
212 if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn;
213 while( ncIn>0 ){
214 static signed char nboi[] = { 0, 0, 1, 2, 3, 4 };
215 char *pUse = skipNonB85(pIn, ncIn);
216 unsigned long qv = 0L;
217 int nti, nbo;
218 ncIn -= (pUse - pIn);
219 pIn = pUse;
220 nti = (ncIn>5)? 5 : ncIn;
221 nbo = nboi[nti];
222 if( nbo==0 ) break;
223 while( nti>0 ){
224 char c = *pIn++;
225 u8 cdo = B85_DNOS(c);
226 --ncIn;
227 if( cdo==0 ) break;
228 qv = 85 * qv + (c - cdo);
229 --nti;
231 nbo -= nti; /* Adjust for early (non-digit) end of group. */
232 switch( nbo ){
233 case 4:
234 *pOut++ = (qv >> 24)&0xff;
235 case 3:
236 *pOut++ = (qv >> 16)&0xff;
237 case 2:
238 *pOut++ = (qv >> 8)&0xff;
239 case 1:
240 *pOut++ = qv&0xff;
241 case 0:
242 break;
245 return pOut;
#ifndef OMIT_BASE85_CHECKER
/* Say whether input char sequence is all (base85 and/or whitespace).
** Examines at most len characters of p and stops at a NUL. Returns 1
** if every character examined is a base85 numeral or whitespace,
** 0 otherwise.
*/
static int allBase85( char *p, int len ){
  char c;
  while( len-- > 0 && (c = *p++) != 0 ){
    /* isspace() is only defined for unsigned char values and EOF, so
    ** a plain char (which may be negative) must be converted first. */
    if( !IS_B85(c) && !isspace((unsigned char)c) ) return 0;
  }
  return 1;
}
#endif
259 #ifndef BASE85_STANDALONE
261 # ifndef OMIT_BASE85_CHECKER
262 /* This function does the work for the SQLite is_base85(t) UDF. */
263 static void is_base85(sqlite3_context *context, int na, sqlite3_value *av[]){
264 assert(na==1);
265 switch( sqlite3_value_type(av[0]) ){
266 case SQLITE_TEXT:
268 int rv = allBase85( (char *)sqlite3_value_text(av[0]),
269 sqlite3_value_bytes(av[0]) );
270 sqlite3_result_int(context, rv);
272 break;
273 case SQLITE_NULL:
274 sqlite3_result_null(context);
275 break;
276 default:
277 sqlite3_result_error(context, "is_base85 accepts only text or NULL", -1);
278 return;
281 # endif
283 /* This function does the work for the SQLite base85(x) UDF. */
284 static void base85(sqlite3_context *context, int na, sqlite3_value *av[]){
285 int nb, nc, nv = sqlite3_value_bytes(av[0]);
286 int nvMax = sqlite3_limit(sqlite3_context_db_handle(context),
287 SQLITE_LIMIT_LENGTH, -1);
288 char *cBuf;
289 u8 *bBuf;
290 assert(na==1);
291 switch( sqlite3_value_type(av[0]) ){
292 case SQLITE_BLOB:
293 nb = nv;
294 /* ulongs tail newlines tailenc+nul*/
295 nc = 5*(nv/4) + nv%4 + nv/64+1 + 2;
296 if( nvMax < nc ){
297 sqlite3_result_error(context, "blob expanded to base85 too big", -1);
298 return;
300 bBuf = (u8*)sqlite3_value_blob(av[0]);
301 if( !bBuf ){
302 if( SQLITE_NOMEM==sqlite3_errcode(sqlite3_context_db_handle(context)) ){
303 goto memFail;
305 sqlite3_result_text(context,"",-1,SQLITE_STATIC);
306 break;
308 cBuf = sqlite3_malloc(nc);
309 if( !cBuf ) goto memFail;
310 nc = (int)(toBase85(bBuf, nb, cBuf, "\n") - cBuf);
311 sqlite3_result_text(context, cBuf, nc, sqlite3_free);
312 break;
313 case SQLITE_TEXT:
314 nc = nv;
315 nb = 4*(nv/5) + nv%5; /* may overestimate */
316 if( nvMax < nb ){
317 sqlite3_result_error(context, "blob from base85 may be too big", -1);
318 return;
319 }else if( nb<1 ){
320 nb = 1;
322 cBuf = (char *)sqlite3_value_text(av[0]);
323 if( !cBuf ){
324 if( SQLITE_NOMEM==sqlite3_errcode(sqlite3_context_db_handle(context)) ){
325 goto memFail;
327 sqlite3_result_zeroblob(context, 0);
328 break;
330 bBuf = sqlite3_malloc(nb);
331 if( !bBuf ) goto memFail;
332 nb = (int)(fromBase85(cBuf, nc, bBuf) - bBuf);
333 sqlite3_result_blob(context, bBuf, nb, sqlite3_free);
334 break;
335 default:
336 sqlite3_result_error(context, "base85 accepts only blob or text.", -1);
337 return;
339 return;
340 memFail:
341 sqlite3_result_error(context, "base85 OOM", -1);
345 ** Establish linkage to running SQLite library.
347 #ifndef SQLITE_SHELL_EXTFUNCS
348 #ifdef _WIN32
349 __declspec(dllexport)
350 #endif
351 int sqlite3_base_init
352 #else
353 static int sqlite3_base85_init
354 #endif
355 (sqlite3 *db, char **pzErr, const sqlite3_api_routines *pApi){
356 SQLITE_EXTENSION_INIT2(pApi);
357 (void)pzErr;
358 # ifndef OMIT_BASE85_CHECKER
360 int rc = sqlite3_create_function
361 (db, "is_base85", 1,
362 SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS|SQLITE_UTF8,
363 0, is_base85, 0, 0);
364 if( rc!=SQLITE_OK ) return rc;
366 # endif
367 return sqlite3_create_function
368 (db, "base85", 1,
369 SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS|SQLITE_DIRECTONLY|SQLITE_UTF8,
370 0, base85, 0, 0);
/*
** Define some macros to allow this extension to be built into the shell
** conveniently, in conjunction with use of SQLITE_SHELL_EXTFUNCS. This
** allows shell.c, as distributed, to have this extension built in.
*/
# define BASE85_INIT(db) sqlite3_base85_init(db, 0, 0)
# define BASE85_EXPOSE(db, pzErr) /* Not needed, ..._init() does this. */
381 #else /* standalone program */
383 int main(int na, char *av[]){
384 int cin;
385 int rc = 0;
386 u8 bBuf[4*(B85_DARK_MAX/5)];
387 char cBuf[5*(sizeof(bBuf)/4)+2];
388 size_t nio;
389 # ifndef OMIT_BASE85_CHECKER
390 int b85Clean = 1;
391 # endif
392 char rw;
393 FILE *fb = 0, *foc = 0;
394 char fmode[3] = "xb";
395 if( na < 3 || av[1][0]!='-' || (rw = av[1][1])==0 || (rw!='r' && rw!='w') ){
396 sayHelp();
397 return 0;
399 fmode[0] = rw;
400 if( av[2][0]=='-' && av[2][1]==0 ){
401 switch( rw ){
402 case 'r':
403 fb = stdin;
404 setmode(fileno(stdin), O_BINARY);
405 break;
406 case 'w':
407 fb = stdout;
408 setmode(fileno(stdout), O_BINARY);
409 break;
411 }else{
412 fb = fopen(av[2], fmode);
413 foc = fb;
415 if( !fb ){
416 fprintf(stderr, "Cannot open %s for %c\n", av[2], rw);
417 rc = 1;
418 }else{
419 switch( rw ){
420 case 'r':
421 while( (nio = fread( bBuf, 1, sizeof(bBuf), fb))>0 ){
422 toBase85( bBuf, (int)nio, cBuf, 0 );
423 fprintf(stdout, "%s\n", cBuf);
425 break;
426 case 'w':
427 while( 0 != fgets(cBuf, sizeof(cBuf), stdin) ){
428 int nc = strlen(cBuf);
429 size_t nbo = fromBase85( cBuf, nc, bBuf ) - bBuf;
430 if( 1 != fwrite(bBuf, nbo, 1, fb) ) rc = 1;
431 # ifndef OMIT_BASE85_CHECKER
432 b85Clean &= allBase85( cBuf, nc );
433 # endif
435 break;
436 default:
437 sayHelp();
438 rc = 1;
440 if( foc ) fclose(foc);
442 # ifndef OMIT_BASE85_CHECKER
443 if( !b85Clean ){
444 fprintf(stderr, "Base85 input had non-base85 dark or control content.\n");
446 # endif
447 return rc;
450 #endif