dsforth: moved 8x8 printer to separate include (and made it configurable); added...
[urasm.git] / src / urlocase.c
blob82978b0d95a6d337cf0f67b724584be333a164b3
1 // convert UrAsm source code to lower case (preserving strings and labels)
2 // coded by Ketmar // Invisible Vector
3 // GPLv3 or later
4 //
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <unistd.h>
11 #include "liburasm/liburasm.h"
//**************************************************************************
//
// very simple string library
//
//**************************************************************************

/*
 * Reference-counted string.
 *
 * Several VStr values may share one `data` buffer; `rc` points to the
 * counter shared by all of them. The helpers below keep a zero byte
 * right after the `length` payload bytes of every buffer they allocate.
 */
typedef struct {
  char *data; /* string bytes, or NULL for an empty string */
  size_t length; /* number of payload bytes in `data` */
  int *rc; /* refcounter; NULL means "statically allocated" */
} VStr;
27 // initialise VStr structure
28 static __attribute__((unused)) inline void vstr_init (VStr *str) { if (str) { str->data = NULL; str->length = 0; str->rc = NULL; } }
31 // free VStr
32 static __attribute__((unused)) inline void vstr_free (VStr *str) {
33 if (str) {
34 if (str->rc) {
35 if (--(*str->rc) == 0) {
36 free(str->rc);
37 if (str->data) free(str->data);
39 str->data = NULL;
40 str->length = 0;
41 str->rc = NULL;
47 // copy VStr (both strings should be initialised)
48 static __attribute__((unused)) inline void vstr_copy (VStr *dest, const VStr *src) {
49 if (!dest) return;
50 if (!src) { vstr_free(dest); return; }
51 if (dest->data != src->data) {
52 /* different strings */
53 vstr_free(dest);
54 dest->data = src->data;
55 dest->length = src->length;
56 dest->rc = src->rc;
58 if (dest->rc) ++(*dest->rc);
62 // ensure that the given string is uniqie and modifiable
63 static __attribute__((unused)) inline void vstr_make_unique (VStr *str) {
64 if (!str) return;
65 if (str->length == 0) {
66 vstr_free(str); /* just in case */
67 return;
69 if (str->rc && *str->rc == 1) return; /* nothing to do */
70 /* create new string */
71 char *ns = malloc(str->length+1);
72 memcpy(ns, str->data, str->length);
73 ns[str->length] = 0;
74 int *nrc = malloc(sizeof(int));
75 *nrc = 1;
76 if (str->rc) --(*str->rc);
77 str->data = ns;
78 str->rc = nrc;
82 // replace string with static contents
83 static __attribute__((unused)) inline void vstr_set_static (VStr *str, const char *s) {
84 if (!str) return;
85 vstr_free(str);
86 if (s && s[0]) {
87 str->data = (char *)s;
88 str->length = strlen(s);
93 // create string with static contents
94 static __attribute__((unused)) inline void vstr_init_static (VStr *str, const char *s) {
95 vstr_init(str);
96 vstr_set_static(str, s);
100 // replace string with dynamic contents
101 static __attribute__((unused)) inline void vstr_set_dynamic (VStr *str, const char *s) {
102 if (!str) return;
103 vstr_free(str);
104 if (s && s[0]) {
105 str->length = strlen(s);
106 str->data = malloc(str->length+1);
107 memcpy(str->data, s, str->length);
108 str->data[str->length] = 0;
113 // create string with dynamic contents
114 static __attribute__((unused)) inline void vstr_init_dynamic (VStr *str, const char *s) {
115 vstr_init(str);
116 vstr_set_dynamic(str, s);
120 // replace string with dynamic contents and the given length
121 static __attribute__((unused)) inline void vstr_set_dynamic_ex (VStr *str, const char *s, size_t len) {
122 if (!str) return;
123 vstr_free(str);
124 if (len) {
125 str->length = len;
126 str->data = malloc(len+1);
127 if (s) {
128 memcpy(str->data, s, len);
129 str->data[len] = 0;
130 } else {
131 memset(str->data, 0, len+1);
137 // create string with dynamic contents and the given length
138 static __attribute__((unused)) inline void vstr_init_dynamic_ex (VStr *str, const char *s, size_t len) {
139 vstr_init(str);
140 vstr_set_dynamic_ex(str, s, len);
144 // append source to destination
145 static __attribute__((unused)) inline void vstr_cat_vstr (VStr *dest, const VStr *src) {
146 if (!dest || !src || src->length == 0) return;
147 vstr_make_unique(dest);
148 // make sure that we have `rc` allocated
149 if (!dest->rc) { dest->rc = malloc(sizeof(int)); *dest->rc = 1; }
150 dest->data = realloc(dest->data, src->length+dest->length+1);
151 memmove(dest->data+dest->length, src->data, src->length);
152 dest->length += src->length;
153 dest->data[dest->length] = 0;
157 // append source to destination
158 static __attribute__((unused)) inline void vstr_cat_cstr (VStr *dest, const char *s) {
159 if (!dest || !s || !s[0]) return;
160 VStr tmp;
161 if (dest->data && s >= dest->data && s < dest->data+dest->length) {
162 /* oops; inside */
163 vstr_init_dynamic(&tmp, s);
164 } else {
165 vstr_init_static(&tmp, s);
167 vstr_cat_vstr(dest, &tmp);
168 vstr_free(&tmp);
172 //==========================================================================
174 // load_file
176 //==========================================================================
177 static void load_file (const VStr *fname, VStr *text) {
178 if (!text) {
179 fprintf(stderr, "FATAL: cannot load file into nowhere!\n");
180 exit(1);
182 if (!fname || fname->length == 0) {
183 fprintf(stderr, "FATAL: cannot open nameless file for reading!\n");
184 exit(1);
186 FILE *fl = fopen(fname->data, "rb");
187 if (!fl) {
188 fprintf(stderr, "FATAL: cannot open file '%s' for reading!\n", fname->data);
189 exit(1);
191 fseek(fl, 0, SEEK_END);
192 long size = ftell(fl);
193 if (size <= 0 || size > 1024*1024*32) {
194 fclose(fl);
195 fprintf(stderr, "FATAL: file '%s' is either too big, or too small!\n", fname->data);
196 exit(1);
198 char *res = malloc((size_t)size+1);
199 fseek(fl, 0, SEEK_SET);
200 if (fread(res, (size_t)size, 1, fl) != 1) {
201 fclose(fl);
202 fprintf(stderr, "FATAL: error reading file '%s'!\n", fname->data);
203 exit(1);
205 res[(size_t)size] = 0;
206 vstr_free(text);
207 text->data = res;
208 text->length = (size_t)size;
209 text->rc = malloc(sizeof(int));
210 *text->rc = 1;
214 //==========================================================================
216 // save_file
218 //==========================================================================
219 static void save_file (const VStr *fname, const VStr *text) {
220 if (!fname || fname->length == 0) {
221 fprintf(stderr, "FATAL: cannot create nameless file!\n");
222 exit(1);
224 FILE *fl = fopen(fname->data, "wb");
225 if (!fl) {
226 fprintf(stderr, "FATAL: cannot create file '%s'!\n", fname->data);
227 exit(1);
229 if (text && text->length) {
230 if (fwrite(text->data, text->length, 1, fl) != 1) {
231 fclose(fl);
232 unlink(fname->data);
233 fprintf(stderr, "FATAL: error writing file '%s'!\n", fname->data);
234 exit(1);
237 fclose(fl);
//==========================================================================
//
//  isAlpha
//
//  Returns non-zero for the ASCII letters 'A'..'Z' and 'a'..'z', and zero
//  for everything else.
//
//==========================================================================
static inline int isAlpha (const char ch) {
  return
    (ch >= 'A' && ch <= 'Z') ||
    (ch >= 'a' && ch <= 'z');
}
//==========================================================================
//
//  isIdChar
//
//  Returns non-zero for characters accepted inside an identifier:
//  ASCII letters, decimal digits, '@', '_' and '.'.
//
//==========================================================================
static inline int isIdChar (const char ch) {
  return
    (ch >= 'A' && ch <= 'Z') ||
    (ch >= 'a' && ch <= 'z') ||
    (ch >= '0' && ch <= '9') ||
    ch == '@' || ch == '_' || ch == '.';
}
//==========================================================================
//
//  toLower
//
//  Converts 'A'..'Z' to 'a'..'z'; any other character is returned as is.
//
//==========================================================================
static inline char toLower (const char ch) {
  return (ch >= 'A' && ch <= 'Z' ? (char)(ch-'A'+'a') : ch);
}
//==========================================================================
//
//  toUpper
//
//  Converts 'a'..'z' to 'A'..'Z'; any other character is returned as is.
//
//==========================================================================
static inline char toUpper (const char ch) {
  return (ch >= 'a' && ch <= 'z' ? (char)(ch-'a'+'A') : ch);
}
283 //==========================================================================
285 // vstr_part_strEquCI_cstr
287 //==========================================================================
288 static int vstr_part_strEquCI_cstr (const VStr *str, size_t start, size_t len, const char *cstr) {
289 if (!str) abort();
290 if (start >= str->length) return (!cstr || !cstr[0]);
291 if (len > str->length-start) len = str->length-start;
292 if (!cstr || !cstr[0]) return (len == 0);
293 const size_t cstrlen = strlen(cstr);
294 if (cstrlen != len) return 0;
295 const char *s = str->data+start;
296 for (; *cstr; ++s, ++cstr) {
297 if (toLower(*s) != toLower(*cstr)) return 0;
299 return 1;
303 //==========================================================================
305 // vstr_part_check_token_list
307 //==========================================================================
308 static int vstr_part_check_token_list (const VStr *str, size_t start, size_t len, const char *list[], size_t count) {
309 if (!count) return 0;
310 for (size_t f = count; f--; ++list) {
311 if (vstr_part_strEquCI_cstr(str, start, len, list[0])) return 1;
313 return 0;
// ////////////////////////////////////////////////////////////////////////// //
/* command names that translate() checks only when converting to upper case;
   URASM_COMMAND_TOKENS must be equal to the number of entries */
#define URASM_COMMAND_TOKENS  (18)
static const char *UrAsmCommands[URASM_COMMAND_TOKENS] = {
  "DISPLAY",
  "DISPLAY0",
  "DISPLAYA",
  "DISPHEX",
  "DISPHEX0",
  "DISPHEXA",

  "DEFFMT",
  "MODEL",

  "MACRO",
  "ENDM",

  "MODULE",
  "ENDMODULE",

  "IF",
  "IFX",
  "ELSE",
  "ELSEIF",
  "ELSEIFX",
  "ENDIF",
};
/* command names that translate() checks in both conversion modes;
   ASM_COMMAND_TOKENS must be equal to the number of entries */
#define ASM_COMMAND_TOKENS  (34)
static const char *asmCommands[ASM_COMMAND_TOKENS] = {
  "EQU",
  "ORG",
  "DISP",
  "ENDDISP",
  "PHASE",
  "DEPHASE",
  "UNPHASE",
  "ALIGN",
  "DISPALIGN",
  "PHASEALIGN",
  "ENT",
  "CLR",
  "RESERVE",

  "INCLUDE",
  "INCBIN",

  "DUP",
  "EDUP",

  "DEFINCR",
  "DEFB",
  "DB",
  "DEFW",
  "DW",
  "DEFR",
  "DR",
  "DEFS",
  "DS",
  "DEFM",
  "DM",
  "DEFZ",
  "DZ",
  "DEFX",
  "DX",
  "DEFC",
  "DC",
};
/* index register names (whole registers and their halves) that translate()
   checks in both conversion modes;
   URA_REGSX_COUNT must be equal to the number of entries */
#define URA_REGSX_COUNT  (6)
static const char *URA_REGSX[URA_REGSX_COUNT] = {
  "IX",
  "IY",
  "IXH", "IXL",
  "IYH", "IYL",
};
397 //==========================================================================
399 // translate
401 //==========================================================================
402 static void translate (VStr *text, int mode_lower) {
403 if (!text || text->length == 0) return;
404 vstr_make_unique(text);
406 size_t pos = 0;
407 while (pos < text->length) {
408 /* comment? */
409 if (text->data[pos] == ';') {
410 while (pos < text->length && text->data[pos] != '\n') ++pos;
411 continue;
413 if ((unsigned)(text->data[pos]&0xff) <= 32) { ++pos; continue; }
414 /* af' */
415 if (pos >= 2 && text->data[pos] == '\'' &&
416 toLower(text->data[pos-2]) == 'a' &&
417 toLower(text->data[pos-1]) == 'f')
419 ++pos;
420 continue;
422 /* string? */
423 if (text->data[pos] == '"' || text->data[pos] == '\'') {
424 const char ech = text->data[pos++];
425 while (pos < text->length) {
426 const char ch = text->data[pos++];
427 if (ch == '\n' || ch == '\r') break;
428 if (ch == '\\') { ++pos; continue; }
429 if (ch == ech) break;
431 continue;
433 /* token should start with alpha */
434 if (isAlpha(text->data[pos])) {
435 /* find token end */
436 size_t epos = pos+1;
437 while (epos < text->length) {
438 const char ch = text->data[epos];
439 if ((unsigned)(ch&0xff) <= 32 || ch == ';' || ch == '"' || ch == '\'') break;
440 if (!isIdChar(ch)) break;
441 ++epos;
443 const int found =
444 vstr_part_check_token_list(text, pos, epos-pos, URASM_TOKENS, URASM_MAX_TOKEN) ||
445 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS8, 8) ||
446 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS16, 4) ||
447 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS16A, 4) ||
448 vstr_part_check_token_list(text, pos, epos-pos, URA_REGSX, URA_REGSX_COUNT) ||
449 vstr_part_check_token_list(text, pos, epos-pos, URA_COND, 8) ||
450 vstr_part_check_token_list(text, pos, epos-pos, asmCommands, ASM_COMMAND_TOKENS) ||
451 (!mode_lower && vstr_part_check_token_list(text, pos, epos-pos, UrAsmCommands, URASM_COMMAND_TOKENS));
452 /* fix case if found */
453 if (found) {
454 for (; pos < epos; ++pos) {
455 text->data[pos] = (mode_lower ? toLower(text->data[pos]) : toUpper(text->data[pos]));
457 } else {
458 pos = epos;
460 continue;
462 /* skip token */
463 if (isIdChar(text->data[pos])) {
464 while (pos < text->length) {
465 const char ch = text->data[pos];
466 if ((unsigned)(ch&0xff) <= 32 || ch == ';' || ch == '"' || ch == '\'') break;
467 if (!isIdChar(ch)) break;
468 ++pos;
470 } else {
471 /* skip delimiter */
472 ++pos;
478 //==========================================================================
480 // main
482 //==========================================================================
483 int main (int argc, char *argv[]) {
484 VStr infile;
485 VStr outfile;
486 vstr_init(&infile);
487 vstr_init(&outfile);
489 int mode_lower = 1;
490 int nomoreargs = 0;
492 for (int f = 1; f < argc; ) {
493 const char *s = argv[f++];
494 if (!s || !s[0]) continue;
495 if (!nomoreargs) {
496 if (strcmp(s, "--") == 0) { nomoreargs = 1; continue; }
497 if (s[0] == '-') {
498 if (strcmp(s, "--lo") == 0) { mode_lower = 1; continue; }
499 if (strcmp(s, "--hi") == 0 || strcmp(s, "--up") == 0) { mode_lower = 0; continue; }
500 if (strcmp(s, "--help") == 0 || strcmp(s, "--h") == 0 ||
501 strcmp(s, "-help") == 0 || strcmp(s, "-h") == 0 ||
502 strcmp(s, "--?") == 0 || strcmp(s, "-?") == 0)
504 printf("urlocase: convert UrAsm Z80 assembler source case\n"
505 "options:\n"
506 " --lo convert to lower case (default)\n"
507 " --up convert to upper case\n");
508 return 0;
510 if (strcmp(s, "-o") == 0) {
511 if (outfile.length) {
512 fprintf(stderr, "FATAL: too many output files!\n");
513 return 1;
515 if (f >= argc || !argv[f] || !argv[f][0]) {
516 fprintf(stderr, "FATAL: output file name expected for '-o'!\n");
517 return 1;
519 vstr_set_static(&outfile, argv[f++]);
520 continue;
522 fprintf(stderr, "FATAL: unknown argument '%s'!\n", s);
523 return 1;
526 if (!infile.length) {
527 vstr_set_static(&infile, s);
528 continue;
531 if (!outfile.length) {
532 vstr_set_static(&outfile, s);
533 continue;
536 fprintf(stderr, "FATAL: too many arguments!\n");
537 return 1;
540 if (!infile.length) {
541 fprintf(stderr, "FATAL: no input file!\n");
542 return 1;
545 if (!outfile.length) vstr_copy(&outfile, &infile);
547 VStr text;
548 vstr_init(&text);
549 load_file(&infile, &text);
551 translate(&text, mode_lower);
553 save_file(&outfile, &text);
555 printf("%s -> %s (%s): done\n", infile.data, outfile.data, (mode_lower ? "lo" : "UP"));
557 vstr_free(&infile);
558 vstr_free(&outfile);
559 vstr_free(&text);
561 return 0;