UrForth: "STRLITERAL" now respects the optimiser
[urasm.git] / src / urlocase.c
blob1ddb5dcaa0ce0df2eb5e2adf514e04986f42ec49
1 // convert UrAsm source code to lower case (preserving strings and labels)
2 // coded by Ketmar // Invisible Vector
3 // GPLv3 ONLY
4 //
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <unistd.h>
11 #include "liburasm/liburasm.h"
14 //**************************************************************************
16 // very simple string library
18 //**************************************************************************
/* string descriptor used by all `vstr_*` helpers in this file */
typedef struct VStr {
  /* string bytes (NULL for an empty string); the helpers here that
     allocate the buffer put a terminating zero after `length` bytes */
  char *data;
  /* number of bytes in `data`, terminating zero not included */
  size_t length;
  /* refcounter; NULL means "statically allocated" */
  int *rc;
} VStr;
27 // initialise VStr structure
28 static __attribute__((unused)) inline void vstr_init (VStr *str) { if (str) { str->data = NULL; str->length = 0; str->rc = NULL; } }
31 // free VStr
32 static __attribute__((unused)) inline void vstr_free (VStr *str) {
33 if (str) {
34 if (str->rc) {
35 if (--(*str->rc) == 0) {
36 free(str->rc);
37 if (str->data) free(str->data);
39 str->data = NULL;
40 str->length = 0;
41 str->rc = NULL;
47 // copy VStr (both strings should be initialised)
48 static __attribute__((unused)) inline void vstr_copy (VStr *dest, const VStr *src) {
49 if (!dest) return;
50 if (!src) { vstr_free(dest); return; }
51 if (dest->data != src->data) {
52 /* different strings */
53 vstr_free(dest);
54 dest->data = src->data;
55 dest->length = src->length;
56 dest->rc = src->rc;
58 if (dest->rc) ++(*dest->rc);
62 // ensure that the given string is uniqie and modifiable
63 static __attribute__((unused)) inline void vstr_make_unique (VStr *str) {
64 if (!str) return;
65 if (str->length == 0) {
66 vstr_free(str); /* just in case */
67 return;
69 if (str->rc && *str->rc == 1) return; /* nothing to do */
70 /* create new string */
71 char *ns = malloc(str->length+1);
72 memcpy(ns, str->data, str->length);
73 ns[str->length] = 0;
74 int *nrc = malloc(sizeof(int));
75 *nrc = 1;
76 if (str->rc) --(*str->rc);
77 str->data = ns;
78 str->rc = nrc;
82 // replace string with static contents
83 static __attribute__((unused)) inline void vstr_set_static (VStr *str, const char *s) {
84 if (!str) return;
85 vstr_free(str);
86 if (s && s[0]) {
87 str->data = (char *)s;
88 str->length = strlen(s);
93 // create string with static contents
94 static __attribute__((unused)) inline void vstr_init_static (VStr *str, const char *s) {
95 vstr_init(str);
96 vstr_set_static(str, s);
100 // replace string with dynamic contents
101 static __attribute__((unused)) inline void vstr_set_dynamic (VStr *str, const char *s) {
102 if (!str) return;
103 vstr_free(str);
104 if (s && s[0]) {
105 str->length = strlen(s);
106 str->data = malloc(str->length+1);
107 memcpy(str->data, s, str->length);
108 str->data[str->length] = 0;
113 // create string with dynamic contents
114 static __attribute__((unused)) inline void vstr_init_dynamic (VStr *str, const char *s) {
115 vstr_init(str);
116 vstr_set_dynamic(str, s);
120 // replace string with dynamic contents and the given length
121 static __attribute__((unused)) inline void vstr_set_dynamic_ex (VStr *str, const char *s, size_t len) {
122 if (!str) return;
123 vstr_free(str);
124 if (len) {
125 str->length = len;
126 str->data = malloc(len+1);
127 if (s) {
128 memcpy(str->data, s, len);
129 str->data[len] = 0;
130 } else {
131 memset(str->data, 0, len+1);
137 // create string with dynamic contents and the given length
138 static __attribute__((unused)) inline void vstr_init_dynamic_ex (VStr *str, const char *s, size_t len) {
139 vstr_init(str);
140 vstr_set_dynamic_ex(str, s, len);
144 // append source to destination
145 static __attribute__((unused)) inline void vstr_cat_vstr (VStr *dest, const VStr *src) {
146 if (!dest || !src || src->length == 0) return;
147 vstr_make_unique(dest);
148 // make sure that we have `rc` allocated
149 if (!dest->rc) { dest->rc = malloc(sizeof(int)); *dest->rc = 1; }
150 dest->data = realloc(dest->data, src->length+dest->length+1);
151 memmove(dest->data+dest->length, src->data, src->length);
152 dest->length += src->length;
153 dest->data[dest->length] = 0;
157 // append source to destination
158 static __attribute__((unused)) inline void vstr_cat_cstr (VStr *dest, const char *s) {
159 if (!dest || !s || !s[0]) return;
160 VStr tmp;
161 if (dest->data && s >= dest->data && s < dest->data+dest->length) {
162 /* oops; inside */
163 vstr_init_dynamic(&tmp, s);
164 } else {
165 vstr_init_static(&tmp, s);
167 vstr_cat_vstr(dest, &tmp);
168 vstr_free(&tmp);
172 //==========================================================================
174 // load_file
176 //==========================================================================
177 static void load_file (const VStr *fname, VStr *text) {
178 if (!text) {
179 fprintf(stderr, "FATAL: cannot load file into nowhere!\n");
180 exit(1);
182 if (!fname || fname->length == 0) {
183 fprintf(stderr, "FATAL: cannot open nameless file for reading!\n");
184 exit(1);
186 FILE *fl = fopen(fname->data, "rb");
187 if (!fl) {
188 fprintf(stderr, "FATAL: cannot open file '%s' for reading!\n", fname->data);
189 exit(1);
191 fseek(fl, 0, SEEK_END);
192 long size = ftell(fl);
193 if (size <= 0 || size > 1024*1024*32) {
194 fclose(fl);
195 fprintf(stderr, "FATAL: file '%s' is either too big, or too small!\n", fname->data);
196 exit(1);
198 char *res = malloc((size_t)size+1);
199 fseek(fl, 0, SEEK_SET);
200 if (fread(res, (size_t)size, 1, fl) != 1) {
201 fclose(fl);
202 fprintf(stderr, "FATAL: error reading file '%s'!\n", fname->data);
203 exit(1);
205 res[(size_t)size] = 0;
206 vstr_free(text);
207 text->data = res;
208 text->length = (size_t)size;
209 text->rc = malloc(sizeof(int));
210 *text->rc = 1;
214 //==========================================================================
216 // save_file
218 //==========================================================================
219 static void save_file (const VStr *fname, const VStr *text) {
220 if (!fname || fname->length == 0) {
221 fprintf(stderr, "FATAL: cannot create nameless file!\n");
222 exit(1);
224 FILE *fl = fopen(fname->data, "wb");
225 if (!fl) {
226 fprintf(stderr, "FATAL: cannot create file '%s'!\n", fname->data);
227 exit(1);
229 if (text && text->length) {
230 if (fwrite(text->data, text->length, 1, fl) != 1) {
231 fclose(fl);
232 unlink(fname->data);
233 fprintf(stderr, "FATAL: error writing file '%s'!\n", fname->data);
234 exit(1);
237 fclose(fl);
//==========================================================================
//
//  isAlpha
//
//  returns 1 for ASCII letters ('A'..'Z', 'a'..'z'), 0 for anything else
//
//==========================================================================
static inline int isAlpha (const char ch) {
  if (ch >= 'a' && ch <= 'z') return 1;
  if (ch >= 'A' && ch <= 'Z') return 1;
  return 0;
}
//==========================================================================
//
//  isIdChar
//
//  returns 1 for characters accepted inside an identifier:
//  ASCII letters, decimal digits, '@', '_' and '.'; 0 for anything else
//
//==========================================================================
static inline int isIdChar (const char ch) {
  if (ch == '.' || ch == '_' || ch == '@') return 1;
  if (ch >= '0' && ch <= '9') return 1;
  if (ch >= 'a' && ch <= 'z') return 1;
  if (ch >= 'A' && ch <= 'Z') return 1;
  return 0;
}
//==========================================================================
//
//  toLower
//
//  converts ASCII 'A'..'Z' to lower case; other characters are returned as is
//
//==========================================================================
static inline char toLower (const char ch) {
  if (ch < 'A' || ch > 'Z') return ch;
  return (char)(ch+('a'-'A'));
}
//==========================================================================
//
//  toUpper
//
//  converts ASCII 'a'..'z' to upper case; other characters are returned as is
//
//==========================================================================
static inline char toUpper (const char ch) {
  if (ch < 'a' || ch > 'z') return ch;
  return (char)(ch-('a'-'A'));
}
283 //==========================================================================
285 // vstr_part_strEquCI_cstr
287 //==========================================================================
288 static int vstr_part_strEquCI_cstr (const VStr *str, size_t start, size_t len, const char *cstr) {
289 if (!str) abort();
290 if (start >= str->length) return (!cstr || !cstr[0]);
291 if (len > str->length-start) len = str->length-start;
292 if (!cstr || !cstr[0]) return (len == 0);
293 const size_t cstrlen = strlen(cstr);
294 if (cstrlen != len) return 0;
295 const char *s = str->data+start;
296 for (; *cstr; ++s, ++cstr) {
297 if (toLower(*s) != toLower(*cstr)) return 0;
299 return 1;
303 //==========================================================================
305 // vstr_part_check_token_list
307 //==========================================================================
308 static int vstr_part_check_token_list (const VStr *str, size_t start, size_t len, const char *list[], size_t count) {
309 if (!count) return 0;
310 for (size_t f = count; f--; ++list) {
311 if (vstr_part_strEquCI_cstr(str, start, len, list[0])) return 1;
313 return 0;
317 // ////////////////////////////////////////////////////////////////////////// //
/* directive names that `translate()` looks up only when converting to upper case */
#define URASM_COMMAND_TOKENS  (18)
static const char *UrAsmCommands[URASM_COMMAND_TOKENS] = {
  /* output */
  "DISPLAY", "DISPLAY0", "DISPLAYA",
  "DISPHEX", "DISPHEX0", "DISPHEXA",
  /* settings */
  "DEFFMT", "MODEL",
  /* macros */
  "MACRO", "ENDM",
  /* modules */
  "MODULE", "ENDMODULE",
  /* conditionals */
  "IF", "IFX",
  "ELSE", "ELSEIF", "ELSEIFX",
  "ENDIF",
};
346 #define ASM_COMMAND_TOKENS (34)
347 static const char *asmCommands[ASM_COMMAND_TOKENS] = {
348 "EQU",
349 "ORG",
350 "DISP",
351 "ENDDISP",
352 "PHASE",
353 "DEPHASE",
354 "UNPHASE",
355 "ALIGN",
356 "DISPALIGN",
357 "PHASEALIGN",
358 "ENT",
359 "CLR",
360 "RESERVE",
362 "INCLUDE",
363 "INCBIN",
365 "DUP",
366 "EDUP",
368 "DEFINCR",
369 "DEFB",
370 "DB",
371 "DEFW",
372 "DW",
373 "DEFR",
374 "DR",
375 "DEFS",
376 "DS",
377 "DEFM",
378 "DM",
379 "DEFZ",
380 "DZ",
381 "DEFX",
382 "DX",
383 "DEFC",
384 "DC",
/* additional register names checked by `translate()`: index registers,
   their halves, and the "I" and "R" registers */
#define URA_REGSX_COUNT  (8)
static const char *URA_REGSX[URA_REGSX_COUNT] = {
  "IX", "IY",
  "IXH", "IXL",
  "IYH", "IYL",
  "I",
  "R",
};
398 //==========================================================================
400 // translate
402 //==========================================================================
403 static void translate (VStr *text, int mode_lower) {
404 if (!text || text->length == 0) return;
405 vstr_make_unique(text);
407 size_t pos = 0;
408 while (pos < text->length) {
409 /* comment? */
410 if (text->data[pos] == ';') {
411 while (pos < text->length && text->data[pos] != '\n') ++pos;
412 continue;
414 if ((unsigned)(text->data[pos]&0xff) <= 32) { ++pos; continue; }
415 /* af' */
416 if (pos >= 2 && text->data[pos] == '\'' &&
417 toLower(text->data[pos-2]) == 'a' &&
418 toLower(text->data[pos-1]) == 'f')
420 ++pos;
421 continue;
423 /* string? */
424 if (text->data[pos] == '"' || text->data[pos] == '\'') {
425 const char ech = text->data[pos++];
426 while (pos < text->length) {
427 const char ch = text->data[pos++];
428 if (ch == '\n' || ch == '\r') break;
429 if (ch == '\\') { ++pos; continue; }
430 if (ch == ech) break;
432 continue;
434 /* token should start with alpha */
435 if (isAlpha(text->data[pos])) {
436 /* find token end */
437 size_t epos = pos+1;
438 while (epos < text->length) {
439 const char ch = text->data[epos];
440 if ((unsigned)(ch&0xff) <= 32 || ch == ';' || ch == '"' || ch == '\'') break;
441 if (!isIdChar(ch)) break;
442 ++epos;
444 const int found =
445 vstr_part_check_token_list(text, pos, epos-pos, URASM_TOKENS, URASM_MAX_TOKEN) ||
446 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS8, 8) ||
447 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS16, 4) ||
448 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS16A, 4) ||
449 vstr_part_check_token_list(text, pos, epos-pos, URA_REGSX, URA_REGSX_COUNT) ||
450 vstr_part_check_token_list(text, pos, epos-pos, URA_COND, 8) ||
451 vstr_part_check_token_list(text, pos, epos-pos, asmCommands, ASM_COMMAND_TOKENS) ||
452 (!mode_lower && vstr_part_check_token_list(text, pos, epos-pos, UrAsmCommands, URASM_COMMAND_TOKENS));
453 /* fix case if found */
454 if (found) {
455 for (; pos < epos; ++pos) {
456 text->data[pos] = (mode_lower ? toLower(text->data[pos]) : toUpper(text->data[pos]));
458 } else {
459 pos = epos;
461 continue;
463 /* skip token */
464 if (isIdChar(text->data[pos])) {
465 while (pos < text->length) {
466 const char ch = text->data[pos];
467 if ((unsigned)(ch&0xff) <= 32 || ch == ';' || ch == '"' || ch == '\'') break;
468 if (!isIdChar(ch)) break;
469 ++pos;
471 } else {
472 /* skip delimiter */
473 ++pos;
479 //==========================================================================
481 // main
483 //==========================================================================
484 int main (int argc, char *argv[]) {
485 VStr infile;
486 VStr outfile;
487 vstr_init(&infile);
488 vstr_init(&outfile);
490 int mode_lower = 1;
491 int nomoreargs = 0;
493 for (int f = 1; f < argc; ) {
494 const char *s = argv[f++];
495 if (!s || !s[0]) continue;
496 if (!nomoreargs) {
497 if (strcmp(s, "--") == 0) { nomoreargs = 1; continue; }
498 if (s[0] == '-') {
499 if (strcmp(s, "--lo") == 0) { mode_lower = 1; continue; }
500 if (strcmp(s, "--hi") == 0 || strcmp(s, "--up") == 0) { mode_lower = 0; continue; }
501 if (strcmp(s, "--help") == 0 || strcmp(s, "--h") == 0 ||
502 strcmp(s, "-help") == 0 || strcmp(s, "-h") == 0 ||
503 strcmp(s, "--?") == 0 || strcmp(s, "-?") == 0)
505 printf("urlocase: convert UrAsm Z80 assembler source case\n"
506 "options:\n"
507 " --lo convert to lower case (default)\n"
508 " --up convert to upper case\n");
509 return 0;
511 if (strcmp(s, "-o") == 0) {
512 if (outfile.length) {
513 fprintf(stderr, "FATAL: too many output files!\n");
514 return 1;
516 if (f >= argc || !argv[f] || !argv[f][0]) {
517 fprintf(stderr, "FATAL: output file name expected for '-o'!\n");
518 return 1;
520 vstr_set_static(&outfile, argv[f++]);
521 continue;
523 fprintf(stderr, "FATAL: unknown argument '%s'!\n", s);
524 return 1;
527 if (!infile.length) {
528 vstr_set_static(&infile, s);
529 continue;
532 if (!outfile.length) {
533 vstr_set_static(&outfile, s);
534 continue;
537 fprintf(stderr, "FATAL: too many arguments!\n");
538 return 1;
541 if (!infile.length) {
542 fprintf(stderr, "FATAL: no input file!\n");
543 return 1;
546 if (!outfile.length) vstr_copy(&outfile, &infile);
548 VStr text;
549 vstr_init(&text);
550 load_file(&infile, &text);
552 translate(&text, mode_lower);
554 save_file(&outfile, &text);
556 printf("%s -> %s (%s): done\n", infile.data, outfile.data, (mode_lower ? "lo" : "UP"));
558 vstr_free(&infile);
559 vstr_free(&outfile);
560 vstr_free(&text);
562 return 0;