liburasm: do not allow Z80N instructions in assembler if Z80A mode is active
[urasm.git] / src / urlocase.c
blob9ebae9b8dce62221892dfde3a63d4466d27061fc
1 // convert UrAsm source code to lower case (preserving strings and labels)
2 // coded by Ketmar // Invisible Vector
3 // GPLv3 or later
4 //
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <unistd.h>
11 #include "liburasm/liburasm.h"
14 //**************************************************************************
16 // very simple string library
18 //**************************************************************************
/*
 * Reference-counted string descriptor used by the helpers below.
 *
 * A freshly initialised string has all three fields zeroed. Contents that
 * are shared between several VStr values are tracked through the counter
 * `rc` points to; contents without a counter are never freed.
 */
typedef struct {
  char *data;    /* string bytes; NULL for an empty string */
  size_t length; /* number of bytes in `data` */
  int *rc;       /* refcounter; NULL means "statically allocated" */
} VStr;
27 // initialise VStr structure
28 static __attribute__((unused)) inline void vstr_init (VStr *str) { if (str) { str->data = NULL; str->length = 0; str->rc = NULL; } }
31 // free VStr
32 static __attribute__((unused)) inline void vstr_free (VStr *str) {
33 if (str) {
34 if (str->rc) {
35 if (--(*str->rc) == 0) {
36 free(str->rc);
37 if (str->data) free(str->data);
39 str->data = NULL;
40 str->length = 0;
41 str->rc = NULL;
47 // copy VStr (both strings should be initialised)
48 static __attribute__((unused)) inline void vstr_copy (VStr *dest, const VStr *src) {
49 if (!dest) return;
50 if (!src) { vstr_free(dest); return; }
51 if (dest->data != src->data) {
52 /* different strings */
53 vstr_free(dest);
54 dest->data = src->data;
55 dest->length = src->length;
56 dest->rc = src->rc;
58 if (dest->rc) ++(*dest->rc);
62 // ensure that the given string is uniqie and modifiable
63 static __attribute__((unused)) inline void vstr_make_unique (VStr *str) {
64 if (!str) return;
65 if (str->length == 0) {
66 vstr_free(str); /* just in case */
67 return;
69 if (str->rc && *str->rc == 1) return; /* nothing to do */
70 /* create new string */
71 char *ns = malloc(str->length+1);
72 memcpy(ns, str->data, str->length);
73 ns[str->length] = 0;
74 int *nrc = malloc(sizeof(int));
75 *nrc = 1;
76 if (str->rc) --(*str->rc);
77 str->data = ns;
78 str->rc = nrc;
82 // replace string with static contents
83 static __attribute__((unused)) inline void vstr_set_static (VStr *str, const char *s) {
84 if (!str) return;
85 vstr_free(str);
86 if (s && s[0]) {
87 str->data = (char *)s;
88 str->length = strlen(s);
93 // create string with static contents
94 static __attribute__((unused)) inline void vstr_init_static (VStr *str, const char *s) {
95 vstr_init(str);
96 vstr_set_static(str, s);
100 // replace string with dynamic contents
101 static __attribute__((unused)) inline void vstr_set_dynamic (VStr *str, const char *s) {
102 if (!str) return;
103 vstr_free(str);
104 if (s && s[0]) {
105 str->length = strlen(s);
106 str->data = malloc(str->length+1);
107 memcpy(str->data, s, str->length);
108 str->data[str->length] = 0;
113 // create string with dynamic contents
114 static __attribute__((unused)) inline void vstr_init_dynamic (VStr *str, const char *s) {
115 vstr_init(str);
116 vstr_set_dynamic(str, s);
120 // replace string with dynamic contents and the given length
121 static __attribute__((unused)) inline void vstr_set_dynamic_ex (VStr *str, const char *s, size_t len) {
122 if (!str) return;
123 vstr_free(str);
124 if (len) {
125 str->length = len;
126 str->data = malloc(len+1);
127 if (s) {
128 memcpy(str->data, s, len);
129 str->data[len] = 0;
130 } else {
131 memset(str->data, 0, len+1);
137 // create string with dynamic contents and the given length
138 static __attribute__((unused)) inline void vstr_init_dynamic_ex (VStr *str, const char *s, size_t len) {
139 vstr_init(str);
140 vstr_set_dynamic_ex(str, s, len);
144 // append source to destination
145 static __attribute__((unused)) inline void vstr_cat_vstr (VStr *dest, const VStr *src) {
146 if (!dest || !src || src->length == 0) return;
147 vstr_make_unique(dest);
148 // make sure that we have `rc` allocated
149 if (!dest->rc) { dest->rc = malloc(sizeof(int)); *dest->rc = 1; }
150 dest->data = realloc(dest->data, src->length+dest->length+1);
151 memmove(dest->data+dest->length, src->data, src->length);
152 dest->length += src->length;
153 dest->data[dest->length] = 0;
157 // append source to destination
158 static __attribute__((unused)) inline void vstr_cat_cstr (VStr *dest, const char *s) {
159 if (!dest || !s || !s[0]) return;
160 VStr tmp;
161 if (dest->data && s >= dest->data && s < dest->data+dest->length) {
162 /* oops; inside */
163 vstr_init_dynamic(&tmp, s);
164 } else {
165 vstr_init_static(&tmp, s);
167 vstr_cat_vstr(dest, &tmp);
168 vstr_free(&tmp);
172 //==========================================================================
174 // load_file
176 //==========================================================================
177 static void load_file (const VStr *fname, VStr *text) {
178 if (!text) {
179 fprintf(stderr, "FATAL: cannot load file into nowhere!\n");
180 exit(1);
182 if (!fname || fname->length == 0) {
183 fprintf(stderr, "FATAL: cannot open nameless file for reading!\n");
184 exit(1);
186 FILE *fl = fopen(fname->data, "rb");
187 if (!fl) {
188 fprintf(stderr, "FATAL: cannot open file '%s' for reading!\n", fname->data);
189 exit(1);
191 fseek(fl, 0, SEEK_END);
192 long size = ftell(fl);
193 if (size <= 0 || size > 1024*1024*32) {
194 fclose(fl);
195 fprintf(stderr, "FATAL: file '%s' is either too big, or too small!\n", fname->data);
196 exit(1);
198 char *res = malloc((size_t)size+1);
199 fseek(fl, 0, SEEK_SET);
200 if (fread(res, (size_t)size, 1, fl) != 1) {
201 fclose(fl);
202 fprintf(stderr, "FATAL: error reading file '%s'!\n", fname->data);
203 exit(1);
205 res[(size_t)size] = 0;
206 vstr_free(text);
207 text->data = res;
208 text->length = (size_t)size;
209 text->rc = malloc(sizeof(int));
210 *text->rc = 1;
214 //==========================================================================
216 // save_file
218 //==========================================================================
219 static void save_file (const VStr *fname, const VStr *text) {
220 if (!fname || fname->length == 0) {
221 fprintf(stderr, "FATAL: cannot create nameless file!\n");
222 exit(1);
224 FILE *fl = fopen(fname->data, "wb");
225 if (!fl) {
226 fprintf(stderr, "FATAL: cannot create file '%s'!\n", fname->data);
227 exit(1);
229 if (text && text->length) {
230 if (fwrite(text->data, text->length, 1, fl) != 1) {
231 fclose(fl);
232 unlink(fname->data);
233 fprintf(stderr, "FATAL: error writing file '%s'!\n", fname->data);
234 exit(1);
237 fclose(fl);
//==========================================================================
//
//  isAlpha
//
//  returns 1 for ASCII latin letters ('A'..'Z', 'a'..'z'), 0 otherwise
//
//==========================================================================
static inline int isAlpha (const char ch) {
  if (ch >= 'a' && ch <= 'z') return 1;
  if (ch >= 'A' && ch <= 'Z') return 1;
  return 0;
}
//==========================================================================
//
//  isIdChar
//
//  returns 1 for characters allowed inside an identifier: ASCII letters,
//  decimal digits, '@', '_' and '.'; returns 0 for everything else
//
//==========================================================================
static inline int isIdChar (const char ch) {
  if (ch >= '0' && ch <= '9') return 1;
  if (ch >= 'a' && ch <= 'z') return 1;
  if (ch >= 'A' && ch <= 'Z') return 1;
  switch (ch) {
    case '@':
    case '_':
    case '.':
      return 1;
    default:
      return 0;
  }
}
//==========================================================================
//
//  toLower
//
//  converts 'A'..'Z' to 'a'..'z'; any other character is returned as is
//
//==========================================================================
static inline char toLower (const char ch) {
  if (ch < 'A' || ch > 'Z') return ch;
  return (char)(ch+('a'-'A'));
}
//==========================================================================
//
//  toUpper
//
//  converts 'a'..'z' to 'A'..'Z'; any other character is returned as is
//
//==========================================================================
static inline char toUpper (const char ch) {
  if (ch < 'a' || ch > 'z') return ch;
  return (char)(ch-('a'-'A'));
}
283 //==========================================================================
285 // vstr_part_strEquCI_cstr
287 //==========================================================================
288 static int vstr_part_strEquCI_cstr (const VStr *str, size_t start, size_t len, const char *cstr) {
289 if (!str) abort();
290 if (start >= str->length) return (!cstr || !cstr[0]);
291 if (len > str->length-start) len = str->length-start;
292 if (!cstr || !cstr[0]) return (len == 0);
293 const size_t cstrlen = strlen(cstr);
294 if (cstrlen != len) return 0;
295 const char *s = str->data+start;
296 for (; *cstr; ++s, ++cstr) {
297 if (toLower(*s) != toLower(*cstr)) return 0;
299 return 1;
303 //==========================================================================
305 // vstr_part_check_token_list
307 //==========================================================================
308 static int vstr_part_check_token_list (const VStr *str, size_t start, size_t len, const char *list[], size_t count) {
309 if (!count) return 0;
310 for (size_t f = count; f--; ++list) {
311 if (vstr_part_strEquCI_cstr(str, start, len, list[0])) return 1;
313 return 0;
// ////////////////////////////////////////////////////////////////////////// //
// command names that `translate()` checks only when converting to upper case
#define URASM_COMMAND_TOKENS  (18)
static const char *UrAsmCommands[URASM_COMMAND_TOKENS] = {
  /* value printing */
  "DISPLAY", "DISPLAY0", "DISPLAYA",
  "DISPHEX", "DISPHEX0", "DISPHEXA",
  /* output setup */
  "DEFFMT", "MODEL",
  /* macros */
  "MACRO", "ENDM",
  /* modules */
  "MODULE", "ENDMODULE",
  /* conditionals */
  "IF", "IFX", "ELSE", "ELSEIF", "ELSEIFX", "ENDIF",
};
// assembler command names that `translate()` checks in both case modes
#define ASM_COMMAND_TOKENS  (34)
static const char *asmCommands[ASM_COMMAND_TOKENS] = {
  "EQU", "ORG",
  "DISP", "ENDDISP",
  "PHASE", "DEPHASE", "UNPHASE",
  "ALIGN", "DISPALIGN", "PHASEALIGN",
  "ENT", "CLR", "RESERVE",
  /* file inclusion */
  "INCLUDE", "INCBIN",
  /* repetition */
  "DUP", "EDUP",
  /* data definition */
  "DEFINCR",
  "DEFB", "DB",
  "DEFW", "DW",
  "DEFR", "DR",
  "DEFS", "DS",
  "DEFM", "DM",
  "DEFZ", "DZ",
  "DEFX", "DX",
  "DEFC", "DC",
};
388 //==========================================================================
390 // translate
392 //==========================================================================
393 static void translate (VStr *text, int mode_lower) {
394 if (!text || text->length == 0) return;
395 vstr_make_unique(text);
397 size_t pos = 0;
398 while (pos < text->length) {
399 /* comment? */
400 if (text->data[pos] == ';') {
401 while (pos < text->length && text->data[pos] != '\n') ++pos;
402 continue;
404 if ((unsigned)(text->data[pos]&0xff) <= 32) { ++pos; continue; }
405 /* af' */
406 if (pos >= 2 && text->data[pos] == '\'' &&
407 toLower(text->data[pos-2]) == 'a' &&
408 toLower(text->data[pos-1]) == 'f')
410 ++pos;
411 continue;
413 /* string? */
414 if (text->data[pos] == '"' || text->data[pos] == '\'') {
415 const char ech = text->data[pos++];
416 while (pos < text->length) {
417 const char ch = text->data[pos++];
418 if (ch == '\n' || ch == '\r') break;
419 if (ch == '\\') { ++pos; continue; }
420 if (ch == ech) break;
422 continue;
424 /* token should start with alpha */
425 if (isAlpha(text->data[pos])) {
426 /* find token end */
427 size_t epos = pos+1;
428 while (epos < text->length) {
429 const char ch = text->data[epos];
430 if ((unsigned)(ch&0xff) <= 32 || ch == ';' || ch == '"' || ch == '\'') break;
431 if (!isIdChar(ch)) break;
432 ++epos;
434 const int found =
435 vstr_part_check_token_list(text, pos, epos-pos, URASM_TOKENS, URASM_MAX_TOKEN) ||
436 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS8, 8) ||
437 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS16, 4) ||
438 vstr_part_check_token_list(text, pos, epos-pos, URA_REGS16A, 4) ||
439 vstr_part_check_token_list(text, pos, epos-pos, URA_COND, 8) ||
440 vstr_part_check_token_list(text, pos, epos-pos, asmCommands, ASM_COMMAND_TOKENS) ||
441 (!mode_lower && vstr_part_check_token_list(text, pos, epos-pos, UrAsmCommands, URASM_COMMAND_TOKENS));
442 /* fix case if found */
443 if (found) {
444 for (; pos < epos; ++pos) {
445 text->data[pos] = (mode_lower ? toLower(text->data[pos]) : toUpper(text->data[pos]));
447 } else {
448 pos = epos;
450 continue;
452 /* skip token */
453 if (isIdChar(text->data[pos])) {
454 while (pos < text->length) {
455 const char ch = text->data[pos];
456 if ((unsigned)(ch&0xff) <= 32 || ch == ';' || ch == '"' || ch == '\'') break;
457 if (!isIdChar(ch)) break;
458 ++pos;
460 } else {
461 /* skip delimiter */
462 ++pos;
468 //==========================================================================
470 // main
472 //==========================================================================
473 int main (int argc, char *argv[]) {
474 VStr infile;
475 VStr outfile;
476 vstr_init(&infile);
477 vstr_init(&outfile);
479 int mode_lower = 1;
480 int nomoreargs = 0;
482 for (int f = 1; f < argc; ) {
483 const char *s = argv[f++];
484 if (!s || !s[0]) continue;
485 if (!nomoreargs) {
486 if (strcmp(s, "--") == 0) { nomoreargs = 1; continue; }
487 if (s[0] == '-') {
488 if (strcmp(s, "--lo") == 0) { mode_lower = 1; continue; }
489 if (strcmp(s, "--hi") == 0 || strcmp(s, "--up") == 0) { mode_lower = 0; continue; }
490 if (strcmp(s, "--help") == 0 || strcmp(s, "--h") == 0 ||
491 strcmp(s, "-help") == 0 || strcmp(s, "-h") == 0 ||
492 strcmp(s, "--?") == 0 || strcmp(s, "-?") == 0)
494 printf("urlocase: convert UrAsm Z80 assembler source case\n"
495 "options:\n"
496 " --lo convert to lower case (default)\n"
497 " --up convert to upper case\n");
498 return 0;
500 if (strcmp(s, "-o") == 0) {
501 if (outfile.length) {
502 fprintf(stderr, "FATAL: too many output files!\n");
503 return 1;
505 if (f >= argc || !argv[f] || !argv[f][0]) {
506 fprintf(stderr, "FATAL: output file name expected for '-o'!\n");
507 return 1;
509 vstr_set_static(&outfile, argv[f++]);
510 continue;
512 fprintf(stderr, "FATAL: unknown argument '%s'!\n", s);
513 return 1;
516 if (!infile.length) {
517 vstr_set_static(&infile, s);
518 continue;
521 if (!outfile.length) {
522 vstr_set_static(&outfile, s);
523 continue;
526 fprintf(stderr, "FATAL: too many arguments!\n");
527 return 1;
530 if (!infile.length) {
531 fprintf(stderr, "FATAL: no input file!\n");
532 return 1;
535 if (!outfile.length) vstr_copy(&outfile, &infile);
537 VStr text;
538 vstr_init(&text);
539 load_file(&infile, &text);
541 translate(&text, mode_lower);
543 save_file(&outfile, &text);
545 printf("%s -> %s (%s): done\n", infile.data, outfile.data, (mode_lower ? "lo" : "UP"));
547 vstr_free(&infile);
548 vstr_free(&outfile);
549 vstr_free(&text);
551 return 0;