From fe8a6fdf386119f1b778f6cf0a8eb9d996ab4dc5 Mon Sep 17 00:00:00 2001 From: Ali Gholami Rudi Date: Thu, 31 Oct 2013 16:03:33 +0330 Subject: [PATCH] tok: allow large ds sections and string tokens --- Makefile | 2 +- cpp.c | 60 ++++++++++++++++++++++----------------------------- gen.c | 21 +++++++++--------- mem.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mem.h | 16 ++++++++++++++ ncc.c | 15 +++++++------ ncc.h | 7 +++--- npp.c | 9 +++++--- tok.c | 62 ++++++++++++++++++++++++++--------------------------- tok.h | 4 ++-- 10 files changed, 178 insertions(+), 93 deletions(-) create mode 100644 mem.c create mode 100644 mem.h diff --git a/Makefile b/Makefile index 40aabff..d98848f 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ LDFLAGS = all: ncc npp %.o: %.c ncc.h $(CC) -c $(CFLAGS) $< -ncc: ncc.o tok.o out.o cpp.o tab.o gen.o reg.o $(GEN) +ncc: ncc.o tok.o out.o cpp.o tab.o gen.o reg.o mem.o $(GEN) $(CC) -o $@ $^ $(LDFLAGS) npp: npp.o cpp.o tab.o $(CC) -o $@ $^ $(LDFLAGS) diff --git a/cpp.c b/cpp.c index 3a27589..b42e362 100644 --- a/cpp.c +++ b/cpp.c @@ -8,6 +8,7 @@ #include #include #include +#include "mem.h" #include "ncc.h" #include "tab.h" #include "tok.h" @@ -18,7 +19,7 @@ static int cur; static struct macro { char name[NAMELEN]; - char def[MACROLEN]; + char def[MDEFLEN]; char args[NARGS][NAMELEN]; int nargs; int isfunc; @@ -42,7 +43,7 @@ static struct buf { char path[NAMELEN]; /* for BUF_MACRO */ struct macro *macro; - char args[NARGS][MACROLEN]; /* arguments passed to a macro */ + char args[NARGS][MARGLEN]; /* arguments passed to a macro */ /* for BUF_ARG */ int arg_buf; /* the bufs index of the owning macro */ } bufs[NBUFS]; @@ -220,14 +221,6 @@ static void read_tilleol(char *dst) *dst = '\0'; } -static char *putstr(char *d, char *s) -{ - while (*s) - *d++ = *s++; - *d = '\0'; - return d; -} - static char *locs[NLOCS] = {}; static int nlocs = 0; @@ -241,13 +234,10 @@ static int include_find(char *name, int std) int i; for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) { char path[1 << 10]; - char *s; - s = path; - if (locs[i]) { - s = putstr(s, locs[i]); - *s++ = '/'; - } - s = putstr(s, name); + if (locs[i]) + sprintf(path, "%s/%s", locs[i], name); + else + strcpy(path, name); if (!include_file(path)) return 0; } @@ -333,9 +323,7 @@ static void macro_define(void) read_tilleol(d->def); } -int cpp_read(char *buf); - -static char ebuf[BUFLEN]; +static char ebuf[MARGLEN]; static int elen; static int ecur; @@ -343,17 +331,20 @@ static long evalexpr(void); static int cpp_eval(void) { - char evalbuf[BUFLEN]; + char evalbuf[MARGLEN]; int old_limit; - int ret, nr; + int ret, clen; + char *cbuf; read_tilleol(evalbuf); buf_new(BUF_EVAL, evalbuf, strlen(evalbuf)); elen = 0; ecur = 0; old_limit = bufs_limit; bufs_limit = nbufs; - while ((nr = cpp_read(ebuf + elen)) >= 0) - elen += nr; + while (!cpp_read(&cbuf, &clen)) { + memcpy(ebuf + elen, cbuf, clen); + elen += clen; + } bufs_limit = old_limit; ret = evalexpr(); buf_pop(); @@ -532,12 +523,9 @@ static int expandable(char *word) void cpp_define(char *name, char *def) { - char tmp_buf[MACROLEN]; - char *s = tmp_buf; - s = putstr(s, name); - *s++ = '\t'; - s = putstr(s, def); - buf_new(BUF_TEMP, tmp_buf, s - tmp_buf); + char tmp_buf[MDEFLEN]; + sprintf(tmp_buf, "%s\t%s", name, def); + buf_new(BUF_TEMP, tmp_buf, strlen(tmp_buf)); macro_define(); buf_pop(); } @@ -548,10 +536,12 @@ static char seen_name[NAMELEN]; /* the name of the last macro */ static int hunk_off; static int hunk_len; -int cpp_read(char *s) +int cpp_read(char **obuf, int *olen) { int old, end; int jump_name = 0; + *olen = 0; + *obuf = ""; if (seen_macro == 1) { macro_expand(seen_name); seen_macro = 0; @@ -610,15 +600,15 @@ int cpp_read(char *s) } cur++; } - /* macros are expanded later; ignore its name */ + /* macros are expanded later; ignoring their names */ end = jump_name ? cur - strlen(seen_name) : cur; - memcpy(s, buf + old, end - old); - s[end - old] = '\0'; if (!buf_iseval()) { hunk_off += hunk_len; hunk_len = end - old; } - return end - old; + *obuf = buf + old; + *olen = end - old; + return 0; } /* preprocessor constant expression evaluation */ diff --git a/gen.c b/gen.c index dcbda91..1c6f33b 100644 --- a/gen.c +++ b/gen.c @@ -2,6 +2,7 @@ #include #include #include "gen.h" +#include "mem.h" #include "ncc.h" #include "out.h" #include "reg.h" @@ -19,8 +20,7 @@ char cs[SECLEN]; /* code segment */ int cslen; -static char ds[SECLEN]; /* data segment */ -static int dslen; +static struct mem ds; /* data segment */ static long bsslen; /* bss segment size */ static long sp; /* stack pointer offset from R_RBP */ @@ -871,20 +871,21 @@ long o_dsnew(char *name, int size, int global) { int idx; if (pass1) - return dslen; + return mem_len(&ds); idx = ndats++; if (idx >= NDATS) err("nomem: NDATS reached!\n"); strcpy(dat_names[idx], name); - dat_offs[idx] = dslen; - out_sym(name, OUT_DS | (global ? OUT_GLOB : 0), dslen, size); - dslen += ALIGN(size, OUT_ALIGNMENT); + dat_offs[idx] = mem_len(&ds); + out_sym(name, OUT_DS | (global ? OUT_GLOB : 0), mem_len(&ds), size); + mem_putz(&ds, ALIGN(size, OUT_ALIGNMENT)); return dat_offs[idx]; } void o_dscpy(long addr, void *buf, int len) { - memcpy(ds + addr, buf, len); + if (!pass1) + mem_cpy(&ds, addr, buf, len); } static int dat_off(char *name) @@ -906,11 +907,11 @@ void o_dsset(char *name, int off, unsigned bt) } if (t->loc == LOC_NUM && !t->bt) { num_cast(t, bt); - memcpy(ds + sym_off, &t->addr, BT_SZ(bt)); + mem_cpy(&ds, sym_off, &t->addr, BT_SZ(bt)); } if (t->loc == LOC_SYM && !t->bt) { out_rel(t->sym, OUT_DS, sym_off); - memcpy(ds + sym_off, &t->off, BT_SZ(bt)); + mem_cpy(&ds, sym_off, &t->off, BT_SZ(bt)); } tmp_drop(1); } @@ -918,7 +919,7 @@ void o_dsset(char *name, int off, unsigned bt) void o_write(int fd) { i_done(); - out_write(fd, cs, cslen, ds, dslen); + out_write(fd, cs, cslen, mem_buf(&ds), mem_len(&ds)); } static void func_reset(void) diff --git a/mem.c b/mem.c new file mode 100644 index 0000000..f355fd0 --- /dev/null +++ b/mem.c @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include "mem.h" + +#define MEMSZ 512 + +static void mem_extend(struct mem *mem) +{ + char *s = mem->s; + mem->sz = mem->sz ? mem->sz + mem->sz : MEMSZ; + mem->s = malloc(mem->sz); + if (mem->n) + memcpy(mem->s, s, mem->n); + free(s); +} + +void mem_init(struct mem *mem) +{ + memset(mem, 0, sizeof(*mem)); +} + +void mem_done(struct mem *mem) +{ + free(mem->s); + memset(mem, 0, sizeof(*mem)); +} + +void mem_cut(struct mem *mem, int pos) +{ + mem->n = pos < mem->n ? pos : mem->n; +} + +void mem_cpy(struct mem *mem, int off, void *buf, int len) +{ + memcpy(mem->s + off, buf, len); +} + +void mem_put(struct mem *mem, void *buf, int len) +{ + while (mem->n + len + 1 >= mem->sz) + mem_extend(mem); + mem_cpy(mem, mem->n, buf, len); + mem->n += len; +} + +void mem_putc(struct mem *mem, int c) +{ + if (mem->n + 2 >= mem->sz) + mem_extend(mem); + mem->s[mem->n++] = c; +} + +void mem_putz(struct mem *mem, int sz) +{ + while (mem->n + sz + 1 >= mem->sz) + mem_extend(mem); + memset(mem->s + mem->n, 0, sz); + mem->n += sz; +} + +/* return a pointer to mem's buffer; valid as long as mem is not modified */ +void *mem_buf(struct mem *mem) +{ + if (!mem->s) + return ""; + mem->s[mem->n] = '\0'; + return mem->s; +} + +int mem_len(struct mem *mem) +{ + return mem->n; +} diff --git a/mem.h b/mem.h new file mode 100644 index 0000000..74e092f --- /dev/null +++ b/mem.h @@ -0,0 +1,16 @@ +/* variable length memory buffer */ +struct mem { + char *s; /* allocated buffer */ + int sz; /* buffer size */ + int n; /* length of data stored in s */ +}; + +void mem_init(struct mem *mem); +void mem_done(struct mem *mem); +void mem_cut(struct mem *mem, int pos); +void *mem_buf(struct mem *mem); +void mem_put(struct mem *mem, void *buf, int len); +void mem_putc(struct mem *mem, int c); +void mem_putz(struct mem *mem, int sz); +void mem_cpy(struct mem *mem, int off, void *buf, int len); +int mem_len(struct mem *mem); diff --git a/ncc.c b/ncc.c index 1c1799e..c8f35e3 100644 --- a/ncc.c +++ b/ncc.c @@ -480,8 +480,9 @@ static void readprimary(void) if (!tok_jmp(TOK_STR)) { struct type t = {}; /* char type inside the arrays */ struct type a = {}; /* the char array type */ - char buf[STRLEN]; - int len = tok_str(buf); + char *buf; + int len; + tok_str(&buf, &len); t.bt = 1 | BT_SIGNED; a.id = array_add(&t, len); a.flags = T_ARRAY; @@ -1149,10 +1150,10 @@ static void globalinit(void *obj, int off, struct type *t) if (t->flags & T_ARRAY && tok_see() == TOK_STR) { struct type *t_de = &arrays[t->id].type; if (!t_de->ptr && !t_de->flags && TYPE_SZ(t_de) == 1) { - char buf[STRLEN]; + char *buf; int len; tok_expect(TOK_STR); - len = tok_str(buf); + tok_str(&buf, &len); o_dscpy(name->addr + off, buf, len); return; } @@ -1202,10 +1203,10 @@ static void localinit(void *obj, int off, struct type *t) if (t->flags & T_ARRAY && tok_see() == TOK_STR) { struct type *t_de = &arrays[t->id].type; if (!t_de->ptr && !t_de->flags && TYPE_SZ(t_de) == 1) { - char buf[STRLEN]; + char *buf; int len; tok_expect(TOK_STR); - len = tok_str(buf); + tok_str(&buf, &len); o_localoff(addr, off); o_sym(tmp_str(buf, len)); o_num(len); @@ -1910,7 +1911,7 @@ static int initsize(void) if (tok_jmp('=')) return 0; if (!tok_jmp(TOK_STR)) { - n = tok_str(NULL); + tok_str(NULL, &n); tok_jump(addr); return n; } diff --git a/ncc.h b/ncc.h index c3d9b10..f90edaa 100644 --- a/ncc.h +++ b/ncc.h @@ -1,7 +1,5 @@ /* predefined array limits; (p.f. means per function) */ -#define SECLEN (1 << 18) /* size of CS/DS sections */ -#define BUFLEN (1 << 18) /* buffer size in cpp.c and tok.c */ -#define STRLEN (1 << 17) /* size of strings */ +#define SECLEN (1 << 19) /* size of CS section */ #define NDATS 1024 /* number of DS data symbols */ #define NSYMS 4096 /* number of elf symbols */ #define NREL 4096 /* number of elf relocations */ @@ -20,7 +18,8 @@ #define NLABELS 1024 /* number of labels p.f. */ #define NAMELEN 128 /* size of identifiers */ #define NDEFS 1024 /* number of macros */ -#define MACROLEN 1024 /* size of macros arguments/definitions */ +#define MARGLEN 1024 /* size of macro arguments */ +#define MDEFLEN 2048 /* size of macro definitions */ #define NBUFS 32 /* macro expansion stack depth */ #define NLOCS 1024 /* number of header search paths */ diff --git a/npp.c b/npp.c index d96de5b..23c17c8 100644 --- a/npp.c +++ b/npp.c @@ -80,8 +80,9 @@ int main(int argc, char *argv[]) int ofd = 1; int i = 1; char *s1, *s2; - int nr; int len = 0; + char *cbuf; + int clen; while (i < argc && argv[i][0] == '-') { if (argv[i][1] == 'I') cpp_addpath(argv[i][2] ? argv[i] + 2 : argv[++i]); @@ -110,8 +111,10 @@ int main(int argc, char *argv[]) s2 = malloc(OBUFSZ); if (!s1 || !s2) die("npp: cannot allocate enough memory\n"); - while ((nr = cpp_read(s1 + len)) >= 0) - len += nr; + while (!cpp_read(&cbuf, &clen)) { + memcpy(s1 + len, cbuf, clen); + len += clen; + } len = rmcomments(s2, s1, len); xwrite(ofd, s2, len); close(ofd); diff --git a/tok.c b/tok.c index d9550c8..f3c1455 100644 --- a/tok.c +++ b/tok.c @@ -3,10 +3,13 @@ #include #include #include "gen.h" +#include "mem.h" #include "ncc.h" #include "tok.h" -static char buf[BUFLEN]; +static struct mem tok_mem; /* the data read via cpp_read() so far */ +static struct mem str; /* the last tok_str() string */ +static char *buf; static int len; static int cur; static char name[NAMELEN]; @@ -66,7 +69,7 @@ static char *digs = "0123456789abcdef"; static int esc_char(int *c, char *s) { if (*s != '\\') { - *c = *s; + *c = (unsigned char) *s; return 1; } if (strchr(esc_code, s[1])) { @@ -90,7 +93,7 @@ static int esc_char(int *c, char *s) *c = ret; return i; } - *c = s[1]; + *c = (unsigned char) s[1]; return 2; } @@ -144,34 +147,29 @@ static void readnum(void) num = -1; } -static char str[STRLEN]; -static int str_len; - -int tok_str(char *buf) +void tok_str(char **buf, int *len) { + if (len) + *len = mem_len(&str) + 1; if (buf) - memcpy(buf, str, str_len); - return str_len; + *buf = mem_buf(&str); } -static int readstr(char *out) +static void readstr(struct mem *mem) { - char *s = out; - char *r = buf + cur; + char *s = buf + cur; char *e = buf + len; - r++; - while (r < e && *r != '"') { - if (*r == '\\') { - int c; - r += esc_char(&c, r); - *s++ = c; + int c; + s++; + while (s < e && *s != '"') { + if (*s == '\\') { + s += esc_char(&c, s); + mem_putc(mem, c); } else { - *s++ = *r++; + mem_putc(mem, (unsigned char) *s++); } } - *s++ = '\0'; - cur = r - buf + 1; - return s - out - 1; + cur = s - buf + 1; } static int id_char(int c) @@ -181,14 +179,17 @@ static int id_char(int c) static int skipws(void) { + int clen; + char *cbuf; while (1) { if (cur == len) { - int r; - while (!(r = cpp_read(buf + cur))) - ; - if (r == -1) - return 1; - len += r; + clen = 0; + while (!clen) + if (cpp_read(&cbuf, &clen)) + return 1; + mem_put(&tok_mem, cbuf, clen); + buf = mem_buf(&tok_mem); + len = mem_len(&tok_mem); } while (cur < len && isspace(buf[cur])) cur++; @@ -229,13 +230,12 @@ int tok_get(void) if (skipws()) return TOK_EOF; if (buf[cur] == '"') { - str_len = 0; + mem_cut(&str, 0); while (buf[cur] == '"') { - str_len += readstr(str + str_len); + readstr(&str); if (skipws()) return TOK_EOF; } - str_len++; return TOK_STR; } if (isdigit(buf[cur]) || buf[cur] == '\'') { diff --git a/tok.h b/tok.h index 8b605fa..37cb414 100644 --- a/tok.h +++ b/tok.h @@ -39,7 +39,7 @@ int tok_see(void); int tok_get(void); char *tok_id(void); int tok_num(long *n); -int tok_str(char *out); +void tok_str(char **buf, int *len); long tok_addr(void); void tok_jump(long addr); @@ -47,7 +47,7 @@ int cpp_init(char *path); void cpp_addpath(char *s); void cpp_define(char *name, char *def); char *cpp_loc(long addr); -int cpp_read(char *s); +int cpp_read(char **buf, int *len); void die(char *msg, ...); void err(char *fmt, ...); -- 2.11.4.GIT