From 669244d22464b8e3af138f4df9e1f914f6feeffb Mon Sep 17 00:00:00 2001
From: Alexander Viro
Date: Mon, 14 Jun 2004 16:09:02 -0700
Subject: [PATCH] [PATCH] lazy-copy macro expansion in pre-processing

This is almost a total rewrite of the macro expansion.  It now does
lazy-copy of the arguments, with the realization that if an argument
only shows up once in the macro expansion (quite common), we don't need
to allocate a new copy of the argument token stream, we can just insert
it as-is.

I hope it's reasonably clean and readable; review would obviously be
welcome.
---
 pre-process.c               | 948 +++++++++++++++++++++++++++-----------
 token.h                     |  17 +
 validation/preprocessor12.c |   7 +
 validation/preprocessor13.c |   7 +
 validation/preprocessor14.c |   7 +
 validation/preprocessor15.c |   7 +
 6 files changed, 619 insertions(+), 374 deletions(-)
 create mode 100644 validation/preprocessor12.c
 create mode 100644 validation/preprocessor13.c
 create mode 100644 validation/preprocessor14.c
 create mode 100644 validation/preprocessor15.c

diff --git a/pre-process.c b/pre-process.c
index e648035b..e5750c2e 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -75,41 +75,8 @@ static struct token *alloc_token(struct position *pos)
 static const char *show_token_sequence(struct token *token);
-static struct token *is_defined(struct token *head, struct token *token, struct token *next)
-{
-	char *string[] = { "0", "1" };
-	char *defined = string[lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL];
-	struct token *newtoken = alloc_token(&token->pos);
-
-	token_type(newtoken) = TOKEN_NUMBER;
-	newtoken->number = defined;
-	newtoken->next = next;
-	head->next = newtoken;
-	return next;
-}
-
-
-struct token *defined_one_symbol(struct token *head, struct token *next)
-{
-	struct token *token = next->next;
-	struct token *past = token->next;
-
-	if (match_op(token, '(')) {
-		token = past;
-		past = token->next;
-		if (!match_op(past, ')'))
-			return next;
-		past = past->next;
-	}
-	if (token_type(token) == TOKEN_IDENT)
-		return is_defined(head, token, past);
-	return next;
-}
-
-struct token variable_argument = { .next = &eof_token_entry };
-
-/* Expand symbol 'sym' between 'head->next' and 'head->next->next' */
-static struct token *expand(struct token *, struct symbol *);
+/* Expand symbol 'sym' at '*list' */
+static struct token **expand(struct token **, struct symbol *);
 static void replace_with_string(struct token *token, const char *str)
 {
@@ -130,346 +97,473 @@ static void replace_with_integer(struct token *token, unsigned int val)
 	token->number = buf;
 }
-struct token *expand_one_symbol(struct token *head, struct token *token)
+static void replace_with_defined(struct token *token)
 {
+	char *string[] = { "0", "1" };
+	int defined = 0;
+	if (token_type(token) != TOKEN_IDENT)
+		warn(token->pos, "operator \"defined\" requires an identifier");
+	else if (lookup_symbol(token->ident, NS_PREPROCESSOR))
+		defined = 1;
+	token_type(token) = TOKEN_NUMBER;
+	token->number = string[defined];
+}
+
+struct token **expand_one_symbol(struct token **list)
+{
+	struct token *token = *list;
 	struct symbol *sym;
 	if (token->pos.noexpand)
-		return token;
+		return &token->next;
 	sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
 	if (sym)
-		return expand(head, sym);
+		return expand(list, sym);
 	if (token->ident == &__LINE___ident) {
 		replace_with_integer(token, token->pos.line);
 	} else if (token->ident == &__FILE___ident) {
 		replace_with_string(token, (input_streams + token->pos.stream)->name);
-	} else if (token->ident == &defined_ident) {
-		return defined_one_symbol(head, token);
 	}
-	return token;
+	return &token->next;
 }
-static inline void eat_untaint(struct token *head, struct token *token)
+static inline struct token *scan_next(struct token **where)
 {
+	struct token *token = *where;
+	if (token_type(token) != TOKEN_UNTAINT)
+		return token;
 	do {
 		token->ident->tainted = 0;
 		token = token->next;
 	} while (token_type(token) == TOKEN_UNTAINT);
-	head->next = token;
+	*where = token;
+	return token;
 }
-static struct token *expand_list(struct token *head)
+static struct token **expand_list(struct token **list)
 {
-	for (;;) {
-		struct token *next = head->next;
-
-		/* Did we hit the end of the current expansion? */
-		if (eof_token(next))
-			break;
-
-		switch (token_type(next)) {
-		case TOKEN_IDENT:
-			next = expand_one_symbol(head, next);
-			break;
-		case TOKEN_UNTAINT:
-			eat_untaint(head, next);
-			continue;
-		}
-
-		head = next;
+	struct token *next;
+	while (!eof_token(next = scan_next(list))) {
+		if (token_type(next) == TOKEN_IDENT)
+			list = expand_one_symbol(list);
+		else
+			list = &next->next;
 	}
-	return head;
+	return list;
 }
-static struct token *find_argument_end(struct token *start, struct token *arglist)
+static struct token *collect_arg(struct token *prev, int vararg, struct position *pos)
 {
+	struct token **p = &prev->next;
+	struct token *next;
 	int nesting = 0;
-	while (!eof_token(start)) {
-		struct token *next = start->next;
-		if (token_type(next) == TOKEN_UNTAINT) {
-			eat_untaint(start, next);
-			continue;
-		} else if (token_type(next) == TOKEN_IDENT) {
-			if (next->ident->tainted)
-				next->pos.noexpand = 1;
-		} else if (match_op(next, '('))
+	while (!eof_token(next = scan_next(p))) {
+		if (match_op(next, '(')) {
 			nesting++;
-		else if (match_op(next, ')')) {
-			if (--nesting < 0) {
-				start->next = &eof_token_entry;
-				return next->next;
-			}
-		} else if (!nesting && match_op(next, ',') && arglist->next != &variable_argument) {
-			next->special = SPECIAL_ARG_SEPARATOR;
-			arglist = arglist->next;
+		} else if (match_op(next, ')')) {
+			if (!nesting--)
+				break;
+		} else if (match_op(next, ',') && !nesting && !vararg) {
+			break;
 		}
-		start = next;
+		next->pos.stream = pos->stream;
+		next->pos.line = pos->line;
+		next->pos.pos = pos->pos;
+		p = &next->next;
 	}
-	return start;
+	*p = &eof_token_entry;
+	return next;
 }
-static struct token *dup_token(struct token *token, struct position *streampos, struct position *pos)
-{
-	struct token *alloc = alloc_token(streampos);
-	token_type(alloc) = token_type(token);
-	alloc->pos.newline = pos->newline;
-	alloc->pos.whitespace = pos->whitespace;
-	alloc->pos.noexpand = token->pos.noexpand;
-	alloc->number = token->number;
-	return alloc;
-}
+/*
+ * We store arglist as [arg1] ... eof
+ */
-static void insert(struct token *token, struct token *prev)
-{
-	token->next = prev->next;
-	prev->next = token;
-}
+struct arg {
+	struct token *arg;
+	struct token *expanded;
+	struct token *str;
+	int n_normal;
+	int n_quoted;
+	int n_str;
+};
-static struct token * replace(struct token *token, struct token *prev, struct token *list)
+static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what)
 {
-	struct position *pos = &token->pos;
+	int wanted = arglist->count.normal;
+	struct token *next = NULL;
+	int count = 0;
-	prev->next = token->next;
-	while (!eof_token(list) && !match_op(list, SPECIAL_ARG_SEPARATOR)) {
-		struct token *newtok = dup_token(list, &token->pos, pos);
-		insert(newtok, prev);
-		prev = newtok;
-		list = list->next;
-		pos = &list->pos;
+	arglist = arglist->next;	/* skip counter */
+
+	if (!wanted) {
+		next = collect_arg(start, 0, &what->pos);
+		if (eof_token(next))
+			goto Eclosing;
+		if (!eof_token(start->next) || !match_op(next, ')')) {
+			count++;
+			goto Emany;
+		}
+	} else {
+		for (count = 0; count < wanted; count++) {
+			struct argcount *p = &arglist->next->count;
+			next = collect_arg(start, p->vararg, &what->pos);
+			arglist = arglist->next->next;
+			if (eof_token(next))
+				goto Eclosing;
+			args[count].arg = start->next;
+			args[count].n_normal = p->normal;
+			args[count].n_quoted = p->quoted;
+			args[count].n_str = p->str;
+			if (match_op(next, ')')) {
+				count++;
+				break;
+			}
+			start = next;
+		}
+		if (count == wanted && !match_op(next, ')'))
+			goto Emany;
+		if (count == wanted - 1) {
+			struct argcount *p = &arglist->next->count;
+			if (!p->vararg)
+				goto Efew;
+			args[count].arg = NULL;
+			args[count].n_normal = p->normal;
+			args[count].n_quoted = p->quoted;
+			args[count].n_str = p->str;
+		}
+		if (count < wanted - 1)
+			goto Efew;
+	}
+	what->next = next->next;
+	return 1;
+
+Efew:
+	warn(what->pos, "macro \"%s\" requires %d arguments, but only %d given",
+		show_token(what), wanted, count);
+	goto out;
+Emany:
+	while (match_op(next, ',')) {
+		next = collect_arg(next, 0, &what->pos);
+		count++;
 	}
-	return prev;
+	if (eof_token(next))
+		goto Eclosing;
+	warn(what->pos, "macro \"%s\" passed %d arguments, but takes just %d",
+		show_token(what), count, wanted);
+	goto out;
+Eclosing:
+	warn(what->pos, "unterminated argument list invoking macro \"%s\"",
+		show_token(what));
+out:
+	what->next = next->next;
+	return 0;
 }
-static struct token *get_argument(int nr, struct token *args)
+static struct token *dup_list(struct token *list)
 {
-	if (!nr)
-		return args;
-	while (!eof_token(args)) {
-		if (match_op(args, SPECIAL_ARG_SEPARATOR))
-			if (!--nr)
-				return args->next;
-		args = args->next;
+	struct token *res;
+	struct token **p = &res;
+
+	while (!eof_token(list)) {
+		struct token *newtok = __alloc_token(0);
+		*newtok = *list;
+		*p = newtok;
+		p = &newtok->next;
+		list = list->next;
 	}
-
-	return args;
+	return res;
 }
-static struct token *stringify(struct token *token, struct token *arg)
+static struct token *stringify(struct token *arg)
 {
 	const char *s = show_token_sequence(arg);
 	int size = strlen(s)+1;
-	struct token *newtoken = alloc_token(&token->pos);
+	struct token *token = __alloc_token(0);
 	struct string *string = __alloc_string(size);
-	newtoken->pos.newline = token->pos.newline;
 	memcpy(string->data, s, size);
 	string->length = size;
-	token_type(newtoken) = TOKEN_STRING;
-	newtoken->string = string;
-	newtoken->next = &eof_token_entry;
-	return newtoken;
+	token->pos = arg->pos;
+	token_type(token) = TOKEN_STRING;
+	token->string = string;
+	token->next = &eof_token_entry;
+	return token;
 }
-static struct token empty_arg_token = { .pos = { .type = TOKEN_EOF } };
-
-static struct token *expand_one_arg(struct token *head, struct token *token, struct token *arguments, int do_expand)
+static void expand_arguments(int count, struct arg *args)
 {
-	int nr = token->argnum;
-	struct token *orig_head = head;
-	struct token *arg = get_argument(nr, arguments);
-	struct token *last = token->next;
-	token->next = &eof_token_entry;
-
-	/*
-	 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
-	 * then the 'x' goes away too.
-	 * NB: that's not what gcc kludge is about; later -- AV
-	 */
-	if (token_type(head) == TOKEN_CONCAT && eof_token(arg)) {
-		arg = &empty_arg_token;
-		empty_arg_token.next = &eof_token_entry;
+	int i;
+	for (i = 0; i < count; i++) {
+		struct token *arg = args[i].arg;
+		if (!arg)
+			arg = &eof_token_entry;
+		if (args[i].n_str)
+			args[i].str = stringify(arg);
+		if (args[i].n_normal) {
+			if (!args[i].n_quoted) {
+				args[i].expanded = arg;
+				args[i].arg = NULL;
+			} else if (eof_token(arg)) {
+				args[i].expanded = arg;
+			} else {
+				args[i].expanded = dup_list(arg);
+			}
+			expand_list(&args[i].expanded);
+		}
 	}
-
-	head = replace(token, head, arg);
-	if (do_expand)
-		head = expand_list(orig_head);
-	head->next = last;
-	return head;
 }
-static void expand_arguments(struct token *token, struct token *head,
-	struct token *arguments)
+/*
+ * Possibly valid combinations:
+ *  - ident + ident -> ident
+ *  - ident + number -> ident unless number contains '.', '+' or '-'.
+ *  - number + number -> number
+ *  - number + ident -> number
+ *  - number + '.' -> number
+ *  - number + '+' or '-' -> number, if number used to end on [eEpP].
+ *  - '.' + number -> number, if number used to start with a digit.
+ *  - special + special -> either special or an error.
+ */
+static enum token_type combine(struct token *left, struct token *right, char *p)
 {
-	for (;;) {
-		struct token *next = head->next;
+	int len;
+	enum token_type t1 = token_type(left), t2 = token_type(right);
-		/* Did we hit the end of the current expansion? */
-		if (eof_token(next))
-			break;
+	if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
+		return TOKEN_ERROR;
+
+	if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
+		return TOKEN_ERROR;
+
+	strcpy(p, show_token(left));
+	strcat(p, show_token(right));
+	len = strlen(p);
-		if (token_type(next) == TOKEN_STR_ARGUMENT) {
-			int nr = next->argnum;
-			struct token *newtoken = stringify(next, get_argument(nr, arguments));
-			next = replace(next, head, newtoken);
-		} else if (token_type(next) == TOKEN_MACRO_ARGUMENT)
-			next = expand_one_arg(head, next, arguments, 1);
-		else if (token_type(next) == TOKEN_QUOTED_ARGUMENT)
-			next = expand_one_arg(head, next, arguments, 0);
+	if (len >= 256)
+		return TOKEN_ERROR;
-		head = next;
+	if (t1 == TOKEN_IDENT) {
+		if (t2 == TOKEN_SPECIAL)
+			return TOKEN_ERROR;
+		if (t2 == TOKEN_NUMBER && strpbrk(p, "+-."))
+			return TOKEN_ERROR;
+		return TOKEN_IDENT;
 	}
+
+	if (t1 == TOKEN_NUMBER) {
+		if (t2 == TOKEN_SPECIAL) {
+			switch (right->special) {
+			case '.':
+				break;
+			case '+': case '-':
+				if (strchr("eEpP", p[len - 2]))
+					break;
+			default:
+				return TOKEN_ERROR;
+			}
+		}
+		return TOKEN_NUMBER;
+	}
+
+	if (p[0] == '.' && isdigit(p[1]))
+		return TOKEN_NUMBER;
+
+	return TOKEN_SPECIAL;
 }
-/*
- * Possibly valid combinations:
- * - anything + 'empty_arg_token' is empty.
- *  - ident + ident - combine (==ident)
- *      a1 ## bb = 'a1bb'
- *  - ident + number - combine (==ident)
- *      a ## 0x5 = 'a0x5'
- *  - number + number - combine (==number)
- *      12 ## 12 = '1212'
- *  - number + ident - combine (==number)
- *      0x ## aaa = '0xaaa'
- *  - string + string - leave as is, C will combine them anyway
- * others cause an error and leave the tokens as separate tokens.
- */
-static struct token *hashhash(struct token *head, struct token *first)
+static int merge(struct token *left, struct token *right)
 {
-	struct token *token;
-	static char buffer[512], *p;
-	struct token *newtoken;
-	int i = 2;
-
-	/*
-	 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
-	 * then the 'x' goes away too.
-	 *
-	 * See expand_one_arg.
-	 */
-	if (token_type(first->next) == TOKEN_EOF)
-		return first->next->next;
-
-	p = buffer;
-	token = first;
-	do {
-		static const char *src;
-		int len;
+	extern unsigned char combinations[][3];
+	static char buffer[512];
+	int n;
-		switch (token_type(token)) {
-		case TOKEN_IDENT:
-			len = token->ident->len;
-			src = token->ident->name;
-			break;
-		case TOKEN_NUMBER:
-			src = token->number;
-			len = strlen(src);
+	switch (combine(left, right, buffer)) {
+	case TOKEN_IDENT:
+		left->ident = built_in_ident(buffer);
+		left->pos.noexpand = 0;
+		return 1;
+
+	case TOKEN_NUMBER:
+		token_type(left) = TOKEN_NUMBER;	/* could be . + num */
+		left->number = __alloc_bytes(strlen(buffer) + 1);
+		memcpy(left->number, buffer, strlen(buffer) + 1);
+		return 1;
+
+	case TOKEN_SPECIAL:
+		if (buffer[2] && buffer[3])
 			break;
-		default:
-			goto out;
+		for (n = SPECIAL_BASE; n < SPECIAL_ARG_SEPARATOR; n++) {
+			if (!memcmp(buffer, combinations[n-SPECIAL_BASE], 3)) {
+				left->special = n;
+				return 1;
+			}
 		}
-		memcpy(p, src, len);
-		p += len;
-		token = token->next;
-	} while (--i > 0);
-
-out:
-	*p++ = 0;
-	if (!*buffer)
-		return token;
+	default:
+		;
+	}
+	warn(left->pos, "'##' failed: concatenation is not a valid token");
+	return 0;
+}
-	newtoken = alloc_token(&first->pos);
-	newtoken->next = token;
+static struct token *dup_token(struct token *token, struct position *streampos, struct position *pos)
+{
+	struct token *alloc = alloc_token(streampos);
+	token_type(alloc) = token_type(token);
+	alloc->pos.newline = pos->newline;
+	alloc->pos.whitespace = pos->whitespace;
+	alloc->number = token->number;
+	alloc->pos.noexpand = token->pos.noexpand;
+	return alloc;
+}
-	token_type(newtoken) = token_type(first);
-	switch (token_type(newtoken)) {
-	case TOKEN_IDENT:
-		newtoken->ident = built_in_ident(buffer);
-		break;
-	case TOKEN_NUMBER: {
-		newtoken->number = __alloc_bytes(p - buffer);
-		memcpy(newtoken->number, buffer, p - buffer);
-		break;
-	}
+static struct token **copy(struct token **where, struct token *list, int *count)
+{
+	int need_copy = --*count;
+	while (!eof_token(list)) {
+		struct token *token;
+		if (need_copy)
+			token = dup_token(list, &list->pos, &list->pos);
+		else
+			token = list;
+		if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
+			token->pos.noexpand = 1;
+		*where = token;
+		where = &token->next;
+		list = list->next;
 	}
-	return newtoken;
+	*where = &eof_token_entry;
+	return where;
 }
-static void retokenize(struct token *head)
+static struct token **substitute(struct token **list, struct token *body, struct arg *args)
 {
-	struct token * next = head->next;
-	struct token * nextnext = next->next;
-	struct token * nextnextnext = nextnext->next;
-
-	if (eof_token(next) || eof_token(nextnext))
-		return;
+	struct token *token = *list;
+	struct position *base_pos = &token->pos;
+	struct position *pos = base_pos;
+	int *count;
+	enum {Normal, Placeholder, Concat} state = Normal;
+
+	for (; !eof_token(body); body = body->next, pos = &body->pos) {
+		struct token *added, *arg;
+		struct token **tail;
+
+		switch (token_type(body)) {
+		case TOKEN_GNU_KLUDGE:
+			/*
+			 * GNU kludge: if we had <comma>##<vararg>, behaviour
+			 * depends on whether we had enough arguments to have
+			 * a vararg.  If we did, ## is just ignored.  Otherwise
+			 * both , and ## are ignored.  Comma should come from
+			 * the body of macro and not be an argument of earlier
+			 * concatenation.
+			 */
+			if (!args[body->next->argnum].arg)
+				continue;
+			added = dup_token(body, base_pos, pos);
+			token_type(added) = TOKEN_SPECIAL;
+			tail = &added->next;
+			break;

-	for (;;) {
-		if (eof_token(nextnextnext))
+		case TOKEN_STR_ARGUMENT:
+			arg = args[body->argnum].str;
+			count = &args[body->argnum].n_str;
+			goto copy_arg;
+
+		case TOKEN_QUOTED_ARGUMENT:
+			arg = args[body->argnum].arg;
+			count = &args[body->argnum].n_quoted;
+			if (!arg || eof_token(arg)) {
+				if (state == Concat)
+					state = Normal;
+				else
+					state = Placeholder;
+				continue;
+			}
+			goto copy_arg;
+
+		case TOKEN_MACRO_ARGUMENT:
+			arg = args[body->argnum].expanded;
+			count = &args[body->argnum].n_normal;
+			if (eof_token(arg)) {
+				state = Normal;
+				continue;
+			}
+		copy_arg:
+			tail = copy(&added, arg, count);
+			added->pos.newline = pos->newline;
+			added->pos.whitespace = pos->whitespace;
 			break;
-
-		if (token_type(nextnext) == TOKEN_CONCAT) {
-			next->next = nextnextnext;
-			next = hashhash(head, next);
-			head->next = next;
-			nextnext = next->next;
-			nextnextnext = nextnext->next;
+
+		case TOKEN_CONCAT:
+			if (state == Placeholder)
+				state = Normal;
+			else
+				state = Concat;
 			continue;
-
+
+		case TOKEN_IDENT:
+			added = dup_token(body, base_pos, pos);
+			if (added->ident->tainted)
+				added->pos.noexpand = 1;
+			tail = &added->next;
+			break;
+
+		default:
+			added = dup_token(body, base_pos, pos);
+			tail = &added->next;
+			break;
 		}
-		head = next;
-		next = nextnext;
-		nextnext = nextnext->next;
-		nextnextnext = nextnextnext->next;
+		/*
+		 * if we got to doing real concatenation, we already have
+		 * added something into the list, so containing_token() is OK.
+		 */
+		if (state == Concat && merge(containing_token(list), added)) {
+			*list = added->next;
+			if (tail != &added->next)
+				list = tail;
+		} else {
+			*list = added;
+			list = tail;
+		}
+		state = Normal;
 	}
+	*list = &eof_token_entry;
+	return list;
 }
-static struct token *expand(struct token *head, struct symbol *sym)
+static struct token **expand(struct token **list, struct symbol *sym)
 {
-	struct token *arguments, *last;
-	struct token *token = head->next;
+	struct token *last;
+	struct token *token = *list;
 	struct ident *expanding = token->ident;
+	struct token **tail;
+	int nargs = sym->arglist ? sym->arglist->count.normal : 0;
+	struct arg args[nargs];
 	if (expanding->tainted) {
 		token->pos.noexpand = 1;
-		return token;
+		return &token->next;
 	}
 	if (sym->arglist) {
-		if (token_type(token->next) == TOKEN_UNTAINT)
-			eat_untaint(token, token->next);
-		arguments = token->next;
-		if (!match_op(arguments, '('))
-			return token;
-		last = find_argument_end(arguments, sym->arglist);
-		arguments = arguments->next;
-	} else {
-		arguments = NULL;
-		last = token->next;
+		if (!match_op(scan_next(&token->next), '('))
+			return &token->next;
+		if (!collect_arguments(token->next, sym->arglist, args, token))
+			return &token->next;
+		expand_arguments(nargs, args);
 	}
-	token->next = &eof_token_entry;
-
-	/* Replace the token with the token expansion */
-	replace(token, head, sym->expansion);
-
-	/* Then, replace all the arguments with their expansions */
-	if (arguments)
-		expand_arguments(token, head, arguments);
-
-	/* Re-tokenize the sequence if any ## token exists.. */
-	retokenize(head);
-
-	token = head;
-	while (!eof_token(token->next))
-		token = token->next;
-	token->next = last;
 	expanding->tainted = 1;
-	return head;
+	last = token->next;
+	tail = substitute(list, sym->expansion, args);
+	*tail = last;
+
+	return list;
 }
 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
@@ -495,7 +589,7 @@ static const char *token_name_sequence(struct token *token, int endop, struct to
 	return buffer;
 }
-static int try_include(const char *path, int plen, const char *filename, int flen, struct token *head)
+static int try_include(const char *path, int plen, const char *filename, int flen, struct token **where)
 {
 	int fd;
 	static char fullname[PATH_MAX];
@@ -510,19 +604,19 @@ static int try_include(const char *path, int plen, const char *filename, int fle
 	if (fd >= 0) {
 		char * streamname = __alloc_bytes(plen + flen);
 		memcpy(streamname, fullname, plen + flen);
-		head->next = tokenize(streamname, fd, head->next);
+		*where = tokenize(streamname, fd, *where);
 		close(fd);
 		return 1;
 	}
 	return 0;
 }
-static int do_include_path(const char **pptr, struct token *head, struct token *token, const char *filename, int flen)
+static int do_include_path(const char **pptr, struct token **list, struct token *token, const char *filename, int flen)
 {
 	const char *path;
 	while ((path = *pptr++) != NULL) {
-		if (!try_include(path, strlen(path), filename, flen, head))
+		if (!try_include(path, strlen(path), filename, flen, list))
 			continue;
 		return 1;
 	}
@@ -530,7 +624,7 @@ static int do_include_path(const char **pptr, struct token *head, struct token *
 }
-static void do_include(int local, struct stream *stream, struct token *head, struct token *token, const char *filename)
+static void do_include(int local, struct stream *stream, struct token **list, struct token *token, const char *filename)
 {
 	int flen = strlen(filename) + 1;
@@ -544,22 +638,22 @@ static void do_include(int local, struct stream *stream, struct token *head, str
 		slash = strrchr(path, '/');
 		plen = slash ? slash - path : 0;
-		if (try_include(path, plen, filename, flen, head))
+		if (try_include(path, plen, filename, flen, list))
 			return;
 	}
 	/* Check the standard include paths.. */
-	if (do_include_path(includepath, head, token, filename, flen))
+	if (do_include_path(includepath, list, token, filename, flen))
 		return;
-	if (do_include_path(sys_includepath, head, token, filename, flen))
+	if (do_include_path(sys_includepath, list, token, filename, flen))
 		return;
-	if (do_include_path(gcc_includepath, head, token, filename, flen))
+	if (do_include_path(gcc_includepath, list, token, filename, flen))
 		return;
 	error(token->pos, "unable to open '%s'", filename);
 }
-static int handle_include(struct stream *stream, struct token *head, struct token *token)
+static int handle_include(struct stream *stream, struct token **list, struct token *token)
 {
 	const char *filename;
 	struct token *next;
@@ -572,13 +666,13 @@ static int handle_include(struct stream *stream, struct token *head, struct toke
 	next = token->next;
 	expect = '>';
 	if (!match_op(next, '<')) {
-		expand_list(token);
+		expand_list(&token->next);
 		expect = 0;
 		next = token;
 	}
 	token = next->next;
 	filename = token_name_sequence(token, expect, token);
-	do_include(!expect, stream, head, token, filename);
+	do_include(!expect, stream, list, token, filename);
 	return 1;
 }
@@ -593,8 +687,10 @@ static int token_different(struct token *t1, struct token *t2)
 	case TOKEN_IDENT:
 		different = t1->ident != t2->ident;
 		break;
+	case TOKEN_ARG_COUNT:
 	case TOKEN_UNTAINT:
 	case TOKEN_CONCAT:
+	case TOKEN_GNU_KLUDGE:
 		different = 0;
 		break;
 	case TOKEN_NUMBER:
@@ -643,36 +739,56 @@ static int token_list_different(struct token *list1, struct token *list2)
 	}
 }
+static inline void set_arg_count(struct token *token)
+{
+	token_type(token) = TOKEN_ARG_COUNT;
+	token->count.normal = token->count.quoted =
+	token->count.str = token->count.vararg = 0;
+}
+
 static struct token *parse_arguments(struct token *list)
 {
 	struct token *arg = list->next, *next = list;
+	struct argcount *count = &list->count;
+
+	set_arg_count(list);
 	if (match_op(arg, ')')) {
+		next = arg->next;
 		list->next = &eof_token_entry;
-		return arg->next;
+		return next;
 	}
 	while (token_type(arg) == TOKEN_IDENT) {
 		if (arg->ident == &__VA_ARGS___ident)
 			goto Eva_args;
+		if (!++count->normal)
+			goto Eargs;
 		next = arg->next;
 		if (match_op(next, ',')) {
-			arg = arg->next = next->next;
+			set_arg_count(next);
+			arg = next->next;
 			continue;
 		}
 		if (match_op(next, ')')) {
-			arg->next = &eof_token_entry;
-			return next->next;
+			set_arg_count(next);
+			next = next->next;
+			arg->next->next = &eof_token_entry;
+			return next;
 		}
 		/* normal cases are finished here */
 		if (match_op(next, SPECIAL_ELLIPSIS)) {
-			arg->next = &variable_argument;
-			if (match_op(next->next, ')'))
-				return next->next->next;
+			if (match_op(next->next, ')')) {
+				set_arg_count(next);
+				next->count.vararg = 1;
+				next = next->next;
+				arg->next->next = &eof_token_entry;
+				return next->next;
+			}
 			arg = next;
 			goto Enotclosed;
@@ -690,10 +806,15 @@ static struct token *parse_arguments(struct token *list)
 			next = arg->next;
 			token_type(arg) = TOKEN_IDENT;
 			arg->ident = &__VA_ARGS___ident;
-			arg->next = &variable_argument;
 			if (!match_op(next, ')'))
 				goto Enotclosed;
-			return next->next;
+			if (!++count->normal)
+				goto Eargs;
+			set_arg_count(next);
+			next->count.vararg = 1;
+			next = next->next;
+			arg->next->next = &eof_token_entry;
+			return next;
 		}
 		if (eof_token(arg)) {
@@ -719,6 +840,9 @@ Enotclosed:
 Eva_args:
 	warn(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 	return NULL;
+Eargs:
+	warn(arg->pos, "too many arguments in macro definition");
+	return NULL;
 }
 static int try_arg(struct token *token, enum token_type type, struct token *arglist)
@@ -729,11 +853,29 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
 	if (!arglist || token_type(token) != TOKEN_IDENT)
 		return 0;
-	for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next) {
+	arglist = arglist->next;
+
+	for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next->next) {
 		if (arglist->ident == ident) {
+			struct argcount *count = &arglist->next->count;
+			int n;
+
 			token->argnum = nr;
 			token_type(token) = type;
-			return 1;
+			switch (type) {
+			case TOKEN_MACRO_ARGUMENT:
+				n = ++count->normal;
+				break;
+			case TOKEN_QUOTED_ARGUMENT:
+				n = ++count->quoted;
+				break;
+			default:
+				n = ++count->str;
+			}
+			if (n)
+				return count->vararg ? 2 : 1;
+			token_type(token) = TOKEN_ERROR;
+			return -1;
 		}
 	}
 	return 0;
@@ -743,22 +885,36 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
 {
 	struct token *token = expansion;
 	struct token **p;
+	struct token *last = NULL;
 	if (match_op(token, SPECIAL_HASHHASH))
 		goto Econcat;
 	for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
-		if (match_op(token, '#') && arglist) {
-			struct token *next = token->next;
-			if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
-				goto Equote;
-			token = *p = next;
+		if (match_op(token, '#')) {
+			if (arglist) {
+				struct token *next = token->next;
+				if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
+					goto Equote;
+				next->pos.whitespace = token->pos.whitespace;
+				token = *p = next;
+			} else {
+				token->pos.noexpand = 1;
+			}
 		} else if (match_op(token, SPECIAL_HASHHASH)) {
 			struct token *next = token->next;
+			int arg = try_arg(next, TOKEN_QUOTED_ARGUMENT, arglist);
 			token_type(token) = TOKEN_CONCAT;
-			if (try_arg(next, TOKEN_QUOTED_ARGUMENT, arglist))
+			if (arg) {
 				token = next;
-			else if (match_op(next, SPECIAL_HASHHASH))
+				/* GNU kludge */
+				if (arg == 2 && last && match_op(last, ',')) {
+					token_type(last) = TOKEN_GNU_KLUDGE;
+					last->next = token;
+				}
+			} else if (match_op(next, SPECIAL_HASHHASH))
+				token = next;
+			else if (match_op(next, ','))
 				token = next;
 			else if (eof_token(next))
 				goto Econcat;
@@ -767,6 +923,9 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
 		} else {
 			try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
 		}
+		if (token_type(token) == TOKEN_ERROR)
+			goto Earg;
+		last = token;
 	}
 	token = alloc_token(&expansion->pos);
 	token_type(token) = TOKEN_UNTAINT;
@@ -782,9 +941,12 @@ Equote:
 Econcat:
 	warn(token->pos, "'##' cannot appear at the ends of macro expansion");
 	return NULL;
+Earg:
+	warn(token->pos, "too many instances of argument in body");
+	return NULL;
 }
-static int handle_define(struct stream *stream, struct token *head, struct token *token)
+static int handle_define(struct stream *stream, struct token **line, struct token *token)
 {
 	struct token *arglist, *expansion;
 	struct token *left = token->next;
@@ -792,7 +954,7 @@ static int handle_define(struct stream *stream, struct token *head, struct token
 	struct ident *name;
 	if (token_type(left) != TOKEN_IDENT) {
-		warn(head->pos, "expected identifier to 'define'");
+		warn(token->pos, "expected identifier to 'define'");
 		return 0;
 	}
 	if (false_nesting)
@@ -806,7 +968,6 @@ static int handle_define(struct stream *stream, struct token *head, struct token
 		expansion = parse_arguments(expansion);
 		if (!expansion)
 			return 1;
-		arglist = arglist->next;
 	}
 	expansion = parse_expansion(expansion, arglist, name);
@@ -831,13 +992,13 @@ static int handle_define(struct stream *stream, struct token *head, struct token
 	return 1;
 }
-static int handle_undef(struct stream *stream, struct token *head, struct token *token)
+static int handle_undef(struct stream *stream, struct token **line, struct token *token)
 {
 	struct token *left = token->next;
 	struct symbol **sym;
 	if (token_type(left) != TOKEN_IDENT) {
-		warn(head->pos, "expected identifier to 'undef'");
+		warn(token->pos, "expected identifier to 'undef'");
 		return 0;
 	}
 	if (false_nesting)
@@ -878,12 +1039,12 @@ static int token_defined(struct token *token)
 	return 0;
 }
-static int handle_ifdef(struct stream *stream, struct token *head, struct token *token)
+static int handle_ifdef(struct stream *stream, struct token **line, struct token *token)
 {
 	return preprocessor_if(token, token_defined(token->next));
 }
-static int handle_ifndef(struct stream *stream, struct token *head, struct token *token)
+static int handle_ifndef(struct stream *stream, struct token **line, struct token *token)
 {
 	struct token *next = token->next;
 	if (stream->constant == -1) {
@@ -900,29 +1061,73 @@ static int handle_ifndef(struct stream *stream, struct token *head, struct token
 	return preprocessor_if(token, !token_defined(next));
 }
-static int expression_value(struct token *head)
+/*
+ * Expression handling for #if and #elif; it differs from normal expansion
+ * due to special treatment of "defined".
+ */
+static int expression_value(struct token **where)
 {
 	struct expression *expr;
-	struct token *token;
+	struct token *p;
+	struct token **list = where, **beginning = NULL;
 	long long value;
+	int state = 0;
+
+	while (!eof_token(p = scan_next(list))) {
+		switch (state) {
+		case 0:
+			if (token_type(p) == TOKEN_IDENT) {
+				if (p->ident != &defined_ident) {
+					list = expand_one_symbol(list);
+					continue;
+				}
+				state = 1;
+				beginning = list;
+			}
+			break;
+		case 1:
+			if (match_op(p, '(')) {
+				state = 2;
+			} else {
+				state = 0;
+				replace_with_defined(p);
+				*beginning = p;
+			}
+			break;
+		case 2:
+			if (token_type(p) == TOKEN_IDENT)
+				state = 3;
+			else
+				state = 0;
+			replace_with_defined(p);
+			*beginning = p;
+			break;
+		case 3:
+			state = 0;
+			if (!match_op(p, ')'))
+				warn(p->pos, "missing ')' after \"defined\"");
+			*list = p->next;
+			continue;
+		}
+		list = &p->next;
+	}
-	expand_list(head);
-	token = constant_expression(head->next, &expr);
-	if (!eof_token(token))
-		warn(token->pos, "garbage at end: %s", show_token_sequence(token));
+	p = constant_expression(*where, &expr);
+	if (!eof_token(p))
+		warn(p->pos, "garbage at end: %s", show_token_sequence(p));
 	value = get_expression_value(expr);
 	return value != 0;
 }
-static int handle_if(struct stream *stream, struct token *head, struct token *token)
+static int handle_if(struct stream *stream, struct token **line, struct token *token)
 {
 	int value = 0;
 	if (!false_nesting)
-		value = expression_value(token);
+		value = expression_value(&token->next);
 	return preprocessor_if(token, value);
 }
-static int handle_elif(struct stream * stream, struct token *head, struct token *token)
+static int handle_elif(struct stream * stream, struct token **line, struct token *token)
 {
 	if (stream->nesting == if_nesting)
 		stream->constant = 0;
@@ -930,7 +1135,7 @@ static int handle_elif(struct stream * stream, struct token *head, struct token
 	/* If this whole if-thing is if'ed out, an elif cannot help */
 	if (elif_ignore[if_nesting-1])
 		return 1;
-	if (expression_value(token)) {
+	if (expression_value(&token->next)) {
 		false_nesting--;
 		true_nesting++;
 		elif_ignore[if_nesting-1] = 1;
@@ -946,7 +1151,7 @@ static int handle_elif(struct stream * stream, struct token *head, struct token
 	return 1;
 }
-static int handle_else(struct stream *stream, struct token *head, struct token *token)
+static int handle_else(struct stream *stream, struct token **line, struct token *token)
 {
 	if (stream->nesting == if_nesting)
 		stream->constant = 0;
@@ -968,7 +1173,7 @@ static int handle_else(struct stream *stream, struct token *head, struct token *
 	return 1;
 }
-static int handle_endif(struct stream *stream, struct token *head, struct token *token)
+static int handle_endif(struct stream *stream, struct token **line, struct token *token)
 {
 	if (stream->constant == -2 && stream->nesting == if_nesting)
 		stream->constant = -1;
@@ -993,7 +1198,7 @@ static const char *show_token_sequence(struct token *token)
 	if (!token)
 		return "";
-	while (!eof_token(token) && !match_op(token, SPECIAL_ARG_SEPARATOR)) {
+	while (!eof_token(token)) {
 		const char *val = show_token(token);
 		int len = strlen(val);
@@ -1013,7 +1218,7 @@ static const char *show_token_sequence(struct token *token)
 	return buffer;
 }
-static int handle_warning(struct stream *stream, struct token *head, struct token *token)
+static int handle_warning(struct stream *stream, struct token **line, struct token *token)
 {
 	if (false_nesting)
 		return 1;
@@ -1021,7 +1226,7 @@ static int handle_warning(struct stream *stream, struct token *head, struct toke
 	return 1;
 }
-static int handle_error(struct stream *stream, struct token *head, struct token *token)
+static int handle_error(struct stream *stream, struct token **line, struct token *token)
 {
 	if (false_nesting)
 		return 1;
@@ -1029,7 +1234,7 @@ static int handle_error(struct stream *stream, struct token *head, struct token
 	return 1;
 }
-static int handle_nostdinc(struct stream *stream, struct token *head, struct token *token)
+static int handle_nostdinc(struct stream *stream, struct token **line, struct token *token)
 {
 	if (false_nesting)
 		return 1;
@@ -1051,7 +1256,7 @@ static void add_path_entry(struct token *token, const char *path)
 	warn(token->pos, "too many include path entries");
 }
-static int handle_add_include(struct stream *stream, struct token *head, struct token *token)
+static int handle_add_include(struct stream *stream, struct token **line, struct token *token)
 {
 	for (;;) {
 		token = token->next;
@@ -1077,25 +1282,25 @@ static int handle_add_include(struct stream *stream, struct token *head, struct
  * So eventually this will turn into some kind of extended
  * __attribute__() like thing, except called __pragma__(xxx).
  */
-static int handle_pragma(struct stream *stream, struct token *head, struct token *token)
+static int handle_pragma(struct stream *stream, struct token **line, struct token *token)
 {
-	struct token *next = head->next;
+	struct token *next = *line;
 	token->ident = &pragma_ident;
 	token->pos.newline = 1;
 	token->pos.whitespace = 1;
 	token->pos.pos = 1;
-	head->next = token;
+	*line = token;
 	token->next = next;
 	return 1;
 }
-static int handle_preprocessor_command(struct stream *stream, struct token *head, struct ident *ident, struct token *token)
+static int handle_preprocessor_command(struct stream *stream, struct token **line, struct ident *ident, struct token *token)
 {
 	int i;
 	static struct {
 		const char *name;
-		int (*handler)(struct stream *, struct token *, struct token *);
+		int (*handler)(struct stream *, struct token **, struct token *);
 	} handlers[] = {
 		{ "define",	handle_define },
 		{ "undef",	handle_undef },
@@ -1117,25 +1322,25 @@ static int handle_preprocessor_command(struct stream *stream, struct token *head
 	for (i = 0; i < (sizeof (handlers) / sizeof (handlers[0])); i++) {
 		if (match_string_ident(ident, handlers[i].name))
-			return handlers[i].handler(stream, head, token);
+			return handlers[i].handler(stream, line, token);
 	}
 	return 0;
 }
-static void handle_preprocessor_line(struct stream *stream, struct token * head, struct token *token)
+static void handle_preprocessor_line(struct stream *stream, struct token **line, struct token *token)
 {
 	if (!token)
 		return;
 	if (token_type(token) == TOKEN_IDENT)
-		if (handle_preprocessor_command(stream, head, token->ident, token))
+		if (handle_preprocessor_command(stream, line, token->ident, token))
 			return;
 	warn(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token));
 }
-static void preprocessor_line(struct stream *stream, struct token * head)
+static void preprocessor_line(struct stream *stream, struct token **line)
 {
-	struct token *start = head->next, *next;
+	struct token *start = *line, *next;
 	struct token **tp = &start->next;
 	for (;;) {
@@ -1144,24 +1349,27 @@ static void preprocessor_line(struct stream *stream, struct token * head)
 			break;
 		tp = &next->next;
 	}
-	head->next = next;
+	*line = next;
 	*tp = &eof_token_entry;
-	handle_preprocessor_line(stream, head, start->next);
+	handle_preprocessor_line(stream, line, start->next);
 }
-static void do_preprocess(struct token *head)
+static void do_preprocess(struct token **list)
 {
-	do {
-		struct token *next = head->next;
+	struct token *next;
+
+	while (!eof_token(next = scan_next(list))) {
 		struct stream *stream = input_streams + next->pos.stream;
 		if (next->pos.newline && match_op(next, '#')) {
-			preprocessor_line(stream, head);
-			continue;
+			if (!next->pos.noexpand) {
+				preprocessor_line(stream, list);
+				continue;
+			}
 		}
 		if (false_nesting) {
-			head->next = next->next;
+			*list = next->next;
 			continue;
 		}
@@ -1172,36 +1380,28 @@ static void do_preprocess(struct token *head)
 			}
 			/* fallthrough */
 		case TOKEN_STREAMBEGIN:
-			head->next = next->next;
-			continue;
-
-		case TOKEN_UNTAINT:
-			eat_untaint(head, next);
+			*list = next->next;
 			continue;
 		case TOKEN_IDENT:
-			next = expand_one_symbol(head, next);
-			/* fallthrough */
+			list = expand_one_symbol(list);
+			break;
 		default:
-			/*
-			 * Any token expansion (even if it ended up being an
-			 * empty expansion) in this stream implies it can't
-			 * be constant.
-			 */
-			stream->constant = 0;
+			list = &next->next;
 		}
-
-		head = next;
-	} while (!eof_token(head));
+		/*
+		 * Any token expansion (even if it ended up being an
+		 * empty expansion) in this stream implies it can't
+		 * be constant.
+		 */
+		stream->constant = 0;
+	}
 }
 struct token * preprocess(struct token *token)
 {
-	struct token header = { };
-
 	preprocessing = 1;
-	header.next = token;
-	do_preprocess(&header);
+	do_preprocess(&token);
 	if (if_nesting)
 		warn(unmatched_if->pos, "unmatched preprocessor conditional");
@@ -1209,5 +1409,5 @@ struct token * preprocess(struct token *token)
 	clear_expression_alloc();
 	preprocessing = 0;
-	return header.next;
+	return token;
 }
diff --git a/token.h b/token.h
index d35ae744..32cd0701 100644
--- a/token.h
+++ b/token.h
@@ -61,7 +61,9 @@ enum token_type {
 	TOKEN_STR_ARGUMENT,
 	TOKEN_QUOTED_ARGUMENT,
 	TOKEN_CONCAT,
+	TOKEN_GNU_KLUDGE,
 	TOKEN_UNTAINT,
+	TOKEN_ARG_COUNT,
 };
 /* Combination tokens */
@@ -117,6 +119,14 @@ struct string {
 	char data[];
 };
+/* will fit into 32 bits */
+struct argcount {
+	unsigned normal:10;
+	unsigned quoted:10;
+	unsigned str:10;
+	unsigned vararg:1;
+};
+
 /*
  * This is a very common data structure, it should be kept
  * as small as humanly possible. Big (rare) types go as
@@ -132,9 +142,16 @@ struct token {
 		struct string *string;
 		int character;
 		int argnum;
+		struct argcount count;
 	};
 };
+static inline struct token *containing_token(struct token **p)
+{
+	void *addr = (char*)p - ((char*)&((struct token *)0)->next - (char*)0);
+	return addr;
+}
+
 #define token_type(x) ((x)->pos.type)
 /*
diff --git a/validation/preprocessor12.c b/validation/preprocessor12.c
new file mode 100644
index 00000000..385c1a7d
--- /dev/null
+++ b/validation/preprocessor12.c
@@ -0,0 +1,7 @@
+/*
+ * GNU kludge
+ */
+#define A(x,...) x,##__VA_ARGS__
+A(1)
+A(1,2)
+A(1,2,3)
diff --git a/validation/preprocessor13.c b/validation/preprocessor13.c
new file mode 100644
index 00000000..96c813ee
--- /dev/null
+++ b/validation/preprocessor13.c
@@ -0,0 +1,7 @@
+/*
+ * GNU kludge, corner case
+ */
+#define A(x,...) x##,##__VA_ARGS__
+A(1)
+A(1,2)
+A(1,2,3)
diff --git a/validation/preprocessor14.c b/validation/preprocessor14.c
new file mode 100644
index 00000000..001f1f2d
--- /dev/null
+++ b/validation/preprocessor14.c
@@ -0,0 +1,7 @@
+/*
+ * GNU kludge, another corner case
+ */
+#define A(x,y,...) ,##x##__VA_ARGS__
+A(,1)
+#define B(x,y,...) x##,##__VA_ARGS__
+B(,1)
diff --git a/validation/preprocessor15.c b/validation/preprocessor15.c
new file mode 100644
index 00000000..0a2dfd7f
--- /dev/null
+++ b/validation/preprocessor15.c
@@ -0,0 +1,7 @@
+#define A defi
+#define B ned
+#define C(x,y) x##y
+#define D(x,y) C(x,y)
+#if D(A,B) B
+D(1,2)
+#endif
-- 
2.11.4.GIT