It appears Solaris's cc is ignoring the signedness of bitfield types.
[xiph/unicode.git] / sushivision / tokens.c
blobf7d0934c93a0fc4829b94fd56525668c594910cf
/*
 *
 *     sushivision copyright (C) 2006-2007 Monty <monty@xiph.org>
 *
 *  sushivision is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  sushivision is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with sushivision; see the file COPYING.  If not, write to the
 *  Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */
/* you should never write your own parser.  ever. */
24 #define _GNU_SOURCE
25 #include <string.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <ctype.h>
29 #include <math.h>
30 #include "internal.h"
// not a true recursive parser as there's no arbitrary nesting.
34 void _sv_tokenval_free(_sv_tokenval *v){
35 if(v){
36 if(v->s)free(v->s);
37 free(v);
41 void _sv_token_free(_sv_token *t){
42 if(t){
43 if(t->name)free(t->name);
44 if(t->label)free(t->label);
45 if(t->values){
46 int i;
47 for(i=0;i<t->n;i++)
48 if(t->values[i])_sv_tokenval_free(t->values[i]);
49 free(t->values);
51 free(t);
56 void _sv_tokenlist_free(_sv_tokenlist *l){
57 if(l){
58 if(l->list){
59 int i;
60 for(i=0;i<l->n;i++)
61 if(l->list[i])_sv_token_free(l->list[i]);
62 free(l->list);
64 free(l);
68 // don't allow locale conventions to screw up built-in number strings.
69 // Sorry, but we're hardcoding decimal points in the float syntax. This is
70 // for built-in arg strings, we can substitute in labels later if
71 // anyone's offended.
72 static float atof_portable(char *number){
73 int msign=1;
74 int d=0;
75 int f=0;
76 int fe=0;
77 int esign=1;
78 int e=0;
79 int dflag=0;
80 int fflag=0;
81 int eflag=0;
83 // trim whitespace
84 while(number && *number && isspace(*number))number++;
86 // overall sign
87 if(number && *number=='-'){
88 msign = -1;
89 number++;
91 if(number && *number=='+'){
92 number++;
95 // whole integer
96 while(number && *number>='0' && *number<='9'){
97 d = d*10+(*number-48);
98 number++;
99 dflag=1;
102 // seperator
103 if(number && *number=='.'){
104 number++;
107 // fraction
108 while(number && *number>='0' && *number<='9'){
109 f = f*10+(*number-48);
110 fe--;
111 number++;
112 fflag=1;
115 // exponent seperator
116 if(number && (*number=='e' || *number=='E')){
117 number++;
119 //exponent sign
120 if(number && *number=='-'){
121 esign = -1;
122 number++;
124 if(number && *number=='+'){
125 number++;
127 while(number && *number>='0' && *number<='9'){
128 e = e*10+(*number-48);
129 number++;
130 eflag=1;
132 if(*number)return NAN;
133 if(!eflag)return NAN;
136 if(!dflag && !fflag)return NAN;
137 return msign*(d+f*powf(10,fe))*powf(10,e*esign);
// Strip leading and trailing whitespace from a string in place.
// The remaining text is shifted to the start of the buffer, so the
// returned pointer is always `in` itself.  Returns NULL for NULL.
static char *trim(char *in){
  char *head=in;
  char *tail=in;
  if(!in)return NULL;

  // <ctype.h> functions require a value representable as unsigned
  // char; passing a negative plain char is undefined behaviour.
  while(*head && isspace((unsigned char)*head))head++;

  // slide the remainder down over the skipped leading whitespace
  while(*head){
    *tail = *head;
    tail++;
    head++;
  }

  // back up over trailing whitespace and terminate
  while(tail>in && isspace((unsigned char)*(tail-1)))tail--;
  *tail='\0';

  return in;
}
// Remove one level of backslash escaping from a string in place.
// A backslash that is not itself escaped is dropped and the character
// after it is kept verbatim, so "\\:" becomes ":" and a doubled
// backslash becomes a single one.  A lone trailing backslash is
// dropped.  Returns its argument (NULL is passed through).
static char *unescape(char *a){
  char *src;
  char *dst;
  int pending=0;   // nonzero when the previous character was an escaping backslash

  if(!a)return a;

  dst=a;
  for(src=a; *src; src++){
    if(*src=='\\' && !pending){
      pending=1;
      continue;
    }
    *dst++ = *src;
    pending=0;
  }
  *dst='\0';

  return a;
}
// True for the characters _sv_tokenize_escape prefixes with a
// backslash: ':' ',' '(' ')' '\\' and any whitespace.
static int token_needs_escape(char c){
  return c==':' || c==',' || c=='(' || c==')' || c=='\\' ||
    isspace((unsigned char)c);   // cast: plain char may be negative
}

// Return a newly allocated copy of a in which every colon, comma,
// paren, backslash and whitespace character is preceded by a
// backslash.  The caller owns the result and must free() it.
// A NULL or empty input yields an empty string; NULL is returned only
// when allocation fails.
char *_sv_tokenize_escape(char *a){
  char *head;
  char *tail;
  char *ret;
  size_t count=0;

  // first pass: size of the escaped string
  for(head=a; head && *head; head++)
    count += token_needs_escape(*head) ? 2 : 1;

  ret=tail=calloc(count+1,sizeof(*tail));
  if(!ret)return NULL;

  // second pass: copy, inserting backslashes; calloc left the
  // terminating NUL in place
  for(head=a; head && *head; head++){
    if(token_needs_escape(*head))
      *tail++='\\';
    *tail++=*head;
  }

  return ret;
}
// Split a string in place at the first separator that is neither
// backslash-escaped nor enclosed in parens (only parens enclose in
// our syntax).  The separator is overwritten with NUL and a pointer
// to the text following it is returned; NULL is returned when no such
// separator exists (or a is NULL).
//
// An unescaped ')' with no matching '(' is reported on stderr and
// overwritten with a space.
static char *split(char *a, char sep){
  int depth=0;     // current paren nesting
  int escaped=0;   // previous character was an escaping backslash
  char *p;

  if(!a)return NULL;

  for(p=a; *p; p++){
    if(!escaped){
      if(*p=='('){
        depth++;
      }else if(*p==')'){
        if(--depth<0){
          fprintf(stderr,"sushivision: ignoring extraneous paren in \"%s\".\n",
                  a);
          *p=' ';
          depth=0;
        }
      }

      if(*p==sep && depth==0){
        // we've found our split point
        *p='\0';
        return p+1;
      }
    }

    escaped = (*p=='\\') ? !escaped : 0;
  }

  return NULL;
}
// Count the pieces a string would break into if it were split at
// every separator that is neither backslash-escaped nor enclosed in
// parens.  The string is not modified.  The result is always at least
// 1, including for an empty or NULL string.
static int splitcount(char *a, char sep){
  int pieces=1;
  int depth=0;     // current paren nesting, never allowed below zero
  int escaped=0;   // previous character was an escaping backslash
  char *p;

  if(!a)return pieces;

  for(p=a; *p; p++){
    const char c=*p;

    if(!escaped){
      if(c=='('){
        depth++;
      }else if(c==')' && depth>0){
        depth--;
      }

      if(c==sep && depth==0){
        pieces++;
        continue;   // escape state is left untouched on a counted separator
      }
    }

    escaped = (c=='\\') ? !escaped : 0;
  }

  return pieces;
}
// Unwrap the contents enclosed by the first level of parens (only
// parens enclose in our syntax).  Works in place: the first unescaped
// '(' and its matching ')' are overwritten with NUL, so `a` is left
// holding the text before the paren and the returned pointer is the
// text between them.  Returns NULL when there is no unescaped '('.
//
// An unescaped ')' with no matching '(' is reported on stderr and
// overwritten with a space.  If the string ends with parens still
// open, that is reported too and the partial contents are returned.
static char *unwrap(char *a){
  char *inner=NULL;   // start of the enclosed text, once found
  int depth=0;        // current paren nesting
  int escaped=0;      // previous character was an escaping backslash
  char *p;

  for(p=a; p && *p; p++){
    if(!escaped){
      if(*p=='('){
        if(depth==0){
          inner=p+1;
          *p='\0';
        }
        depth++;
      }else if(*p==')'){
        depth--;
        if(depth==0){
          *p='\0';
          return inner;
        }
        if(depth<0){
          fprintf(stderr,"sushivision: ignoring extraneous paren in \"%s\".\n",
                  a);
          *p=' ';
          depth=0;
        }
      }
    }

    escaped = (*p=='\\') ? !escaped : 0;
  }

  if(depth!=0){
    fprintf(stderr,"sushivision: unbalanced paren(s) at \"%s(%s\".\n",
            a,inner);
    return inner;
  }

  return NULL;
}
// A string is any C string of characters not containing unescaped
// parens, commas or colons.  Parsing one consists of cutting the
// input at the first unescaped comma, colon or paren (warning on
// stderr about whatever is discarded), removing backslash escapes and
// stripping preceding and trailing whitespace.
//
// NOTE(review): escapes are removed before trimming, so whitespace
// that was escaped at either end is stripped as well -- confirm
// whether escaped spaces were meant to survive.
//
// Returns a newly allocated string owned by the caller, or NULL when
// `in` is NULL, when nothing remains before the first separator, or
// when allocation fails.
char *_sv_tokenize_string(char *in){
  if(!in)return NULL;
  char *a = strdup(in);
  char *ret = NULL;
  if(!a)return NULL;   // out of memory; *a below would crash

  // ignore anything following a comma
  if(split(a,','))
    fprintf(stderr,"sushivision: ignoring trailing garbage \"%s\".\n",a);

  // ignore anything following a colon
  if(split(a,':'))
    fprintf(stderr,"sushivision: ignoring trailing garbage after \"%s\".\n",a);

  // complain about unescaped parens
  if(unwrap(a))
    fprintf(stderr,"sushivision: ignoring garbage after \"%s\".\n",a);

  if(*a=='\0')goto done;

  ret = strdup(trim(unescape(a)));

 done:
  free(a);
  return ret;
}
363 // a number is a standard printf format floating point number string
364 // representation. It may not contain any characters (aside from
365 // trailing/preceeding spaces) that are not part of the number
366 // representation.
367 _sv_tokenval *_sv_tokenize_number(char *in){
368 if(!in)return NULL;
369 char *a = strdup(in);
370 _sv_tokenval *ret=NULL;
372 a = trim(unescape(a));
373 if(*a=='\0')goto done;
375 ret=calloc(1,sizeof(*ret));
376 ret->s = strdup(a);
377 ret->v = atof_portable(a);
379 done:
381 free(a);
382 return ret;
385 _sv_token *_sv_tokenize_name(char *in){
386 _sv_token *ret = NULL;
387 char *s = _sv_tokenize_string(in);
388 if(!s)return NULL;
390 ret=calloc(1,sizeof(*ret));
391 ret->name = s;
392 return ret;
395 _sv_token *_sv_tokenize_labelname(char *in){
396 if(!in)return NULL;
397 char *a = strdup(in);
398 _sv_token *ret = NULL;
400 // split name/label
401 char *l=split(a,':');
402 ret = _sv_tokenize_name(a);
403 if(!ret)goto done;
405 if(!l){
406 ret->label = strdup(ret->name);
407 }else{
408 char *label = _sv_tokenize_string(l);
409 if(!label)
410 ret->label = strdup("");
411 else
412 ret->label = label;
415 done:
416 free(a);
417 return ret;
420 _sv_tokenval *_sv_tokenize_displayvalue(char *in){
421 if(!in)return NULL;
422 char *a = strdup(in);
423 _sv_tokenval *ret = NULL;
425 // split value/label
426 char *l=split(a,':');
427 ret = _sv_tokenize_number(a);
428 if(!ret)goto done;
430 if(l){
431 char *label = _sv_tokenize_string(l);
432 if(ret->s) free(ret->s);
433 if(!label)
434 ret->s = strdup("");
435 else
436 ret->s = label;
439 done:
440 free(a);
441 return ret;
444 _sv_tokenval *_sv_tokenize_flag(char *in){
445 _sv_tokenval *ret = NULL;
446 char *s = _sv_tokenize_string(in);
447 if(!s)return NULL;
449 ret=calloc(1,sizeof(*ret));
450 ret->s = s;
451 ret->v = NAN;
452 return ret;
455 _sv_tokenval *_sv_tokenize_parameter(char *in){
457 if(!in)return NULL;
458 char *a = strdup(in);
459 _sv_tokenval *ret = NULL;
461 // split value/label
462 char *l=split(a,'=');
463 if(!l){
464 ret = _sv_tokenize_flag(a);
465 }else{
466 ret = _sv_tokenize_number(l);
467 if(ret){
468 char *label = _sv_tokenize_string(a);
469 if(ret->s) free(ret->s);
470 if(!label)
471 ret->s = strdup("");
472 else
473 ret->s = label;
477 free(a);
478 return ret;
481 _sv_token *_sv_tokenize_parameterlist(char *in){
482 if(!in)return NULL;
484 char *l=strdup(in);
485 in=l;
487 int i,n = splitcount(l,',');
488 _sv_token *ret = calloc(1,sizeof(*ret));
490 ret->n = n;
491 ret->values = calloc(n,sizeof(*ret->values));
493 for(i=0;i<n;i++){
494 char *next = split(l,',');
495 ret->values[i] = _sv_tokenize_parameter(l);
496 l=next;
498 free(in);
500 return ret;
503 _sv_token *_sv_tokenize_valuelist(char *in){
504 if(!in)return NULL;
506 char *l=strdup(in);
507 in=l;
509 int i,n = splitcount(l,',');
510 _sv_token *ret = calloc(1,sizeof(*ret));
512 ret->n = n;
513 ret->values = calloc(n,sizeof(*ret->values));
515 for(i=0;i<n;i++){
516 char *next = split(l,',');
517 ret->values[i] = _sv_tokenize_displayvalue(l);
518 l=next;
520 free(in);
522 return ret;
525 _sv_token *_sv_tokenize_nameparam(char *in){
526 _sv_token *ret = NULL;
527 char *a=strdup(in);
528 char *p;
529 if(!a)return NULL;
531 // single arg; ignore anything following a level 0 comma
532 if(split(a,','))
533 fprintf(stderr,"sushivision: ignoring trailing garbage after \"%s\".\n",a);
535 // split name/args
536 p=unwrap(a);
538 if(*a=='\0')goto done;
539 ret = _sv_tokenize_name(a);
541 if(p){
542 _sv_token *l = _sv_tokenize_parameterlist(p);
543 if(l){
544 ret->n = l->n;
545 ret->values = l->values;
547 l->n = 0;
548 l->values = 0;
549 _sv_token_free(l);
553 done:
554 free(a);
555 return ret;
558 _sv_token *_sv_tokenize_declparam(char *in){
559 _sv_token *ret = NULL;
560 char *a=strdup(in);
561 char *p;
562 if(!a)return NULL;
564 // single arg; ignore anything following a level 0 comma
565 if(split(a,','))
566 fprintf(stderr,"sushivision: ignoring trailing garbage after \"%s\".\n",a);
568 // split name/args
569 p=unwrap(a);
571 if(*a=='\0')goto done;
572 ret = _sv_tokenize_labelname(a);
574 if(p){
575 _sv_token *l = _sv_tokenize_parameterlist(p);
576 if(l){
577 ret->n = l->n;
578 ret->values = l->values;
580 l->n = 0;
581 l->values = 0;
582 _sv_token_free(l);
586 done:
587 free(a);
588 return ret;
591 _sv_tokenlist *_sv_tokenize_namelist(char *in){
592 if(!in)return NULL;
594 char *l=strdup(in);
595 in=l;
597 int i,n = splitcount(l,',');
598 _sv_tokenlist *ret = calloc(1,sizeof(*ret));
600 ret->n = n;
601 ret->list = calloc(n,sizeof(*ret->list));
603 for(i=0;i<n;i++){
604 char *next = split(l,',');
605 ret->list[i] = _sv_tokenize_nameparam(l);
606 l=next;
608 free(in);
610 return ret;
613 _sv_tokenlist *_sv_tokenize_noparamlist(char *in){
614 if(!in)return NULL;
616 char *l=strdup(in);
617 in=l;
619 int i,n = splitcount(l,',');
620 _sv_tokenlist *ret = calloc(1,sizeof(*ret));
622 ret->n = n;
623 ret->list = calloc(n,sizeof(*ret->list));
625 for(i=0;i<n;i++){
626 char *next = split(l,',');
627 ret->list[i] = _sv_tokenize_name(l);
628 l=next;
630 free(in);
632 return ret;