sushivision/tokens.c

   1 /*
   2  *
   3  *     sushivision copyright (C) 2006-2007 Monty <monty@xiph.org>
   4  *
   5  *  sushivision is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  sushivision is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with sushivision; see the file COPYING.  If not, write to the
  17  *  Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  *
  20  */
  21
  22 /* you should never write your own parser.  ever. */
  23
  24 #define _GNU_SOURCE
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <ctype.h>
  29 #include <math.h>
  30 #include "internal.h"
  31
  32 // not a true recursive parser as there's no arbitrary nesting.
  33
  34 void _sv_tokenval_free(_sv_tokenval *v){
  35   if(v){
  36     if(v->s)free(v->s);
  37     free(v);
  38   }
  39 }
  40
  41 void _sv_token_free(_sv_token *t){
  42   if(t){
  43     if(t->name)free(t->name);
  44     if(t->label)free(t->label);
  45     if(t->values){
  46       int i;
  47       for(i=0;i<t->n;i++)
  48         if(t->values[i])_sv_tokenval_free(t->values[i]);
  49       free(t->values);
  50     }
  51     free(t);
  52   }
  53 }
  54
  55
  56 void _sv_tokenlist_free(_sv_tokenlist *l){
  57   if(l){
  58     if(l->list){
  59       int i;
  60       for(i=0;i<l->n;i++)
  61         if(l->list[i])_sv_token_free(l->list[i]);
  62       free(l->list);
  63     }
  64     free(l);
  65   }
  66 }
  67
  68 // don't allow locale conventions to screw up built-in number strings.
  69 // Sorry, but we're hardcoding decimal points in the float syntax.  This is
  70 // for built-in arg strings, we can substitute in labels later if
  71 // anyone's offended.
  72 static float atof_portable(char *number){
  73   int msign=1;
  74   int d=0;
  75   int f=0;
  76   int fe=0;
  77   int esign=1;
  78   int e=0;
  79   int dflag=0;
  80   int fflag=0;
  81   int eflag=0;
  82
  83   // trim whitespace
  84   while(number && *number && isspace(*number))number++;
  85
  86   // overall sign
  87   if(number && *number=='-'){
  88     msign = -1;
  89     number++;
  90   }
  91   if(number && *number=='+'){
  92     number++;
  93   }
  94
  95   // whole integer
  96   while(number && *number>='0' && *number<='9'){
  97     d = d*10+(*number-48);
  98     number++;
  99     dflag=1;
 100   }
 101
 102   // seperator
 103   if(number && *number=='.'){
 104     number++;
 105   }
 106
 107   // fraction
 108   while(number && *number>='0' && *number<='9'){
 109     f = f*10+(*number-48);
 110     fe--;
 111     number++;
 112     fflag=1;
 113   }
 114
 115   // exponent seperator
 116   if(number && (*number=='e' || *number=='E')){
 117     number++;
 118
 119     //exponent sign
 120     if(number && *number=='-'){
 121       esign = -1;
 122       number++;
 123     }
 124     if(number && *number=='+'){
 125       number++;
 126     }
 127     while(number && *number>='0' && *number<='9'){
 128       e = e*10+(*number-48);
 129       number++;
 130       eflag=1;
 131     }
 132     if(*number)return NAN;
 133     if(!eflag)return NAN;
 134   }
 135
 136   if(!dflag && !fflag)return NAN;
 137   return msign*(d+f*powf(10,fe))*powf(10,e*esign);
 138 }
 139
 140 static char *trim(char *in){
 141   char *head=in;
 142   char *tail=in;
 143   if(!in)return NULL;
 144
 145   while(*head && isspace(*head))head++;
 146   while(*head){
 147     *tail = *head;
 148     tail++;
 149     head++;
 150   }
 151   while(tail>in && isspace(*(tail-1)))tail--;
 152   *tail=0;
 153
 154   return in;
 155 }
 156
 157 static char *unescape(char *a){
 158   char *head=a;
 159   char *tail=a;
 160   int escape=0;
 161
 162   if(head){
 163     while(1){
 164       *tail=*head;
 165       if(!*head)break;
 166
 167       if(*head!='\\' || escape){
 168         tail++;
 169         escape=0;
 170       }else{
 171         escape=1;
 172       }
 173
 174       head++;
 175     }
 176   }
 177
 178   return a;
 179 }
 180
 181 char *_sv_tokenize_escape(char *a){
 182   char *head=a;
 183   char *tail;
 184   char *ret;
 185   int count=0;
 186
 187   while(head && *head){
 188     if(*head==':' ||
 189        *head==',' ||
 190        *head=='(' ||
 191        *head==')' ||
 192        isspace(*head) ||
 193        *head=='\\')
 194       count++;
 195     count++;
 196     head++;
 197   }
 198
 199   head=a;
 200   ret=tail=calloc(count+1,sizeof(*tail));
 201
 202   while(head && *head){
 203     if(*head==':' ||
 204        *head==',' ||
 205        *head=='(' ||
 206        *head==')' ||
 207        isspace(*head) ||
 208        *head=='\\'){
 209       *tail='\\';
 210       tail++;
 211     }
 212     *tail=*head;
 213     tail++;
 214     head++;
 215   }
 216
 217   return ret;
 218 }
 219
 220 // split at unescaped, unenclosed seperator
 221 // only parens enclose in our syntax
 222 static char *split(char *a, char sep){
 223   char *arg=a;
 224   char *ret=NULL;
 225   int escape=0;
 226   int level=0;
 227
 228   while(arg && *arg){
 229     if(*arg=='(' && !escape){
 230       level++;
 231     }
 232     if(*arg==')' && !escape){
 233       level--;
 234       if(level<0){
 235         fprintf(stderr,"sushivision: ignoring extraneous paren in \"%s\".\n",
 236                 a);
 237         *arg=' ';
 238         level=0;
 239       }
 240     }
 241     if(*arg==sep && !escape && level==0){
 242       // we've found our split point
 243       ret=arg+1;
 244       *arg='\0';
 245       return ret;
 246     }
 247     if(*arg=='\\'){
 248       escape=1-escape;
 249     }else{
 250       escape=0;
 251     }
 252     arg++;
 253   }
 254
 255   return NULL;
 256 }
 257
 258 static int splitcount(char *a, char sep){
 259   char *arg=a;
 260   int escape=0;
 261   int level=0;
 262   int count=1;
 263
 264   while(arg && *arg){
 265     if(*arg=='(' && !escape){
 266       level++;
 267     }
 268     if(*arg==')' && !escape){
 269       level--;
 270       if(level<0) level=0;
 271     }
 272     if(*arg==sep && !escape && level==0)
 273       count++;
 274     else{
 275       if(*arg=='\\'){
 276         escape=1-escape;
 277       }else{
 278         escape=0;
 279       }
 280     }
 281     arg++;
 282   }
 283
 284   return count;
 285 }
 286
 287 // unwrap contents enclosed by first level of parens
 288 // only parens enclose in our syntax
 289 static char *unwrap(char *a){
 290   char *arg=a;
 291   char *ret=NULL;
 292   int escape=0;
 293   int level=0;
 294
 295   while(arg && *arg){
 296     if(*arg=='(' && !escape){
 297       if(level==0){
 298         ret=arg+1;
 299         *arg='\0';
 300       }
 301       level++;
 302     }
 303     if(*arg==')' && !escape){
 304       level--;
 305       if(level==0){
 306         *arg='\0';
 307         return ret;
 308       }
 309       if(level<0){
 310         fprintf(stderr,"sushivision: ignoring extraneous paren in \"%s\".\n",
 311                 a);
 312         *arg=' ';
 313         level=0;
 314       }
 315     }
 316     if(*arg=='\\'){
 317       escape=1-escape;
 318     }else{
 319       escape=0;
 320     }
 321     arg++;
 322   }
 323
 324   if(level!=0){
 325     fprintf(stderr,"sushivision: unbalanced paren(s) at \"%s(%s\".\n",
 326             a,ret);
 327     return ret;
 328   }
 329   return NULL;
 330 }
 331
 332 // a string is any C string of characters not containing unescaped
 333 // parens, commas, colons.  Preceeding and trailing spaces are
 334 // stripped (escaped spaces are never stripped).  Thus, parsing a
 335 // string consists only of stripping spaces and checking for illegal
 336 // characters.
 337 char *_sv_tokenize_string(char *in){
 338   if(!in)return NULL;
 339   char *a = strdup(in);
 340   char *ret = NULL;
 341
 342   // ignore anything following a comma
 343   if(split(a,','))
 344     fprintf(stderr,"sushivision: ignoring trailing garbage \"%s\".\n",a);
 345
 346   // ignore anything following a colon
 347   if(split(a,':'))
 348     fprintf(stderr,"sushivision: ignoring trailing garbage after \"%s\".\n",a);
 349
 350   // complain about unescaped parens
 351   if(unwrap(a))
 352     fprintf(stderr,"sushivision: ignoring garbage after \"%s\".\n",a);
 353
 354   if(*a=='\0')goto done;
 355
 356   ret = strdup(trim(unescape(a)));
 357
 358  done:
 359   free(a);
 360   return ret;
 361 }
 362
 363 // a number is a standard printf format floating point number string
 364 // representation.  It may not contain any characters (aside from
 365 // trailing/preceeding spaces) that are not part of the number
 366 // representation.
 367 _sv_tokenval *_sv_tokenize_number(char *in){
 368   if(!in)return NULL;
 369   char *a = strdup(in);
 370   _sv_tokenval *ret=NULL;
 371
 372   a = trim(unescape(a));
 373   if(*a=='\0')goto done;
 374
 375   ret=calloc(1,sizeof(*ret));
 376   ret->s = strdup(a);
 377   ret->v = atof_portable(a);
 378
 379  done:
 380
 381   free(a);
 382   return ret;
 383 }
 384
 385 _sv_token *_sv_tokenize_name(char *in){
 386   _sv_token *ret = NULL;
 387   char *s = _sv_tokenize_string(in);
 388   if(!s)return NULL;
 389
 390   ret=calloc(1,sizeof(*ret));
 391   ret->name = s;
 392   return ret;
 393 }
 394
 395 _sv_token *_sv_tokenize_labelname(char *in){
 396   if(!in)return NULL;
 397   char *a = strdup(in);
 398   _sv_token *ret = NULL;
 399
 400   // split name/label
 401   char *l=split(a,':');
 402   ret = _sv_tokenize_name(a);
 403   if(!ret)goto done;
 404
 405   if(!l){
 406     ret->label = strdup(ret->name);
 407   }else{
 408     char *label = _sv_tokenize_string(l);
 409     if(!label)
 410       ret->label = strdup("");
 411     else
 412       ret->label = label;
 413   }
 414
 415  done:
 416   free(a);
 417   return ret;
 418 }
 419
 420 _sv_tokenval *_sv_tokenize_displayvalue(char *in){
 421   if(!in)return NULL;
 422   char *a = strdup(in);
 423   _sv_tokenval *ret = NULL;
 424
 425   // split value/label
 426   char *l=split(a,':');
 427   ret = _sv_tokenize_number(a);
 428   if(!ret)goto done;
 429
 430   if(l){
 431     char *label = _sv_tokenize_string(l);
 432     if(ret->s) free(ret->s);
 433     if(!label)
 434       ret->s = strdup("");
 435     else
 436       ret->s = label;
 437   }
 438
 439  done:
 440   free(a);
 441   return ret;
 442 }
 443
 444 _sv_tokenval *_sv_tokenize_flag(char *in){
 445   _sv_tokenval *ret = NULL;
 446   char *s = _sv_tokenize_string(in);
 447   if(!s)return NULL;
 448
 449   ret=calloc(1,sizeof(*ret));
 450   ret->s = s;
 451   ret->v = NAN;
 452   return ret;
 453 }
 454
 455 _sv_tokenval *_sv_tokenize_parameter(char *in){
 456
 457   if(!in)return NULL;
 458   char *a = strdup(in);
 459   _sv_tokenval *ret = NULL;
 460
 461   // split value/label
 462   char *l=split(a,'=');
 463   if(!l){
 464     ret = _sv_tokenize_flag(a);
 465   }else{
 466     ret = _sv_tokenize_number(l);
 467     if(ret){
 468       char *label = _sv_tokenize_string(a);
 469       if(ret->s) free(ret->s);
 470       if(!label)
 471         ret->s = strdup("");
 472       else
 473         ret->s = label;
 474     }
 475   }
 476
 477   free(a);
 478   return ret;
 479 }
 480
 481 _sv_token *_sv_tokenize_parameterlist(char *in){
 482   if(!in)return NULL;
 483
 484   char *l=strdup(in);
 485   in=l;
 486
 487   int i,n = splitcount(l,',');
 488   _sv_token *ret = calloc(1,sizeof(*ret));
 489
 490   ret->n = n;
 491   ret->values = calloc(n,sizeof(*ret->values));
 492
 493   for(i=0;i<n;i++){
 494     char *next = split(l,',');
 495     ret->values[i] = _sv_tokenize_parameter(l);
 496     l=next;
 497   }
 498   free(in);
 499
 500   return ret;
 501 }
 502
 503 _sv_token *_sv_tokenize_valuelist(char *in){
 504   if(!in)return NULL;
 505
 506   char *l=strdup(in);
 507   in=l;
 508
 509   int i,n = splitcount(l,',');
 510   _sv_token *ret = calloc(1,sizeof(*ret));
 511
 512   ret->n = n;
 513   ret->values = calloc(n,sizeof(*ret->values));
 514
 515   for(i=0;i<n;i++){
 516     char *next = split(l,',');
 517     ret->values[i] = _sv_tokenize_displayvalue(l);
 518     l=next;
 519   }
 520   free(in);
 521
 522   return ret;
 523 }
 524
 525 _sv_token *_sv_tokenize_nameparam(char *in){
 526   _sv_token *ret = NULL;
 527   char *a=strdup(in);
 528   char *p;
 529   if(!a)return NULL;
 530
 531   // single arg; ignore anything following a level 0 comma
 532   if(split(a,','))
 533     fprintf(stderr,"sushivision: ignoring trailing garbage after \"%s\".\n",a);
 534
 535   // split name/args
 536   p=unwrap(a);
 537
 538   if(*a=='\0')goto done;
 539   ret = _sv_tokenize_name(a);
 540
 541   if(p){
 542     _sv_token *l = _sv_tokenize_parameterlist(p);
 543     if(l){
 544       ret->n = l->n;
 545       ret->values =  l->values;
 546
 547       l->n = 0;
 548       l->values = 0;
 549       _sv_token_free(l);
 550     }
 551   }
 552
 553  done:
 554   free(a);
 555   return ret;
 556 }
 557
 558 _sv_token *_sv_tokenize_declparam(char *in){
 559   _sv_token *ret = NULL;
 560   char *a=strdup(in);
 561   char *p;
 562   if(!a)return NULL;
 563
 564   // single arg; ignore anything following a level 0 comma
 565   if(split(a,','))
 566     fprintf(stderr,"sushivision: ignoring trailing garbage after \"%s\".\n",a);
 567
 568   // split name/args
 569   p=unwrap(a);
 570
 571   if(*a=='\0')goto done;
 572   ret = _sv_tokenize_labelname(a);
 573
 574   if(p){
 575     _sv_token *l = _sv_tokenize_parameterlist(p);
 576     if(l){
 577       ret->n = l->n;
 578       ret->values =  l->values;
 579
 580       l->n = 0;
 581       l->values = 0;
 582       _sv_token_free(l);
 583     }
 584   }
 585
 586  done:
 587   free(a);
 588   return ret;
 589 }
 590
 591 _sv_tokenlist *_sv_tokenize_namelist(char *in){
 592   if(!in)return NULL;
 593
 594   char *l=strdup(in);
 595   in=l;
 596
 597   int i,n = splitcount(l,',');
 598   _sv_tokenlist *ret = calloc(1,sizeof(*ret));
 599
 600   ret->n = n;
 601   ret->list = calloc(n,sizeof(*ret->list));
 602
 603   for(i=0;i<n;i++){
 604     char *next = split(l,',');
 605     ret->list[i] = _sv_tokenize_nameparam(l);
 606     l=next;
 607   }
 608   free(in);
 609
 610   return ret;
 611 }
 612
 613 _sv_tokenlist *_sv_tokenize_noparamlist(char *in){
 614   if(!in)return NULL;
 615
 616   char *l=strdup(in);
 617   in=l;
 618
 619   int i,n = splitcount(l,',');
 620   _sv_tokenlist *ret = calloc(1,sizeof(*ret));
 621
 622   ret->n = n;
 623   ret->list = calloc(n,sizeof(*ret->list));
 624
 625   for(i=0;i<n;i++){
 626     char *next = split(l,',');
 627     ret->list[i] = _sv_tokenize_name(l);
 628     l=next;
 629   }
 630   free(in);
 631
 632   return ret;
 633 }
 634