src/tokenizer.c

   1 /*
   2  * Copyright (C) 2003-2010 The Music Player Daemon Project
   3  * http://www.musicpd.org
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  */
  19
  20 #include "config.h"
  21 #include "tokenizer.h"
  22
  23 #include <stdbool.h>
  24 #include <assert.h>
  25 #include <string.h>
  26
  27 G_GNUC_CONST
  28 static GQuark
  29 tokenizer_quark(void)
  30 {
  31         return g_quark_from_static_string("tokenizer");
  32 }
  33
  34 static inline bool
  35 valid_word_first_char(char ch)
  36 {
  37         return g_ascii_isalpha(ch);
  38 }
  39
  40 static inline bool
  41 valid_word_char(char ch)
  42 {
  43         return g_ascii_isalnum(ch) || ch == '_';
  44 }
  45
  46 char *
  47 tokenizer_next_word(char **input_p, GError **error_r)
  48 {
  49         char *word, *input;
  50
  51         assert(input_p != NULL);
  52         assert(*input_p != NULL);
  53
  54         word = input = *input_p;
  55
  56         if (*input == 0)
  57                 return NULL;
  58
  59         /* check the first character */
  60
  61         if (!valid_word_first_char(*input)) {
  62                 g_set_error(error_r, tokenizer_quark(), 0,
  63                             "Letter expected");
  64                 return NULL;
  65         }
  66
  67         /* now iterate over the other characters until we find a
  68            whitespace or end-of-string */
  69
  70         while (*++input != 0) {
  71                 if (g_ascii_isspace(*input)) {
  72                         /* a whitespace: the word ends here */
  73                         *input = 0;
  74                         /* skip all following spaces, too */
  75                         input = g_strchug(input + 1);
  76                         break;
  77                 }
  78
  79                 if (!valid_word_char(*input)) {
  80                         *input_p = input;
  81                         g_set_error(error_r, tokenizer_quark(), 0,
  82                                     "Invalid word character");
  83                         return NULL;
  84                 }
  85         }
  86
  87         /* end of string: the string is already null-terminated
  88            here */
  89
  90         *input_p = input;
  91         return word;
  92 }
  93
  94 static inline bool
  95 valid_unquoted_char(char ch)
  96 {
  97         return (unsigned char)ch > 0x20 && ch != '"' && ch != '\'';
  98 }
  99
 100 char *
 101 tokenizer_next_unquoted(char **input_p, GError **error_r)
 102 {
 103         char *word, *input;
 104
 105         assert(input_p != NULL);
 106         assert(*input_p != NULL);
 107
 108         word = input = *input_p;
 109
 110         if (*input == 0)
 111                 return NULL;
 112
 113         /* check the first character */
 114
 115         if (!valid_unquoted_char(*input)) {
 116                 g_set_error(error_r, tokenizer_quark(), 0,
 117                             "Invalid unquoted character");
 118                 return NULL;
 119         }
 120
 121         /* now iterate over the other characters until we find a
 122            whitespace or end-of-string */
 123
 124         while (*++input != 0) {
 125                 if (g_ascii_isspace(*input)) {
 126                         /* a whitespace: the word ends here */
 127                         *input = 0;
 128                         /* skip all following spaces, too */
 129                         input = g_strchug(input + 1);
 130                         break;
 131                 }
 132
 133                 if (!valid_unquoted_char(*input)) {
 134                         *input_p = input;
 135                         g_set_error(error_r, tokenizer_quark(), 0,
 136                                     "Invalid unquoted character");
 137                         return NULL;
 138                 }
 139         }
 140
 141         /* end of string: the string is already null-terminated
 142            here */
 143
 144         *input_p = input;
 145         return word;
 146 }
 147
 148 char *
 149 tokenizer_next_string(char **input_p, GError **error_r)
 150 {
 151         char *word, *dest, *input;
 152
 153         assert(input_p != NULL);
 154         assert(*input_p != NULL);
 155
 156         word = dest = input = *input_p;
 157
 158         if (*input == 0)
 159                 /* end of line */
 160                 return NULL;
 161
 162         /* check for the opening " */
 163
 164         if (*input != '"') {
 165                 g_set_error(error_r, tokenizer_quark(), 0,
 166                             "'\"' expected");
 167                 return NULL;
 168         }
 169
 170         ++input;
 171
 172         /* copy all characters */
 173
 174         while (*input != '"') {
 175                 if (*input == '\\')
 176                         /* the backslash escapes the following
 177                            character */
 178                         ++input;
 179
 180                 if (*input == 0) {
 181                         /* return input-1 so the caller can see the
 182                            difference between "end of line" and
 183                            "error" */
 184                         *input_p = input - 1;
 185                         g_set_error(error_r, tokenizer_quark(), 0,
 186                                     "Missing closing '\"'");
 187                         return NULL;
 188                 }
 189
 190                 /* copy one character */
 191                 *dest++ = *input++;
 192         }
 193
 194         /* the following character must be a whitespace (or end of
 195            line) */
 196
 197         ++input;
 198         if (*input != 0 && !g_ascii_isspace(*input)) {
 199                 *input_p = input;
 200                 g_set_error(error_r, tokenizer_quark(), 0,
 201                             "Space expected after closing '\"'");
 202                 return NULL;
 203         }
 204
 205         /* finish the string and return it */
 206
 207         *dest = 0;
 208         *input_p = g_strchug(input);
 209         return word;
 210 }
 211
 212 char *
 213 tokenizer_next_param(char **input_p, GError **error_r)
 214 {
 215         assert(input_p != NULL);
 216         assert(*input_p != NULL);
 217
 218         if (**input_p == '"')
 219                 return tokenizer_next_string(input_p, error_r);
 220         else
 221                 return tokenizer_next_unquoted(input_p, error_r);
 222 }