src/tokenizer.c

   1 /*
   2  * Copyright (C) 2003-2009 The Music Player Daemon Project
   3  * http://www.musicpd.org
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  */
  19
  20 #include "tokenizer.h"
  21
  22 #include <stdbool.h>
  23 #include <assert.h>
  24 #include <string.h>
  25
  26 G_GNUC_CONST
  27 static GQuark
  28 tokenizer_quark(void)
  29 {
  30         return g_quark_from_static_string("tokenizer");
  31 }
  32
  33 static inline bool
  34 valid_word_first_char(char ch)
  35 {
  36         return g_ascii_isalpha(ch);
  37 }
  38
  39 static inline bool
  40 valid_word_char(char ch)
  41 {
  42         return g_ascii_isalnum(ch) || ch == '_';
  43 }
  44
  45 char *
  46 tokenizer_next_word(char **input_p, GError **error_r)
  47 {
  48         char *word, *input;
  49
  50         assert(input_p != NULL);
  51         assert(*input_p != NULL);
  52
  53         word = input = *input_p;
  54
  55         if (*input == 0)
  56                 return NULL;
  57
  58         /* check the first character */
  59
  60         if (!valid_word_first_char(*input)) {
  61                 g_set_error(error_r, tokenizer_quark(), 0,
  62                             "Letter expected");
  63                 return NULL;
  64         }
  65
  66         /* now iterate over the other characters until we find a
  67            whitespace or end-of-string */
  68
  69         while (*++input != 0) {
  70                 if (g_ascii_isspace(*input)) {
  71                         /* a whitespace: the word ends here */
  72                         *input = 0;
  73                         /* skip all following spaces, too */
  74                         input = g_strchug(input + 1);
  75                         break;
  76                 }
  77
  78                 if (!valid_word_char(*input)) {
  79                         *input_p = input;
  80                         g_set_error(error_r, tokenizer_quark(), 0,
  81                                     "Invalid word character");
  82                         return NULL;
  83                 }
  84         }
  85
  86         /* end of string: the string is already null-terminated
  87            here */
  88
  89         *input_p = input;
  90         return word;
  91 }
  92
  93 char *
  94 tokenizer_next_string(char **input_p, GError **error_r)
  95 {
  96         char *word, *dest, *input;
  97
  98         assert(input_p != NULL);
  99         assert(*input_p != NULL);
 100
 101         word = dest = input = *input_p;
 102
 103         if (*input == 0)
 104                 /* end of line */
 105                 return NULL;
 106
 107         /* check for the opening " */
 108
 109         if (*input != '"') {
 110                 g_set_error(error_r, tokenizer_quark(), 0,
 111                             "'\"' expected");
 112                 return NULL;
 113         }
 114
 115         ++input;
 116
 117         /* copy all characters */
 118
 119         while (*input != '"') {
 120                 if (*input == '\\')
 121                         /* the backslash escapes the following
 122                            character */
 123                         ++input;
 124
 125                 if (*input == 0) {
 126                         /* return input-1 so the caller can see the
 127                            difference between "end of line" and
 128                            "error" */
 129                         *input_p = input - 1;
 130                         g_set_error(error_r, tokenizer_quark(), 0,
 131                                     "Missing closing '\"'");
 132                         return NULL;
 133                 }
 134
 135                 /* copy one character */
 136                 *dest++ = *input++;
 137         }
 138
 139         /* the following character must be a whitespace (or end of
 140            line) */
 141
 142         ++input;
 143         if (*input != 0 && !g_ascii_isspace(*input)) {
 144                 *input_p = input;
 145                 g_set_error(error_r, tokenizer_quark(), 0,
 146                             "Space expected after closing '\"'");
 147                 return NULL;
 148         }
 149
 150         /* finish the string and return it */
 151
 152         *dest = 0;
 153         *input_p = g_strchug(input);
 154         return word;
 155 }
 156
 157 char *
 158 tokenizer_next_word_or_string(char **input_p, GError **error_r)
 159 {
 160         assert(input_p != NULL);
 161         assert(*input_p != NULL);
 162
 163         if (**input_p == '"')
 164                 return tokenizer_next_string(input_p, error_r);
 165         else
 166                 return tokenizer_next_word(input_p, error_r);
 167 }