Fix a few places that were non-multibyte-safe in tsearch configuration file
[PostgreSQL.git] / src / backend / tsearch / ts_utils.c
blob09d78a15b7ee5d2d8b96d5d5a88043e8a101024b
/*-------------------------------------------------------------------------
 *
 * ts_utils.c
 *		various support functions
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres.h"
17 #include <ctype.h>
19 #include "miscadmin.h"
20 #include "tsearch/ts_locale.h"
21 #include "tsearch/ts_public.h"
22 #include "tsearch/ts_utils.h"
23 #include "utils/builtins.h"
27 * Given the base name and extension of a tsearch config file, return
28 * its full path name. The base name is assumed to be user-supplied,
29 * and is checked to prevent pathname attacks. The extension is assumed
30 * to be safe.
32 * The result is a palloc'd string.
34 char *
35 get_tsearch_config_filename(const char *basename,
36 const char *extension)
38 char sharepath[MAXPGPATH];
39 char *result;
42 * We limit the basename to contain a-z, 0-9, and underscores. This may
43 * be overly restrictive, but we don't want to allow access to anything
44 * outside the tsearch_data directory, so for instance '/' *must* be
45 * rejected, and on some platforms '\' and ':' are risky as well. Allowing
46 * uppercase might result in incompatible behavior between case-sensitive
47 * and case-insensitive filesystems, and non-ASCII characters create other
48 * interesting risks, so on the whole a tight policy seems best.
50 if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
51 ereport(ERROR,
52 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 errmsg("invalid text search configuration file name \"%s\"",
54 basename)));
56 get_share_path(my_exec_path, sharepath);
57 result = palloc(MAXPGPATH);
58 snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
59 sharepath, basename, extension);
61 return result;
/*
 * qsort/bsearch comparator for arrays of C strings.
 *
 * Each argument points at an array element, i.e. at a "char *"; the
 * pointed-to strings are compared with strcmp().  The casts keep the
 * const qualification of the incoming pointers instead of discarding it.
 */
static int
comparestr(const void *a, const void *b)
{
	const char *const *lhs = (const char *const *) a;
	const char *const *rhs = (const char *const *) b;

	return strcmp(*lhs, *rhs);
}
71 * Reads a stop-word file. Each word is run through 'wordop'
72 * function, if given. wordop may either modify the input in-place,
73 * or palloc a new version.
75 void
76 readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
78 char **stop = NULL;
80 s->len = 0;
81 if (fname && *fname)
83 char *filename = get_tsearch_config_filename(fname, "stop");
84 tsearch_readline_state trst;
85 char *line;
86 int reallen = 0;
88 if (!tsearch_readline_begin(&trst, filename))
89 ereport(ERROR,
90 (errcode(ERRCODE_CONFIG_FILE_ERROR),
91 errmsg("could not open stop-word file \"%s\": %m",
92 filename)));
94 while ((line = tsearch_readline(&trst)) != NULL)
96 char *pbuf = line;
98 /* Trim trailing space */
99 while (*pbuf && !t_isspace(pbuf))
100 pbuf += pg_mblen(pbuf);
101 *pbuf = '\0';
103 /* Skip empty lines */
104 if (*line == '\0')
106 pfree(line);
107 continue;
110 if (s->len >= reallen)
112 if (reallen == 0)
114 reallen = 64;
115 stop = (char **) palloc(sizeof(char *) * reallen);
117 else
119 reallen *= 2;
120 stop = (char **) repalloc((void *) stop,
121 sizeof(char *) * reallen);
125 if (wordop)
127 stop[s->len] = wordop(line);
128 if (stop[s->len] != line)
129 pfree(line);
131 else
132 stop[s->len] = line;
134 (s->len)++;
137 tsearch_readline_end(&trst);
138 pfree(filename);
141 s->stop = stop;
143 /* Sort to allow binary searching */
144 if (s->stop && s->len > 0)
145 qsort(s->stop, s->len, sizeof(char *), comparestr);
148 bool
149 searchstoplist(StopList *s, char *key)
151 return (s->stop && s->len > 0 &&
152 bsearch(&key, s->stop, s->len,
153 sizeof(char *), comparestr)) ? true : false;