s3-mdssvc: lexer and parser for Spotlight queries
[Samba.git] / source3 / rpc_server / mdssvc / sparql_parser.y
blob7a151dc4a41726e2d2d0771ec41ee3b829a455f6
1 /*
2 Unix SMB/CIFS implementation.
3 Main metadata server / Spotlight routines
5 Copyright (C) Ralph Boehme 2012-2014
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "includes.h"
23 #include "mdssvc.h"
24 #include "sparql_parser.h"
25 #include "sparql_mapping.h"
27 #define YYMALLOC SMB_MALLOC
28 #define YYREALLOC SMB_REALLOC
30 struct yy_buffer_state;
31 typedef struct yy_buffer_state *YY_BUFFER_STATE;
32 extern int yylex (void);
33 extern void yyerror (char const *);
34 extern void *yyterminate(void);
35 extern YY_BUFFER_STATE yy_scan_string( const char *str);
36 extern void yy_delete_buffer ( YY_BUFFER_STATE buffer );
38 /* forward declarations */
39 static const char *map_expr(const char *attr, char op, const char *val);
40 static const char *map_daterange(const char *dateattr,
41 time_t date1, time_t date2);
42 static time_t isodate2unix(const char *s);
44 /* global vars, eg needed by the lexer */
45 struct sparql_parser_state {
46 TALLOC_CTX *frame; /* talloc stackframe, created and freed in map_spotlight_to_sparql_query() */
47 YY_BUFFER_STATE s; /* scanner buffer returned by yy_scan_string() for the query string */
48 char var; /* next SPARQL variable letter: starts at 'a', bumped per use, mapping fails once it is 'z' */
49 const char *result; /* SPARQL expression stored by the "line" rule after a successful parse */
50 } *global_sparql_parser_state; /* points at the active state only while yyparse() runs */
53 %code provides {
54 #include <stdbool.h>
55 #include "mdssvc.h"
56 #define SPRAW_TIME_OFFSET 978307200
57 extern int yywrap(void);
58 extern bool map_spotlight_to_sparql_query(struct sl_query *slq);
61 %union {
62 int ival;
63 const char *sval;
64 bool bval;
65 time_t tval;
68 %expect 5
69 %error-verbose
71 %type <sval> match expr line function
72 %type <tval> date
74 %token <sval> WORD
75 %token <bval> BOOL
76 %token FUNC_INRANGE
77 %token DATE_ISO
78 %token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE
79 %left AND
80 %left OR
83 input:
84 /* empty */
85 | input line
88 line:
89 expr {
90 global_sparql_parser_state->result = $1;
94 expr:
95 BOOL {
97 * We can't properly handle these in expressions, fortunately this
98 * is probably only ever used by OS X as sole element in an
99 * expression ie "False" (when Finder window selected our share
100 * but no search string entered yet). Packet traces showed that OS
101 * X Spotlight server then returns a failure (ie -1) which is what
102 * we do here too by calling YYABORT.
104 YYABORT;
107 * We have "match OR match" and "expr OR expr", because the former is
108 * supposed to catch and coalesce expressions of the form
110 * MDSattribute1="hello"||MDSattribute2="hello"
112 * into a single SPARQL expression for the case where both
113 * MDSattribute1 and MDSattribute2 map to the same SPARQL attribute,
114 * which is eg the case for "*" and "kMDItemTextContent" which both
115 * map to SPARQL "fts:match".
118 | match OR match {
119 if (strcmp($1, $3) != 0) {
120 $$ = talloc_asprintf(talloc_tos(), "{ %s } UNION { %s }", $1, $3);
121 } else {
122 $$ = talloc_asprintf(talloc_tos(), "%s", $1);
125 | match {
126 $$ = $1;
128 | function {
129 $$ = $1;
131 | OBRACE expr CBRACE {
132 $$ = talloc_asprintf(talloc_tos(), "%s", $2);
134 | expr AND expr {
135 $$ = talloc_asprintf(talloc_tos(), "%s . %s", $1, $3);
137 | expr OR expr {
138 if (strcmp($1, $3) != 0) {
139 $$ = talloc_asprintf(talloc_tos(), "{ %s } UNION { %s }", $1, $3);
140 } else {
141 $$ = talloc_asprintf(talloc_tos(), "%s", $1);
146 match:
147 WORD EQUAL QUOTE WORD QUOTE {
148 $$ = map_expr($1, '=', $4);
149 if ($$ == NULL) YYABORT;
151 | WORD UNEQUAL QUOTE WORD QUOTE {
152 $$ = map_expr($1, '!', $4);
153 if ($$ == NULL) YYABORT;
155 | WORD LT QUOTE WORD QUOTE {
156 $$ = map_expr($1, '<', $4);
157 if ($$ == NULL) YYABORT;
159 | WORD GT QUOTE WORD QUOTE {
160 $$ = map_expr($1, '>', $4);
161 if ($$ == NULL) YYABORT;
163 | WORD EQUAL QUOTE WORD QUOTE WORD {
164 $$ = map_expr($1, '=', $4);
165 if ($$ == NULL) YYABORT;
167 | WORD UNEQUAL QUOTE WORD QUOTE WORD {
168 $$ = map_expr($1, '!', $4);
169 if ($$ == NULL) YYABORT;
171 | WORD LT QUOTE WORD QUOTE WORD {
172 $$ = map_expr($1, '<', $4);
173 if ($$ == NULL) YYABORT;
175 | WORD GT QUOTE WORD QUOTE WORD {
176 $$ = map_expr($1, '>', $4);
177 if ($$ == NULL) YYABORT;
181 function:
182 FUNC_INRANGE OBRACE WORD COMMA date COMMA date CBRACE {
183 $$ = map_daterange($3, $5, $7);
184 if ($$ == NULL) YYABORT;
188 date:
189 DATE_ISO OBRACE WORD CBRACE {$$ = isodate2unix($3);}
190 | WORD {$$ = atoi($1) + SPRAW_TIME_OFFSET;}
/*
 * Convert a timestamp of the form "YYYY-MM-DDTHH:MM:SSZ" to a time_t.
 *
 * The parsed fields are converted with mktime(), i.e. they are interpreted
 * in the local timezone of the process.
 *
 * Returns (time_t)-1 if the string does not match the expected format.
 */
static time_t isodate2unix(const char *s)
{
	/*
	 * strptime() only fills in the fields named by the format string.
	 * Start from an all-zero struct so mktime() never reads
	 * indeterminate members.
	 */
	struct tm tm = {0};
	const char *p;

	p = strptime(s, "%Y-%m-%dT%H:%M:%SZ", &tm);
	if (p == NULL) {
		return (time_t)-1;
	}

	/* A negative tm_isdst lets mktime() work out whether DST applies. */
	tm.tm_isdst = -1;

	return mktime(&tm);
}
207 static const char *map_daterange(const char *dateattr,
208 time_t date1, time_t date2)
210 struct sparql_parser_state *s = global_sparql_parser_state;
211 int result = 0;
212 char *sparql = NULL;
213 const struct sl_attr_map *p;
214 struct tm *tmp;
215 char buf1[64], buf2[64];
217 if (s->var == 'z') {
218 return NULL;
221 tmp = localtime(&date1);
222 if (tmp == NULL) {
223 return NULL;
225 result = strftime(buf1, sizeof(buf1), "%Y-%m-%dT%H:%M:%SZ", tmp);
226 if (result == 0) {
227 return NULL;
230 tmp = localtime(&date2);
231 if (tmp == NULL) {
232 return NULL;
234 result = strftime(buf2, sizeof(buf2), "%Y-%m-%dT%H:%M:%SZ", tmp);
235 if (result == 0) {
236 return NULL;
239 p = sl_attr_map_by_spotlight(dateattr);
240 if (p == NULL) {
241 return NULL;
244 sparql = talloc_asprintf(talloc_tos(),
245 "?obj %s ?%c FILTER (?%c > '%s' && ?%c < '%s')",
246 p->sparql_attr,
247 s->var,
248 s->var,
249 buf1,
250 s->var,
251 buf2);
252 if (sparql == NULL) {
253 return NULL;
256 s->var++;
257 return sparql;
260 static char *map_type_search(const char *attr, char op, const char *val)
262 char *result = NULL;
263 const char *sparqlAttr;
264 const struct sl_type_map *p;
266 p = sl_type_map_by_spotlight(val);
267 if (p == NULL) {
268 return NULL;
271 switch (p->type) {
272 case kMDTypeMapRDF:
273 sparqlAttr = "rdf:type";
274 break;
275 case kMDTypeMapMime:
276 sparqlAttr = "nie:mimeType";
277 break;
278 default:
279 return NULL;
282 result = talloc_asprintf(talloc_tos(), "?obj %s '%s'",
283 sparqlAttr,
284 p->sparql_type);
285 if (result == NULL) {
286 return NULL;
289 return result;
292 static const char *map_expr(const char *attr, char op, const char *val)
294 struct sparql_parser_state *s = global_sparql_parser_state;
295 int result = 0;
296 char *sparql = NULL;
297 const struct sl_attr_map *p;
298 time_t t;
299 struct tm *tmp;
300 char buf1[64];
301 char *q;
302 const char *start;
304 if (s->var == 'z') {
305 return NULL;
308 p = sl_attr_map_by_spotlight(attr);
309 if (p == NULL) {
310 return NULL;
313 if ((p->type != ssmt_type) && (p->sparql_attr == NULL)) {
314 yyerror("unsupported Spotlight attribute");
315 return NULL;
318 switch (p->type) {
319 case ssmt_bool:
320 sparql = talloc_asprintf(talloc_tos(), "?obj %s '%s'",
321 p->sparql_attr, val);
322 if (sparql == NULL) {
323 return NULL;
325 break;
327 case ssmt_num:
328 sparql = talloc_asprintf(talloc_tos(),
329 "?obj %s ?%c FILTER(?%c %c%c '%s')",
330 p->sparql_attr,
331 s->var,
332 s->var,
334 /* append '=' to '!' */
335 op == '!' ? '=' : ' ',
336 val);
337 if (sparql == NULL) {
338 return NULL;
340 s->var++;
341 break;
343 case ssmt_str:
344 q = talloc_strdup(talloc_tos(), "");
345 if (q == NULL) {
346 return NULL;
348 start = val;
349 while (*val) {
350 if (*val != '*') {
351 val++;
352 continue;
354 if (val > start) {
355 q = talloc_strndup_append(q, start, val - start);
356 if (q == NULL) {
357 return NULL;
360 q = talloc_strdup_append(q, ".*");
361 if (q == NULL) {
362 return NULL;
364 val++;
365 start = val;
367 if (val > start) {
368 q = talloc_strndup_append(q, start, val - start);
369 if (q == NULL) {
370 return NULL;
373 sparql = talloc_asprintf(talloc_tos(),
374 "?obj %s ?%c "
375 "FILTER(regex(?%c, '^%s$', 'i'))",
376 p->sparql_attr,
377 s->var,
378 s->var,
380 TALLOC_FREE(q);
381 if (sparql == NULL) {
382 return NULL;
384 s->var++;
385 break;
387 case ssmt_fts:
388 sparql = talloc_asprintf(talloc_tos(), "?obj %s '%s'",
389 p->sparql_attr, val);
390 if (sparql == NULL) {
391 return NULL;
393 break;
395 case ssmt_date:
396 t = atoi(val) + SPRAW_TIME_OFFSET;
397 tmp = localtime(&t);
398 if (tmp == NULL) {
399 return NULL;
401 result = strftime(buf1, sizeof(buf1),
402 "%Y-%m-%dT%H:%M:%SZ", tmp);
403 if (result == 0) {
404 return NULL;
406 sparql = talloc_asprintf(talloc_tos(),
407 "?obj %s ?%c FILTER(?%c %c '%s')",
408 p->sparql_attr,
409 s->var,
410 s->var,
412 buf1);
413 if (sparql == NULL) {
414 return NULL;
416 s->var++;
417 break;
419 case ssmt_type:
420 sparql = map_type_search(attr, op, val);
421 if (sparql == NULL) {
422 return NULL;
424 break;
426 default:
427 return NULL;
430 return sparql;
/*
 * Error callback for the generated parser; also called directly by
 * map_expr(). Logs the message at debug level 1.
 */
void yyerror(const char *str)
{
	DEBUG(1, ("yyerror: %s\n", str));
}
/*
 * Scanner end-of-input hook. Always returns 1, which flex treats as
 * "no further input to scan".
 */
int yywrap(void)
{
	const int no_more_input = 1;

	return no_more_input;
}
444 * Map a Spotlight RAW query string to a SPARQL query string
446 bool map_spotlight_to_sparql_query(struct sl_query *slq)
448 struct sparql_parser_state s = {
449 .frame = talloc_stackframe(),
450 .var = 'a',
452 int result;
454 s.s = yy_scan_string(slq->query_string);
455 if (s.s == NULL) {
456 TALLOC_FREE(s.frame);
457 return false;
459 global_sparql_parser_state = &s;
460 result = yyparse();
461 global_sparql_parser_state = NULL;
462 yy_delete_buffer(s.s);
464 if (result != 0) {
465 TALLOC_FREE(s.frame);
466 return false;
469 slq->sparql_query = talloc_asprintf(slq,
470 "SELECT ?url WHERE { %s . ?obj nie:url ?url . "
471 "FILTER(tracker:uri-is-descendant('file://%s/', ?url)) }",
472 s.result, slq->path_scope);
473 TALLOC_FREE(s.frame);
474 if (slq->sparql_query == NULL) {
475 return false;
478 return true;