Fix whitespace irregularities in code
[xapian.git] / xapian-applications / omega / transform.cc
blob5aea85d683352655775a9414419fc7ed66890ecc
1 /** @file transform.cc
2 * @brief Implement OmegaScript $transform function.
3 */
4 /* Copyright (C) 2003,2009,2015 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "transform.h"
25 #include <pcre.h>
27 #include <map>
28 #include <string>
29 #include <vector>
31 using namespace std;
// Cache of compiled regular expressions, keyed on the pair (pattern string,
// PCRE compile options).  get_re() looks entries up here and adds an entry
// after each successful compilation; nothing in the code visible here removes
// or frees entries.
33 static map<pair<string, int>, pcre *> re_cache;
35 static pcre *
36 get_re(const string & pattern, int options)
38 pair<string, int> re_key = make_pair(pattern, options);
39 auto re_it = re_cache.find(re_key);
40 if (re_it != re_cache.end()) {
41 return re_it->second;
44 const char *error;
45 int erroffset;
46 pcre * re =
47 pcre_compile(pattern.c_str(), options, &error, &erroffset, NULL);
48 if (!re) {
49 string m = "$transform failed to compile its regular expression: ";
50 m += error;
51 throw m;
53 re_cache.insert(make_pair(re_key, re));
54 return re;
57 void
58 omegascript_match(string & value, const vector<string> & args)
60 int offsets[30];
61 int options = 0;
62 if (args.size() > 2) {
63 const string &opts = args[2];
64 for (string::const_iterator i = opts.begin(); i != opts.end(); ++i) {
65 switch (*i) {
66 case 'i':
67 options |= PCRE_CASELESS;
68 break;
69 case 'm':
70 options |= PCRE_MULTILINE;
71 break;
72 case 's':
73 options |= PCRE_DOTALL;
74 break;
75 case 'x':
76 options |= PCRE_EXTENDED;
77 break;
78 default: {
79 string m = "Unknown $match option character: ";
80 m += *i;
81 throw m;
86 pcre * re = get_re(args[0], options);
87 int matches = pcre_exec(re, NULL, args[1].data(), args[1].size(),
88 0, 0, offsets, 30);
89 if (matches > 0) {
90 value += "true";
94 void
95 omegascript_transform(string & value, const vector<string> & args)
97 int offsets[30];
98 bool replace_all = false;
99 int options = 0;
100 if (args.size() > 3) {
101 const string & opts = args[3];
102 for (string::const_iterator i = opts.begin(); i != opts.end(); ++i) {
103 switch (*i) {
104 case 'g':
105 replace_all = true;
106 break;
107 case 'i':
108 options |= PCRE_CASELESS;
109 break;
110 case 'm':
111 options |= PCRE_MULTILINE;
112 break;
113 case 's':
114 options |= PCRE_DOTALL;
115 break;
116 case 'x':
117 options |= PCRE_EXTENDED;
118 break;
119 default: {
120 string m = "Unknown $transform option character: ";
121 m += *i;
122 throw m;
128 pcre * re = get_re(args[0], options);
129 size_t start = 0;
130 do {
131 int matches = pcre_exec(re, NULL, args[2].data(), args[2].size(),
132 int(start), 0, offsets, 30);
133 if (matches <= 0) {
134 // (matches == PCRE_ERROR_NOMATCH) is OK, otherwise this is an
135 // error. FIXME: should we report this rather than ignoring it?
136 break;
139 // Substitute \1 ... \9, and \\.
140 string::const_iterator i;
141 value.append(args[2], start, offsets[0] - start);
142 for (i = args[1].begin(); i != args[1].end(); ++i) {
143 char ch = *i;
144 if (ch != '\\') {
145 value += ch;
146 continue;
149 if (rare(++i == args[1].end())) {
150 // Trailing single '\'.
151 value += ch;
152 break;
155 int c = *i;
156 if (c >= '1' && c <= '9') {
157 c -= '0';
158 // If there aren't that many groupings, expand to nothing.
159 if (c >= matches) continue;
160 } else {
161 value += ch;
162 if (c != '\\') value += char(c);
163 continue;
166 int off_c = offsets[c * 2];
167 value.append(args[2], off_c, offsets[c * 2 + 1] - off_c);
169 start = offsets[1];
170 } while (replace_all);
171 value.append(args[2], start, string::npos);