7 /* regular expression set */
9 regex_t regex
; /* the combined regular expression */
10 int n
; /* number of regular expressions in this set */
11 int *grp
; /* the group assigned to each subgroup */
12 int *setgrpcnt
; /* number of groups in each regular expression */
13 int grpcnt
; /* group count */
16 static int re_groupcount(char *s
)
18 int n
= 0; /* number of groups */
19 int brk
= 0; /* one if inside a bracket expression */
20 int brk2
= 0; /* nested bracket type: ':', '*', or '=' */
25 if (s
[0] == '\\' && s
[1]) {
27 } else if (s
[0] == '[' && s
[1] && s
[2]) {
28 s
+= s
[1] == '^' ? 2 : 1;
35 if (s
[0] == '[' && (s
[1] == ':' || s
[1] == '*' || s
[1] == '=')) {
39 } else if (s
[0] == brk2
&& s
[1] == ']') {
49 struct rset
*rset_make(int n
, char **re
, int flg
)
51 struct rset
*rs
= malloc(sizeof(*rs
));
52 struct sbuf
*sb
= sbuf_make();
53 int regex_flg
= REG_EXTENDED
| (flg
& RE_ICASE
? REG_ICASE
: 0);
55 memset(rs
, 0, sizeof(*rs
));
56 rs
->grp
= malloc((n
+ 1) * sizeof(rs
->grp
[0]));
57 rs
->setgrpcnt
= malloc((n
+ 1) * sizeof(rs
->setgrpcnt
[0]));
61 for (i
= 0; i
< n
; i
++) {
72 rs
->grp
[i
] = rs
->grpcnt
;
73 rs
->setgrpcnt
[i
] = re_groupcount(re
[i
]);
74 rs
->grpcnt
+= 1 + rs
->setgrpcnt
[i
];
76 rs
->grp
[n
] = rs
->grpcnt
;
78 if (regcomp(&rs
->regex
, sbuf_buf(sb
), regex_flg
)) {
89 /* return the index of the matching regular expression or -1 if none matches */
90 int rset_find(struct rset
*rs
, char *s
, int n
, int *grps
, int flg
)
93 int found
, i
, set
= -1;
94 int regex_flg
= REG_NEWLINE
;
98 regex_flg
|= REG_NOTBOL
;
100 regex_flg
|= REG_NOTEOL
;
101 subs
= malloc(rs
->grpcnt
* sizeof(subs
[0]));
102 found
= !regexec(&rs
->regex
, s
, rs
->grpcnt
, subs
, regex_flg
);
103 for (i
= 0; found
&& i
< rs
->n
; i
++)
104 if (rs
->grp
[i
] >= 0 && subs
[rs
->grp
[i
]].rm_so
>= 0)
106 if (found
&& set
>= 0) {
107 for (i
= 0; i
< n
; i
++) {
108 int grp
= rs
->grp
[set
] + i
;
109 if (i
< rs
->setgrpcnt
[set
] + 1) {
110 grps
[i
* 2] = subs
[grp
].rm_so
;
111 grps
[i
* 2 + 1] = subs
[grp
].rm_eo
;
113 grps
[i
* 2 + 0] = -1;
114 grps
[i
* 2 + 1] = -1;
122 void rset_free(struct rset
*rs
)
130 /* read a regular expression enclosed in a delimiter */
131 char *re_read(char **src
)
135 int delim
= (unsigned char) *s
++;
139 while (*s
&& *s
!= delim
) {
140 if (s
[0] == '\\' && s
[1])
142 sbuf_chr(sbuf
, '\\');
143 sbuf_chr(sbuf
, (unsigned char) *s
++);
145 *src
= *s
? s
+ 1 : s
;
146 return sbuf_done(sbuf
);