24 // whether binfilter struct is needed
26 # define NEEDBINFILTER
29 # define NEEDBINFILTER
36 # define BINFILTERLEN PUBLIC_LEN
41 size_t len
; // real len minus one
45 #endif // NEEDBINFILTER
49 static VEC_STRUCT(bfiltervec
,struct binfilter
) filters
;
59 static VEC_STRUCT(ifiltervec
,struct intfilter
) filters
;
67 #define PCRE2_CODE_UNIT_WIDTH 8
74 static VEC_STRUCT(pfiltervec
,struct pcre2filter
) filters
;
78 static void filters_init()
85 static inline size_t filter_len(size_t i
)
88 const u8
*m
= (const u8
*)&VEC_BUF(filters
,i
).m
;
90 const u8
*m
= (const u8
*)&ifiltermask
;
93 for (size_t j
= 0;;) {
95 for (size_t k
= 0;;) {
103 if (++j
>= sizeof(IFT
))
111 static inline int filter_compare(const void *p1
,const void *p2
)
113 if (((const struct intfilter
*)p1
)->f
< ((const struct intfilter
*)p2
)->f
)
115 if (((const struct intfilter
*)p1
)->f
> ((const struct intfilter
*)p2
)->f
)
123 * for mask expansion, we need to figure out how much bits
124 * we need to fill in with different values.
125 * while in big endian machines this is quite easy,
126 * representation we use for little endian ones may
127 * leave gap of bits we don't want to touch.
129 * initial idea draft:
131 * raw representation -- FF.FF.F0.00
132 * big endian -- 0xFFFFF000
133 * little endian -- 0x00F0FFFF
134 * b: 0xFFffF000 ^ 0xFFff0000 -> 0x0000F000
135 * 0x0000F000 + 1 -> 0x0000F001
136 * 0x0000F000 & 0x0000F001 -> 0x0000F000 <- shifted mask
137 * 0x0000F000 ^ 0x0000F000 -> 0x00000000 <- direct mask
138 * 0x0000F000 ^ 0x00000000 -> 0x0000F000 <- shifted mask
139 * l: 0x00f0FFff ^ 0x0000FFff -> 0x00f00000
140 * 0x00f00000 + 1 -> 0x00f00001
141 * 0x00f00000 & 0x00f00001 -> 0x00f00000 <- shifted mask
142 * 0x00f00000 ^ 0x00f00000 -> 0x00000000 <- direct mask
143 * 0x00f00000 ^ 0x00000000 -> 0x00f00000 <- shifted mask
145 * b: 0xFFffFFff ^ 0xF0000000 -> 0x0FffFFff
146 * 0x0FffFFff + 1 -> 0x10000000
147 * 0x0FffFFff & 0x10000000 -> 0x00000000 <- shifted mask
148 * 0x0FffFFff ^ 0x00000000 -> 0x0FffFFff <- direct mask
149 * 0x0FffFFff ^ 0x0FffFFff -> 0x00000000 <- shifted mask
150 * l: 0xFFffFFff ^ 0x000000f0 -> 0xFFffFF0f
151 * 0xFFffFF0f + 1 -> 0xFFffFF10
152 * 0xFFffFF0f & 0xFFffFF10 -> 0xFFffFF00 <- shifted mask
153 * 0xFFffFF0f ^ 0xFFffFF00 -> 0x0000000f <- direct mask
154 * 0xFFffFF0f ^ 0x0000000f -> 0xFFffFF00 <- shifted mask
156 * essentially, we have to make direct mask + shifted mask bits worth of information
157 * and then split it into 2 parts
158 * we do not need absolute shifted mask shifting value, just relative to direct mask
159 * 0x0sss00dd - shifted & direct mask combo
160 * 0x000sssdd - combined mask
162 * generate values from 0x00000000 to 0x000sssdd
163 * for each value, realmask <- (val & 0x000000dd) | ((val & 0x000sss00) << relshiftval)
165 * realmask <- (val & 0x000000dd) | ((val << relshiftval) & 0x0sss0000)
168 * above method doesn't work in some cases. better way:
170 * l: 0x80ffFFff ^ 0x00f0FFff -> 0x800f0000
171 * 0x800f0000 >> 16 -> 0x0000800f
172 * 0x0000800f + 1 -> 0x00008010
173 * 0x0000800f & 0x00008010 -> 0x00008000 <- smask
174 * 0x0000800f ^ 0x00008000 -> 0x0000000f <- dmask
176 * cross <- difference between mask we desire and mask we currently have
177 * shift cross to left variable ammount of times to eliminate zeros
178 * save shift ammount as ishift (initial shift)
179 * then, we eliminate first area of ones; if there was no gap, result is already all zeros
180 * save this thing as smask. it's only higher bits.
181 * XOR smask and cross; result is only lower bits.
182 * shift smask to left variable ammount of times until gap is eliminated.
183 * save resulting mask as cmask;
184 * save resulting shift value as rshift.
187 static int flattened
= 0;
189 #define EXPVAL(init,j,dmask,smask,ishift,rshift) \
190 ((init) | ((((j) & (dmask)) | (((j) << (rshift)) & (smask))) << (ishift)))
191 // add expanded set of values
192 // allocates space on its own
193 static void ifilter_addexpanded(
194 struct intfilter
*ifltr
,
195 IFT dmask
,IFT smask
,IFT cmask
,
196 int ishift
,int rshift
)
199 size_t i
= VEC_LENGTH(filters
);
200 VEC_ADDN(filters
,cmask
+ 1);
201 for (size_t j
= 0;;++j
) {
202 VEC_BUF(filters
,i
+ j
).f
=
203 EXPVAL(ifltr
->f
,j
,dmask
,smask
,ishift
,rshift
);
209 // expand existing stuff
210 // allocates needed stuff on its own
211 static void ifilter_expand(IFT dmask
,IFT smask
,IFT cmask
,int ishift
,int rshift
)
214 size_t len
= VEC_LENGTH(filters
);
215 VEC_ADDN(filters
,cmask
* len
);
216 size_t esz
= cmask
+ 1; // size of expanded elements
217 for (size_t i
= len
- 1;;--i
) {
218 for (IFT j
= 0;;++j
) {
219 VEC_BUF(filters
,i
* esz
+ j
).f
=
220 EXPVAL(VEC_BUF(filters
,i
).f
,j
,dmask
,smask
,ishift
,rshift
);
229 static inline void ifilter_addflatten(struct intfilter
*ifltr
,IFT mask
)
231 if (VEC_LENGTH(filters
) == 0) {
233 VEC_ADD(filters
,*ifltr
);
237 if (ifiltermask
== mask
) {
239 VEC_ADD(filters
,*ifltr
);
242 IFT cross
= ifiltermask
^ mask
;
244 while ((cross
& 1) == 0) {
248 IFT smask
= cross
& (cross
+ 1); // shift mask
249 IFT dmask
= cross
^ smask
; // direct mask
250 IFT cmask
; // combined mask
251 int rshift
= 0; // relative shift
252 while (cmask
= (smask
>> rshift
) | dmask
,(cmask
& (cmask
+ 1)) != 0)
255 if (ifiltermask
> mask
) {
256 // already existing stuff has more precise mask than we
257 // so we need to expand our stuff
258 ifilter_addexpanded(ifltr
,dmask
,smask
,cmask
,ishift
,rshift
);
262 ifilter_expand(dmask
,smask
,cmask
,ishift
,rshift
);
263 VEC_ADD(filters
,*ifltr
);
267 # endif // EXPANDMASK
272 * struct intfilter layout: filter,mask
273 * stuff is compared in big-endian way, so memcmp
274 * filter needs to be compared first
275 * if its equal, mask needs to be compared
276 * memcmp is aplicable there too
277 * due to struct intfilter layout, it all can be stuffed into one memcmp call
279 static inline int filter_compare(const void *p1
,const void *p2
)
281 return memcmp(p1
,p2
,sizeof(struct intfilter
));
286 static void filter_sort(void)
288 size_t len
= VEC_LENGTH(filters
);
290 qsort(&VEC_BUF(filters
,0),len
,sizeof(struct intfilter
),&filter_compare
);
297 static inline size_t filter_len(size_t i
)
299 size_t c
= VEC_BUF(filters
,i
).len
* 8;
300 u8 v
= VEC_BUF(filters
,i
).mask
;
301 for (size_t k
= 0;;) {
311 static inline int filter_compare(const void *p1
,const void *p2
)
313 const struct binfilter
*b1
= (const struct binfilter
*)p1
;
314 const struct binfilter
*b2
= (const struct binfilter
*)p2
;
316 size_t l
= b1
->len
<= b2
->len
? b1
->len
: b2
->len
;
318 int cmp
= memcmp(b1
->f
,b2
->f
,l
);
322 if (b1
->len
< b2
->len
)
324 if (b1
->len
> b2
->len
)
327 u8 cmask
= b1
->mask
& b2
->mask
;
328 if ((b1
->f
[l
] & cmask
) < (b2
->f
[l
] & cmask
))
330 if ((b1
->f
[l
] & cmask
) > (b2
->f
[l
] & cmask
))
333 if (b1
->mask
< b2
->mask
)
335 if (b1
->mask
> b2
->mask
)
341 static void filter_sort(void)
343 size_t len
= VEC_LENGTH(filters
);
345 qsort(&VEC_BUF(filters
,0),len
,sizeof(struct binfilter
),&filter_compare
);
352 #define filter_len(i) ((pcre2ovector[1] - pcre2ovector[0]) * 5)
354 #endif // PCRE2FILTER
356 static void filters_add(const char *filter
)
368 // skip regex start symbol. we do not support regex tho
372 memset(&bf
,0,sizeof(bf
));
374 if (!base32_valid(filter
,&ret
)) {
375 fprintf(stderr
,"filter \"%s\" is invalid\n",filter
);
379 fprintf(stderr
,"^\n");
383 ret
= BASE32_FROM_LEN(ret
);
387 size_t maxsz
= sizeof(IFT
);
389 size_t maxsz
= sizeof(bf
.f
);
392 fprintf(stderr
,"filter \"%s\" is too long\n",filter
);
394 maxsz
= (maxsz
* 8) / 5;
397 fprintf(stderr
,"^\n");
400 base32_from(bf
.f
,&bf
.mask
,filter
);
405 for (size_t i
= 0;i
< bf
.len
;++i
)
407 mc
.b
[bf
.len
] = bf
.mask
;
408 memcpy(fc
.b
,bf
.f
,sizeof(fc
.b
));
411 struct intfilter ifltr
= {
419 ifilter_addflatten(&ifltr
,mc
.i
);
421 VEC_ADD(filters
,ifltr
);
428 #endif // NEEDBINFILTER
432 PCRE2_SIZE erroroffset
;
435 re
= pcre2_compile((PCRE2_SPTR8
)filter
,PCRE2_ZERO_TERMINATED
,
436 PCRE2_NO_UTF_CHECK
| PCRE2_ANCHORED
,&errornum
,&erroroffset
,0);
438 PCRE2_UCHAR buffer
[1024];
439 pcre2_get_error_message(errornum
,buffer
,sizeof(buffer
));
440 fprintf(stderr
,"PCRE2 compilation failed at offset " FSZ
": %s\n",
441 (size_t)erroroffset
,buffer
);
445 // attempt to JIT. ignore error
446 (void) pcre2_jit_compile(re
,PCRE2_JIT_COMPLETE
);
448 struct pcre2filter f
;
449 memset(&f
,0,sizeof(f
));
451 size_t fl
= strlen(filter
) + 1;
452 f
.str
= (char *) malloc(fl
);
455 memcpy(f
.str
,filter
,fl
);
457 #endif // PCRE2FILTER
461 static inline int filters_a_includes_b(size_t a
,size_t b
)
465 return VEC_BUF(filters
,a
).f
== VEC_BUF(filters
,b
).f
;
467 return VEC_BUF(filters
,a
).f
== (VEC_BUF(filters
,b
).f
& VEC_BUF(filters
,a
).m
);
470 const struct binfilter
*fa
= &VEC_BUF(filters
,a
);
471 const struct binfilter
*fb
= &VEC_BUF(filters
,b
);
473 if (fa
->len
> fb
->len
)
477 int cmp
= memcmp(fa
->f
,fb
->f
,l
);
481 if (fa
->len
< fb
->len
)
484 if (fa
->mask
> fb
->mask
)
487 return fa
->f
[l
] == (fb
->f
[l
] & fa
->mask
);
491 static void filters_dedup(void)
493 size_t last
= ~(size_t)0; // index after last matching element
494 size_t chk
; // element to compare against
495 size_t st
; // start of area to destroy
497 size_t len
= VEC_LENGTH(filters
);
498 for (size_t i
= 1;i
< len
;++i
) {
500 if (filters_a_includes_b(i
- 1,i
)) {
501 if (last
!= ~(size_t)0) {
502 memmove(&VEC_BUF(filters
,st
),
503 &VEC_BUF(filters
,last
),
504 (i
- last
) * VEC_ELSIZE(filters
));
514 if (filters_a_includes_b(chk
,i
))
518 if (last
!= ~(size_t)0) {
519 memmove(&VEC_BUF(filters
,st
),
520 &VEC_BUF(filters
,last
),
521 (len
- last
) * VEC_ELSIZE(filters
));
523 VEC_SETLENGTH(filters
,st
);
526 #endif // !PCRE2FILTER
528 static void filters_prepare(void)
532 fprintf(stderr
,"sorting filters...");
536 fprintf(stderr
," removing duplicates...");
540 fprintf(stderr
," done.\n");
544 static void filters_clean(void)
547 for (size_t i
= 0;i
< VEC_LENGTH(filters
);++i
) {
548 pcre2_code_free(VEC_BUF(filters
,i
).re
);
549 free(VEC_BUF(filters
,i
).str
);
555 static size_t filters_count(void)
557 return VEC_LENGTH(filters
);
564 #define MATCHFILTER(it,pk) \
565 ((*(IFT *)(pk) & VEC_BUF(filters,it).m) == VEC_BUF(filters,it).f)
567 #define DOFILTER(it,pk,code) \
569 for (it = 0;it < VEC_LENGTH(filters);++it) { \
570 if (unlikely(MATCHFILTER(it,pk))) { \
581 #define DOFILTER(it,pk,code) \
583 register IFT maskedpk = *(IFT *)(pk) & ifiltermask; \
584 for (size_t down = 0,up = VEC_LENGTH(filters);down < up;) { \
585 it = (up + down) / 2; \
586 if (maskedpk < VEC_BUF(filters,it).f) \
588 else if (maskedpk > VEC_BUF(filters,it).f) \
599 #define DOFILTER(it,pk,code) \
601 for (size_t down = 0,up = VEC_LENGTH(filters);down < up;) { \
602 it = (up + down) / 2; \
603 IFT maskedpk = *(IFT *)(pk) & VEC_BUF(filters,it).m; \
604 register int cmp = memcmp(&maskedpk,&VEC_BUF(filters,it).f,sizeof(IFT)); \
630 #define MATCHFILTER(it,pk) ( \
631 memcmp(pk,VEC_BUF(filters,it).f,VEC_BUF(filters,it).len) == 0 && \
632 (pk[VEC_BUF(filters,it).len] & VEC_BUF(filters,it).mask) == VEC_BUF(filters,it).f[VEC_BUF(filters,it).len])
634 #define DOFILTER(it,pk,code) \
636 for (it = 0;it < VEC_LENGTH(filters);++it) { \
637 if (unlikely(MATCHFILTER(it,pk))) { \
646 #define DOFILTER(it,pk,code) \
648 for (size_t down = 0,up = VEC_LENGTH(filters);down < up;) { \
649 it = (up + down) / 2; \
651 register int filterdiff = memcmp(pk,VEC_BUF(filters,it).f,VEC_BUF(filters,it).len); \
652 if (filterdiff < 0) { \
656 if (filterdiff > 0) { \
661 if ((pk[VEC_BUF(filters,it).len] & VEC_BUF(filters,it).mask) < \
662 VEC_BUF(filters,it).f[VEC_BUF(filters,it).len]) \
667 if ((pk[VEC_BUF(filters,it).len] & VEC_BUF(filters,it).mask) > \
668 VEC_BUF(filters,it).f[VEC_BUF(filters,it).len]) \
691 char pkconvbuf[BASE32_TO_LEN(PUBLIC_LEN) + 1]; \
692 pcre2_match_data *pcre2md = pcre2_match_data_create(128,0); \
693 PCRE2_SIZE *pcre2ovector = 0;
696 pcre2_match_data_free(pcre2md);
698 #define DOFILTER(it,pk,code) \
700 base32_to(pkconvbuf,pk,PUBLIC_LEN); \
701 size_t __l = VEC_LENGTH(filters); \
702 for (it = 0;it < __l;++it) { \
703 int rc = pcre2_match(VEC_BUF(filters,it).re,(PCRE2_SPTR8)pkconvbuf,BASE32_TO_LEN(PUBLIC_LEN),0, \
704 PCRE2_NO_UTF_CHECK,pcre2md,0); \
705 if (unlikely(rc >= 0)) { \
706 pcre2ovector = pcre2_get_ovector_pointer(pcre2md); \
713 #endif // PCRE2FILTER
716 static void loadfilterfile(const char *fname
)
719 FILE *f
= fopen(fname
,"r");
720 while (fgets(buf
,sizeof(buf
),f
)) {
721 for (char *p
= buf
;*p
;++p
) {
727 if (*buf
&& *buf
!= '#' && memcmp(buf
,"//",2) != 0)
732 static void filters_print(void)
737 l
= VEC_LENGTH(filters
);
739 fprintf(stderr
,"filters:\n");
741 for (i
= 0;i
< l
;++i
) {
743 char buf0
[256],buf1
[256];
747 if (!verboseflag
&& i
>= 20) {
748 size_t notshown
= l
- i
;
749 fprintf(stderr
,"[another " FSZ
" %s not shown]\n",
750 notshown
,notshown
== 1 ? "filter" : "filters");
759 imraw
= (u8
*)&VEC_BUF(filters
,i
).m
;
761 imraw
= (u8
*)&ifiltermask
;
763 while (len
< sizeof(IFT
) && imraw
[len
] != 0x00) ++len
;
764 u8 mask
= imraw
[len
-1];
765 u8
*ifraw
= (u8
*)&VEC_BUF(filters
,i
).f
;
769 size_t len
= VEC_BUF(filters
,i
).len
+ 1;
770 u8 mask
= VEC_BUF(filters
,i
).mask
;
771 u8
*ifraw
= VEC_BUF(filters
,i
).f
;
774 base32_to(buf0
,ifraw
,len
);
775 memcpy(bufx
,ifraw
,len
);
776 bufx
[len
- 1] |= ~mask
;
777 base32_to(buf1
,bufx
,len
);
778 char *a
= buf0
,*b
= buf1
;
779 while (*a
&& *a
== *b
)
782 fprintf(stderr
,"\t%s\n",buf0
);
783 #endif // NEEDBINFILTER
785 fprintf(stderr
,"\t%s\n",VEC_BUF(filters
,i
).str
);
786 #endif // PCRE2FILTER
788 fprintf(stderr
,"in total, " FSZ
" %s\n",l
,l
== 1 ? "filter" : "filters");