2 * Copyright (c) 2020 by Solar Designer
6 #ifndef PASSWDQC_FILTER_H__
7 #define PASSWDQC_FILTER_H__
11 /* Higher-level API for use by passwdqc_check.c */
14 uint8_t version
[4]; /* PASSWDQC_FILTER_VERSION */
15 uint8_t threshold
; /* 0 to 4 */
16 uint8_t bucket_size
; /* 2 to 4 */
17 uint8_t hash_id
; /* one of PASSWDQC_FILTER_HASH_* */
19 uint64_t endianness
; /* PASSWDQC_FILTER_ENDIANNESS */
20 uint64_t reserved2
; /* e.g., for checksum */
21 uint64_t capacity
, deletes
, inserts
, dupes
, kicks
;
22 } passwdqc_filter_header_t
;
25 passwdqc_filter_header_t header
;
29 extern int passwdqc_filter_open(passwdqc_filter_t
*flt
, const char *filename
);
30 extern int passwdqc_filter_lookup(const passwdqc_filter_t
*flt
, const char *plaintext
);
31 extern int passwdqc_filter_close(passwdqc_filter_t
*flt
);
33 #ifdef PASSWDQC_FILTER_INTERNALS
34 /* Lower-level inlines for shared use by pwqfilter.c and passwdqc_filter.c */
36 #include <string.h> /* for strcspn() */
43 #define PASSWDQC_FILTER_VERSION "PWQ0"
44 #define PASSWDQC_FILTER_ENDIANNESS 0x0807060504030201ULL
46 static inline int passwdqc_filter_verify_header(const passwdqc_filter_header_t
*header
)
48 return (memcmp(header
->version
, PASSWDQC_FILTER_VERSION
, sizeof(header
->version
)) ||
49 header
->threshold
> header
->bucket_size
|| header
->bucket_size
< 2 || header
->bucket_size
> 4 ||
50 header
->endianness
!= PASSWDQC_FILTER_ENDIANNESS
||
51 (header
->capacity
& 3) || header
->capacity
< 4 || header
->capacity
> ((1ULL << 32) - 1) * 4 ||
52 header
->inserts
- header
->deletes
> header
->capacity
) ? -1 : 0;
56 PASSWDQC_FILTER_HASH_OPAQUE
= 0,
57 PASSWDQC_FILTER_HASH_MIN
= 1,
58 PASSWDQC_FILTER_HASH_MD4
= 1,
59 PASSWDQC_FILTER_HASH_NTLM_CP1252
= 2,
60 PASSWDQC_FILTER_HASH_MAX
= 2
61 } passwdqc_filter_hash_id_t
;
64 uint64_t hi
, lo
; /* we access hi first, so let's also place it first */
65 } passwdqc_filter_packed_t
;
67 typedef uint32_t passwdqc_filter_i_t
;
68 typedef uint64_t passwdqc_filter_f_t
;
71 passwdqc_filter_f_t slots
[4];
72 } passwdqc_filter_unpacked_t
;
78 } passwdqc_filter_hash_t
;
81 #define force_inline __attribute__ ((always_inline)) inline
82 #define likely(x) __builtin_expect(!!(x), 1)
83 #define unlikely(x) __builtin_expect(!!(x), 0)
85 #define force_inline inline
87 #define unlikely(x) (x)
90 static force_inline passwdqc_filter_i_t
passwdqc_filter_wrap(uint32_t what
, passwdqc_filter_i_t m
)
92 return ((uint64_t)what
* m
) >> 32;
95 static force_inline passwdqc_filter_i_t
passwdqc_filter_h2i(passwdqc_filter_hash_t
*h
, passwdqc_filter_i_t m
)
99 * Controversial optimization: when converting a hash to its hash table index
100 * for the primary bucket, take its initial portion and swap the nibbles so
101 * that we process most of the hash table semi-sequentially in case our input
102 * is an ASCII-sorted list of hex-encoded hashes. A drawback is that we fail
103 * to reach high load if our input is a biased fragment from such sorted list.
105 #if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN
108 i
= (uint32_t)h
->uc
[0] << 24;
109 i
|= (uint32_t)h
->uc
[1] << 16;
110 i
|= (uint32_t)h
->uc
[2] << 8;
111 i
|= (uint32_t)h
->uc
[3];
113 i
= ((i
& 0x0f0f0f0f) << 4) | ((i
>> 4) & 0x0f0f0f0f);
114 return passwdqc_filter_wrap(i
, m
);
117 static force_inline passwdqc_filter_f_t
passwdqc_filter_h2f(passwdqc_filter_hash_t
*h
)
119 #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
123 f
= (uint64_t)h
->uc
[8];
124 f
|= (uint64_t)h
->uc
[9] << 8;
125 f
|= (uint64_t)h
->uc
[10] << 16;
126 f
|= (uint64_t)h
->uc
[11] << 24;
127 f
|= (uint64_t)h
->uc
[12] << 32;
128 f
|= (uint64_t)h
->uc
[13] << 40;
129 f
|= (uint64_t)h
->uc
[14] << 48;
130 f
|= (uint64_t)h
->uc
[15] << 56;
135 static force_inline passwdqc_filter_i_t
passwdqc_filter_alti(passwdqc_filter_i_t i
, passwdqc_filter_f_t f
, passwdqc_filter_i_t m
)
138 * We must not use more than 33 bits of the fingerprint here for consistent
139 * behavior in case the fingerprint later gets truncated.
141 int64_t alti
= (int64_t)(m
- 1 - i
) - passwdqc_filter_wrap((uint32_t)f
, m
);
144 * This is how we could have made use of the 33rd bit while staying in range,
145 * but testing shows that this is unnecessary.
147 alti
-= ((f
>> 32) & 1);
152 assert((passwdqc_filter_i_t
)alti
< m
);
154 return (passwdqc_filter_i_t
)alti
;
157 static inline unsigned int passwdqc_filter_ssdecode(unsigned int src
)
159 /* First 16 tetrahedral numbers (n*(n+1)*(n+2)/3!) in reverse order */
160 static const uint16_t tab4
[] = {816, 680, 560, 455, 364, 286, 220, 165, 120, 84, 56, 35, 20, 10, 4, 1};
161 /* First 16 triangular numbers (n*(n+1)/2!) in reverse order */
162 static const uint8_t tab3
[] = {136, 120, 105, 91, 78, 66, 55, 45, 36, 28, 21, 15, 10, 6, 3, 1};
164 unsigned int dst
, i
= 0;
166 while (src
>= tab4
[i
])
170 while (src
>= tab3
[i
])
174 while (src
>= 16 - i
)
183 static force_inline
int passwdqc_filter_unpack(passwdqc_filter_unpacked_t
*dst
, const passwdqc_filter_packed_t
*src
,
184 const uint16_t *ssdecode
)
186 uint64_t hi
= src
->hi
, f
= src
->lo
;
187 unsigned int ssi
= hi
>> (64 - 12); /* semi-sort index */
189 if (likely(ssi
- 1 < 3876)) {
190 passwdqc_filter_f_t ssd
= ssdecode
? ssdecode
[ssi
- 1] : passwdqc_filter_ssdecode(ssi
- 1);
191 const unsigned int fbits
= 33;
192 const unsigned int lobits
= fbits
- 4;
193 const passwdqc_filter_f_t lomask
= ((passwdqc_filter_f_t
)1 << lobits
) - 1;
194 dst
->slots
[0] = (f
& lomask
) | ((ssd
& 0x000f) << lobits
);
196 dst
->slots
[1] = (f
& lomask
) | ((ssd
& 0x00f0) << (lobits
- 4));
198 f
|= hi
<< (64 - 2 * lobits
);
199 dst
->slots
[2] = (f
& lomask
) | ((ssd
& 0x0f00) << (lobits
- 8));
201 dst
->slots
[3] = (f
& lomask
) | ((ssd
& 0xf000) << (lobits
- 12));
205 if (likely(hi
<= 1)) {
207 return unlikely(f
) ? -1 : 0;
213 if (likely((ssi
& 0xf80) == 0xf80)) {
214 const unsigned int fbits
= 41;
215 const passwdqc_filter_f_t fmask
= ((passwdqc_filter_f_t
)1 << fbits
) - 1;
216 dst
->slots
[0] = f
& fmask
;
218 f
|= hi
<< (64 - fbits
);
219 dst
->slots
[1] = f
& fmask
;
220 if (unlikely(dst
->slots
[0] < dst
->slots
[1]))
222 f
= hi
>> (2 * fbits
- 64);
223 dst
->slots
[2] = f
& fmask
;
224 if (unlikely(dst
->slots
[1] < dst
->slots
[2]))
229 if (likely((ssi
& 0xfc0) == 0xf40)) {
230 const unsigned int fbits
= 61;
231 const passwdqc_filter_f_t fmask
= ((passwdqc_filter_f_t
)1 << fbits
) - 1;
232 dst
->slots
[0] = f
& fmask
;
234 f
|= hi
<< (64 - fbits
);
235 dst
->slots
[1] = f
& fmask
;
236 if (unlikely(dst
->slots
[0] < dst
->slots
[1]))
244 static inline int passwdqc_filter_f_eq(passwdqc_filter_f_t stored
, passwdqc_filter_f_t full
, unsigned int largest_bucket_size
)
246 if (likely((uint32_t)stored
!= (uint32_t)full
))
249 * Ignore optional high bits of a stored fingerprint if they're all-zero,
250 * regardless of whether the fingerprint possibly came from a large enough slot
251 * for those zeroes to potentially be meaningful. We have to do this because
252 * the fingerprint might have been previously stored in a larger (smaller-slot)
253 * bucket and been kicked from there, in which case the zeroes are meaningless.
254 * Exception: we don't have to do this if there were no larger buckets so far.
256 if ((stored
>> 33) || largest_bucket_size
< 4) {
257 if ((stored
>> 41) || largest_bucket_size
< 3) {
259 return likely(stored
== full
);
261 return likely(stored
== (full
& (((passwdqc_filter_f_t
)1 << 61) - 1)));
263 return likely(stored
== (full
& (((passwdqc_filter_f_t
)1 << 41) - 1)));
266 return likely(stored
== (full
& (((passwdqc_filter_f_t
)1 << 33) - 1)));
270 static inline void passwdqc_filter_md4(passwdqc_filter_hash_t
*dst
, const char *src
)
274 MD4_Update(&ctx
, src
, strcspn(src
, "\n\r"));
275 MD4_Final(dst
->uc
, &ctx
);
278 static inline void passwdqc_filter_ntlm_cp1252(passwdqc_filter_hash_t
*dst
, const char *src
)
281 * 5 of these codes are undefined in CP1252. We let the original single-byte
282 * values for them pass through, which appears to match how the HIBP v7 NTLM
283 * hashes were generated.
285 static const uint16_t c1
[] = {
286 0x20ac, 0x81, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
287 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x8d, 0x017d, 0x8f,
288 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
289 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x9d, 0x017e, 0x0178
294 while (*src
!= '\n' && *src
!= '\r' && *src
) {
295 unsigned int c
= *(unsigned char *)src
++;
296 if (c
- 128 < sizeof(c1
) / sizeof(c1
[0]))
298 #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
299 MD4_Update(&ctx
, &c
, 2);
301 uint8_t ucs2
[2] = {c
, c
>> 8};
302 MD4_Update(&ctx
, ucs2
, 2);
305 MD4_Final(dst
->uc
, &ctx
);
308 #endif /* PASSWDQC_FILTER_INTERNALS */
309 #endif /* PASSWDQC_FILTER_H__ */