/* FFdecsa -- fast decsa algorithm
 *
 * Copyright (C) 2007 Dark Avenger
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
21 #include <emmintrin.h>
23 #define MEMALIGN_VAL 16
30 static const union __u128i ff0
= {{0x00000000U
, 0x00000000U
, 0x00000000U
, 0x00000000U
}};
31 static const union __u128i ff1
= {{0xffffffffU
, 0xffffffffU
, 0xffffffffU
, 0xffffffffU
}};
/* GROUP: one 128-bit SSE2 integer vector per group. */
typedef __m128i group;
#define GROUP_PARALLELISM 128   /* bit width of __m128i */

/* Bitwise operations on whole groups. */
#define FFAND(a,b) _mm_and_si128((a),(b))
#define FFOR(a,b)  _mm_or_si128((a),(b))
#define FFXOR(a,b) _mm_xor_si128((a),(b))
/* NOTE(review): FF1() is not defined in this part of the file; it must be
 * provided elsewhere (presumably yielding the all-ones vector) -- confirm. */
#define FFNOT(a)   _mm_xor_si128((a),FF1())

/* Allocation with 16-byte alignment; memory from MALLOC must be released
 * with FREE (_mm_malloc/_mm_free pair). */
#define MALLOC(X) _mm_malloc((X),16)
#define FREE(X)   _mm_free((X))
46 static const union __u128i ff29
= {{0x29292929U
, 0x29292929U
, 0x29292929U
, 0x29292929U
}};
47 static const union __u128i ff02
= {{0x02020202U
, 0x02020202U
, 0x02020202U
, 0x02020202U
}};
48 static const union __u128i ff04
= {{0x04040404U
, 0x04040404U
, 0x04040404U
, 0x04040404U
}};
49 static const union __u128i ff10
= {{0x10101010U
, 0x10101010U
, 0x10101010U
, 0x10101010U
}};
50 static const union __u128i ff40
= {{0x40404040U
, 0x40404040U
, 0x40404040U
, 0x40404040U
}};
51 static const union __u128i ff80
= {{0x80808080U
, 0x80808080U
, 0x80808080U
, 0x80808080U
}};
/* BATCH: one 128-bit SSE2 integer vector, i.e. 16 bytes per batch. */
typedef __m128i batch;
#define BYTES_PER_BATCH 16

/* Constant vectors, read through the `.v` member of the ffXX unions. */
#define B_FFN_ALL_29() ff29.v
#define B_FFN_ALL_02() ff02.v
#define B_FFN_ALL_04() ff04.v
#define B_FFN_ALL_10() ff10.v
#define B_FFN_ALL_40() ff40.v
#define B_FFN_ALL_80() ff80.v

/* Bitwise operations on batches simply forward to the group macros. */
#define B_FFAND(a,b) FFAND(a,b)
#define B_FFOR(a,b)  FFOR(a,b)
#define B_FFXOR(a,b) FFXOR(a,b)

/*
 * Shifts by n bits.  These use the 64-bit-lane intrinsics, so bits do move
 * across byte boundaries inside each 64-bit lane; no masking is applied here.
 */
#define B_FFSH8L(a,n) _mm_slli_epi64((a),(n))
#define B_FFSH8R(a,n) _mm_srli_epi64((a),(n))
/*
 * XOR_BEST_BY - XOR two 16-byte blocks: d[0..15] = s1[0..15] ^ s2[0..15].
 *
 * d  - destination, 16 bytes written
 * s1 - first source, 16 bytes read
 * s2 - second source, 16 bytes read
 *
 * All three pointers must be 16-byte aligned: the aligned SSE2 load/store
 * intrinsics are used.  Both sources are loaded before the store, so d may
 * be the same buffer as s1 or s2.
 */
static inline void XOR_BEST_BY(unsigned char *d, unsigned char *s1, unsigned char *s2)
{
    const __m128i vs1 = _mm_load_si128((const __m128i *)s1);
    const __m128i vs2 = _mm_load_si128((const __m128i *)s2);

    _mm_store_si128((__m128i *)d, _mm_xor_si128(vs1, vs2));
}