/*
 ============================================================================
 Author      : Heiher <r@hev.cc>
 Copyright   : Copyright (c) 2015 everyone.
 Description : The helpers for x86 SSE to Loongson MMI.
 ============================================================================
 */
11 #ifndef __MMI_HELPERS_H__
12 #define __MMI_HELPERS_H__
/*
 * Internal helper shared by the _mm_pack* macros.
 *
 * Expands to an inline-asm string fragment of four instructions:
 *   <_f>       <_t>,   <_d>h, <_s>h
 *   <_f>       <_D>l,  <_d>l, <_s>l
 *   punpckhwd  <_D>h,  <_D>l, <_t>
 *   punpcklwd  <_D>l,  <_D>l, <_t>
 *
 * _f      pack mnemonic (stringified, never evaluated)
 * _D      base name of the destination operand pair (h/l suffix appended)
 * _d, _s  base names of the source operand pairs
 * _t      name of a scratch operand; it is overwritten
 */
#define __mm_packxxxx(_f, _D, _d, _s, _t) \
    #_f " %[" #_t "], %[" #_d "h], %[" #_s "h] \n\t" \
    #_f " %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpckhwd %[" #_D "h], %[" #_D "l], %[" #_t "] \n\t" \
    "punpcklwd %[" #_D "l], %[" #_D "l], %[" #_t "] \n\t"
/*
 * Expands to two `or` instructions, one per operand half:
 *   <_D>h = <_d>h | <_s>h,  <_D>l = <_d>l | <_s>l
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_or(_D, _d, _s) \
    "or %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "or %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `xor` instructions, one per operand half:
 *   <_D>h = <_d>h ^ <_s>h,  <_D>l = <_d>l ^ <_s>l
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_xor(_D, _d, _s) \
    "xor %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "xor %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `and` instructions, one per operand half:
 *   <_D>h = <_d>h & <_s>h,  <_D>l = <_d>l & <_s>l
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_and(_D, _d, _s) \
    "and %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "and %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `pandn` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_pandn(_D, _d, _s) \
    "pandn %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pandn %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to:
 *   mov.d   <_D>h, <_d>h         (h half copied unchanged)
 *   pshufh  <_D>l, <_d>l, <_s>   (l half shuffled)
 * _D, _d  operand base names (h/l suffix appended here)
 * _s      name of the shuffle-control operand, used without a suffix
 */
#define _mm_pshuflh(_D, _d, _s) \
    "mov.d %[" #_D "h], %[" #_d "h] \n\t" \
    "pshufh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/* SSE: psllw (bits) */
/*
 * Expands to two `psllh` instructions, one per operand half of _D/_d.
 * _s names the shift-count operand; it is used unsuffixed for both halves.
 */
#define _mm_psllh(_D, _d, _s) \
    "psllh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psllh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/* SSE: pslld (bits) */
/*
 * Expands to two `psllw` instructions, one per operand half of _D/_d.
 * _s names the shift-count operand; it is used unsuffixed for both halves.
 */
#define _mm_psllw(_D, _d, _s) \
    "psllw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psllw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/* SSE: psllq (bits) */
/*
 * Expands to two `dsll` instructions, shifting the h and l operands of _d
 * independently into _D (no bits move between the halves).
 * _s names the shift-count operand; it is used unsuffixed for both halves.
 */
#define _mm_pslld(_D, _d, _s) \
    "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/* SSE: pslldq (bytes) */
/*
 * Left shift across both halves. Expands to:
 *   subu  <_tf>, <_s64>, <_s>     _tf = _s64 - _s
 *   dsrl  <_tf>, <_d>l, <_tf>     _tf = bits leaving the top of the l half
 *   dsll  <_D>h, <_d>h, <_s>
 *   dsll  <_D>l, <_d>l, <_s>
 *   or    <_D>h, <_D>h, <_tf>     carry those bits into the h half
 *
 * _D, _d  operand base names (h/l suffix appended here)
 * _s      name of the shift-count operand
 * _s64    name of an operand that presumably holds the constant 64 --
 *         TODO confirm at the call sites
 * _tf     name of a scratch operand; it is overwritten
 *
 * NOTE(review): the instructions shift by _s directly, so _s looks like a
 * bit count even though the SSE note above says bytes -- confirm.
 */
#define _mm_psllq(_D, _d, _s, _s64, _tf) \
    "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
    "dsrl %[" #_tf "], %[" #_d "l], %[" #_tf "] \n\t" \
    "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t" \
    "or %[" #_D "h], %[" #_D "h], %[" #_tf "] \n\t"
/* SSE: psrlw (bits) */
/*
 * Expands to two `psrlh` instructions, one per operand half of _D/_d.
 * _s names the shift-count operand; it is used unsuffixed for both halves.
 */
#define _mm_psrlh(_D, _d, _s) \
    "psrlh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psrlh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/* SSE: psrld (bits) */
/*
 * Expands to two `psrlw` instructions, one per operand half of _D/_d.
 * _s names the shift-count operand; it is used unsuffixed for both halves.
 */
#define _mm_psrlw(_D, _d, _s) \
    "psrlw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psrlw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/* SSE: psrlq (bits) */
/*
 * Expands to two `dsrl` instructions, shifting the h and l operands of _d
 * independently into _D (no bits move between the halves).
 * _s names the shift-count operand; it is used unsuffixed for both halves.
 */
#define _mm_psrld(_D, _d, _s) \
    "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/* SSE: psrldq (bytes) */
/*
 * Right shift across both halves. Expands to:
 *   subu  <_tf>, <_s64>, <_s>     _tf = _s64 - _s
 *   dsll  <_tf>, <_d>h, <_tf>     _tf = bits leaving the bottom of the h half
 *   dsrl  <_D>h, <_d>h, <_s>
 *   dsrl  <_D>l, <_d>l, <_s>
 *   or    <_D>l, <_D>l, <_tf>     carry those bits into the l half
 *
 * _D, _d  operand base names (h/l suffix appended here)
 * _s      name of the shift-count operand
 * _s64    name of an operand that presumably holds the constant 64 --
 *         TODO confirm at the call sites
 * _tf     name of a scratch operand; it is overwritten
 *
 * NOTE(review): the instructions shift by _s directly, so _s looks like a
 * bit count even though the SSE note above says bytes -- confirm.
 */
#define _mm_psrlq(_D, _d, _s, _s64, _tf) \
    "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
    "dsll %[" #_tf "], %[" #_d "h], %[" #_tf "] \n\t" \
    "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t" \
    "or %[" #_D "l], %[" #_D "l], %[" #_tf "] \n\t"
/*
 * Expands to two `psraw` instructions, one per operand half of _D/_d.
 * _s names the shift-count operand; it is used unsuffixed for both halves.
 */
#define _mm_psraw(_D, _d, _s) \
    "psraw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psraw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
/*
 * Expands to two `paddb` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_paddb(_D, _d, _s) \
    "paddb %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "paddb %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `paddh` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_paddh(_D, _d, _s) \
    "paddh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "paddh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `paddw` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_paddw(_D, _d, _s) \
    "paddw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "paddw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `dadd` instructions, adding the h operands and the l
 * operands of _d and _s separately into _D.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_paddd(_D, _d, _s) \
    "dadd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "dadd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `psubh` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_psubh(_D, _d, _s) \
    "psubh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "psubh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `psubw` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_psubw(_D, _d, _s) \
    "psubw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "psubw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `pmaxub` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_pmaxub(_D, _d, _s) \
    "pmaxub %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmaxub %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `pmullh` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_pmullh(_D, _d, _s) \
    "pmullh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmullh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `pmulhh` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_pmulhh(_D, _d, _s) \
    "pmulhh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmulhh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Expands to two `pmuluw` instructions, one on the h operands and one on
 * the l operands of _D, _d and _s.
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_pmuluw(_D, _d, _s) \
    "pmuluw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmuluw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/* Pack sequence built by __mm_packxxxx with the `packsshb` mnemonic; _t names a scratch operand. */
#define _mm_packsshb(_D, _d, _s, _t) __mm_packxxxx(packsshb, _D, _d, _s, _t)
/* Pack sequence built by __mm_packxxxx with the `packsswh` mnemonic; _t names a scratch operand. */
#define _mm_packsswh(_D, _d, _s, _t) __mm_packxxxx(packsswh, _D, _d, _s, _t)
/* Pack sequence built by __mm_packxxxx with the `packushb` mnemonic; _t names a scratch operand. */
#define _mm_packushb(_D, _d, _s, _t) __mm_packxxxx(packushb, _D, _d, _s, _t)
/*
 * Unpack using only the l operands of _d and _s as sources:
 *   punpckhbh  <_D>h, <_d>l, <_s>l
 *   punpcklbh  <_D>l, <_d>l, <_s>l
 * <_D>h is written first, so <_d>l is still intact for the second
 * instruction even when _D and _d name the same operand pair.
 */
#define _mm_punpcklbh(_D, _d, _s) \
    "punpckhbh %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpcklbh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Unpack using only the l operands of _d and _s as sources:
 *   punpckhhw  <_D>h, <_d>l, <_s>l
 *   punpcklhw  <_D>l, <_d>l, <_s>l
 * <_D>h is written first, so <_d>l is still intact for the second
 * instruction even when _D and _d name the same operand pair.
 */
#define _mm_punpcklhw(_D, _d, _s) \
    "punpckhhw %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpcklhw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/*
 * Unpack using only the l operands of _d and _s as sources:
 *   punpckhwd  <_D>h, <_d>l, <_s>l
 *   punpcklwd  <_D>l, <_d>l, <_s>l
 * <_D>h is written first, so <_d>l is still intact for the second
 * instruction even when _D and _d name the same operand pair.
 */
#define _mm_punpcklwd(_D, _d, _s) \
    "punpckhwd %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpcklwd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
/* SSE: punpcklqdq */
/*
 * Combines the two l operands with plain moves:
 *   mov.d  <_D>h, <_s>l
 *   mov.d  <_D>l, <_d>l
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_punpckldq(_D, _d, _s) \
    "mov.d %[" #_D "h], %[" #_s "l] \n\t" \
    "mov.d %[" #_D "l], %[" #_d "l] \n\t"
/*
 * Unpack using only the h operands of _d and _s as sources:
 *   punpcklbh  <_D>l, <_d>h, <_s>h
 *   punpckhbh  <_D>h, <_d>h, <_s>h
 * <_D>l is written first, so <_d>h is still intact for the second
 * instruction even when _D and _d name the same operand pair.
 */
#define _mm_punpckhbh(_D, _d, _s) \
    "punpcklbh %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
    "punpckhbh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
/*
 * Unpack using only the h operands of _d and _s as sources:
 *   punpcklhw  <_D>l, <_d>h, <_s>h
 *   punpckhhw  <_D>h, <_d>h, <_s>h
 * <_D>l is written first, so <_d>h is still intact for the second
 * instruction even when _D and _d name the same operand pair.
 */
#define _mm_punpckhhw(_D, _d, _s) \
    "punpcklhw %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
    "punpckhhw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
/*
 * Unpack using only the h operands of _d and _s as sources:
 *   punpcklwd  <_D>l, <_d>h, <_s>h
 *   punpckhwd  <_D>h, <_d>h, <_s>h
 * <_D>l is written first, so <_d>h is still intact for the second
 * instruction even when _D and _d name the same operand pair.
 */
#define _mm_punpckhwd(_D, _d, _s) \
    "punpcklwd %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
    "punpckhwd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
/* SSE: punpckhqdq */
/*
 * Combines the two h operands with plain moves:
 *   mov.d  <_D>l, <_d>h
 *   mov.d  <_D>h, <_s>h
 * _D, _d, _s are operand base names; the h/l suffix is appended here.
 */
#define _mm_punpckhdq(_D, _d, _s) \
    "mov.d %[" #_D "l], %[" #_d "h] \n\t" \
    "mov.d %[" #_D "h], %[" #_s "h] \n\t"
232 #endif /* __MMI_HELPERS_H__ */