1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2008 by Maurus Cuelenaere
11 * Copyright (C) 2006-2007 by Ingenic Semiconductor Inc.
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ****************************************************************************/
/* Jz47xx Ingenic Media Extension Instruction Set

   These are ~60 SIMD instructions for the Jz47xx MIPS core.

   To compile assembly files using these instructions, they
   must be piped through a bash script called mxu_as. */
/* Emulated MXU register file for the C_VERSION fallback.
 * NOTE(review): this copy is damaged -- stray original line numbers are
 * embedded at the start of lines, and the preprocessor conditionals that
 * select between the per-variable and the array register layouts are
 * missing, so both layouts appear unconditionally below.  Restore the
 * #else/#endif structure from the pristine jz_mxu.h before compiling. */
43 #ifndef MXU_REGS_USE_ARRAY
/* Layout 1: one discrete variable per emulated register. */
46 static int xr1
, xr2
, xr3
, xr4
, xr5
, xr6
, xr7
, xr8
, xr9
;
47 static int xr10
, xr11
, xr12
, xr13
, xr14
, xr15
, xr16
;
/* Layout 2: all 17 registers in one array.  xr0 is mxu_xr[0]; it stays
 * zero because the instruction macros guard writes with `&xr != mxu_xr`,
 * emulating the hard-wired zero register of the real MXU. */
51 static int mxu_xr
[17] = {0};
53 #define xr0 mxu_xr[ 0]
54 #define xr1 mxu_xr[ 1]
55 #define xr2 mxu_xr[ 2]
56 #define xr3 mxu_xr[ 3]
57 #define xr4 mxu_xr[ 4]
58 #define xr5 mxu_xr[ 5]
59 #define xr6 mxu_xr[ 6]
60 #define xr7 mxu_xr[ 7]
61 #define xr8 mxu_xr[ 8]
62 #define xr9 mxu_xr[ 9]
63 #define xr10 mxu_xr[10]
64 #define xr11 mxu_xr[11]
65 #define xr12 mxu_xr[12]
66 #define xr13 mxu_xr[13]
67 #define xr14 mxu_xr[14]
68 #define xr15 mxu_xr[15]
69 #define xr16 mxu_xr[16]
/* NOTE(review): the comment below says C_VERSION, but the nearest visible
 * conditional is the MXU_REGS_USE_ARRAY test above -- the original nesting
 * was lost in extraction; confirm against the original file. */
93 #endif /* C_VERSION */
/* Emulated MXU load/store macros (C_VERSION).
 *
 * Each takes an emulated register lvalue, plus a base pointer and a byte
 * offset.  Writes are guarded with `&xr != mxu_xr` so that xr0
 * (== mxu_xr[0]) behaves as a hard-wired zero register, as on real MXU.
 * NOTE(review): addresses are formed through (unsigned long) casts, which
 * assumes pointers fit in unsigned long -- true on the 32-bit MIPS target. */

/* S32I2M: move a GPR value r into MXU register xr. */
#define S32I2M(xr, r) \
    do { if (&(xr) != mxu_xr) (xr) = (r); } while (0)

/* S32LDD: xr = 32-bit word at (p + o). */
#define S32LDD(xr, p, o) \
    do { if (&(xr) != mxu_xr) (xr) = *(long*)((unsigned long)(p) + (o)); } while (0)

/* S32STD: store xr to (p + o). */
#define S32STD(xr, p, o) \
    (*(long*)((unsigned long)(p) + (o)) = (xr))

/* S32LDDV: xr = word at (p + (o << s)) -- scaled-index addressing. */
#define S32LDDV(xr, p, o, s) \
    do { if (&(xr) != mxu_xr) (xr) = *(long*)((unsigned long)(p) + ((o) << (s))); } while (0)

/* S32STDV: store xr to (p + (o << s)). */
#define S32STDV(xr, p, o, s) \
    (*(long*)((unsigned long)(p) + ((o) << (s))) = (xr))
/* Emulated post-increment load/stores (C_VERSION).
 *
 * Each accesses (rb + offset) and then advances rb by the same offset,
 * mirroring the pointer-update addressing of the real instructions; rb must
 * be an assignable pointer lvalue.
 *
 * Fixes over the damaged copy: S32LDIV/S32SDIV were defined twice (before
 * and after S32LDI/S32SDI); bodies were bare statement lists, now wrapped
 * in do { } while (0) so they nest safely in if/else; the pointer-update
 * lines of S32LDI/S32SDI (lost in extraction) are restored per the
 * instructions' post-increment semantics. */

/* S32LDIV: xra = *(rb + (rc << strd2)); rb += (rc << strd2). */
#define S32LDIV(xra, rb, rc, strd2) \
do {\
    if (&(xra) != mxu_xr) (xra) = *(long*)((unsigned long)(rb) + ((rc) << (strd2)));\
    rb = (char*)(rb) + ((rc) << (strd2));\
} while (0)

/* S32SDIV: *(rb + (rc << strd2)) = xra; rb += (rc << strd2). */
#define S32SDIV(xra, rb, rc, strd2) \
do {\
    *(long*)((unsigned long)(rb) + ((rc) << (strd2))) = (xra);\
    rb = (char*)(rb) + ((rc) << (strd2));\
} while (0)

/* S32LDI: xra = *(rb + o); rb += o. */
#define S32LDI(xra, rb, o) \
do {\
    if (&(xra) != mxu_xr) (xra) = *(long*)((unsigned long)(rb) + (o));\
    rb = (char*)(rb) + (o);\
} while (0)

/* S32SDI: *(rb + o) = xra; rb += o. */
#define S32SDI(xra, rb, o) \
do {\
    *(long*)((unsigned long)(rb) + (o)) = (xra);\
    rb = (char*)(rb) + (o);\
} while (0)
/* Q16ADD_AS_WW: packed 16-bit add/subtract, word-with-word pattern.
 *   a = (bh+ch):(bl+cl)    d = (bh-ch):(bl-cl)
 * The high-half extraction and the ah/al/dh/dl arithmetic were lost in the
 * damaged copy and are restored here; locals are _-prefixed to avoid
 * capturing caller identifiers. */
#define Q16ADD_AS_WW(a, b, c, d) \
do {\
    short _bh = (b) >> 16;\
    short _bl = (b) & 0xFFFF;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xFFFF;\
    short _ah = _bh + _ch;\
    short _al = _bl + _cl;\
    short _dh = _bh - _ch;\
    short _dl = _bl - _cl;\
    if (&(a) != mxu_xr) (a) = (_ah << 16) | (_al & 0xFFFF);\
    if (&(d) != mxu_xr) (d) = (_dh << 16) | (_dl & 0xFFFF);\
} while (0)
/* Q16ADD_AS_XW: packed 16-bit add (-> a) / subtract (-> d) with the "XW"
 * crossed-operand pattern.
 * NOTE(review): truncated in this copy -- the high-half extractions (bh/ch)
 * and the ah/al/dh/dl arithmetic are missing, and the exact half-crossing
 * order cannot be confirmed from what remains; restore from the original
 * jz_mxu.h before use. */
155 #define Q16ADD_AS_XW(a, b, c, d) \
158 short bl = b & 0xFFFF;\
160 short cl = c & 0xFFFF;\
165 if (&a != mxu_xr) a = (ah << 16) | (al & 0xFFFF);\
166 if (&d != mxu_xr) d = (dh << 16) | (dl & 0xFFFF);\
/* Q16ADD_AA_WW: packed 16-bit add/add -- both destinations receive the same
 * packed sums (the damaged copy's surviving lines already show a and d
 * assigned identically; the missing high-half and sum lines are restored). */
#define Q16ADD_AA_WW(a, b, c, d) \
do {\
    short _bh = (b) >> 16;\
    short _bl = (b) & 0xFFFF;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xFFFF;\
    short _ah = _bh + _ch;\
    short _al = _bl + _cl;\
    if (&(a) != mxu_xr) (a) = (_ah << 16) | (_al & 0xFFFF);\
    if (&(d) != mxu_xr) (d) = (_ah << 16) | (_al & 0xFFFF);\
} while (0)
/* D16MUL / D16MAC (C_VERSION): dual signed 16x16 multiplies.
 *
 * Suffix selects which half of b feeds each product:
 *   WW: a = ch*bh, d = cl*bl     (high-with-high, low-with-low)
 *   LW: a = ch*bl, d = cl*bl     (b's low half used for both)
 *   HW: a = ch*bh, d = cl*bh     (b's high half used for both)
 * D16MAC_AA_* accumulates (+=) instead of assigning.
 * The local declarations lost in the damaged copy are restored and
 * _-prefixed to avoid capturing caller identifiers. */

#define D16MUL_LW(a, b, c, d)\
do {\
    short _bl = (b) & 0xFFFF;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xFFFF;\
    if (&(a) != mxu_xr) (a) = _ch * _bl;\
    if (&(d) != mxu_xr) (d) = _cl * _bl;\
} while (0)

#define D16MUL_WW(a, b, c, d)\
do {\
    short _bh = (b) >> 16;\
    short _bl = (b) & 0xFFFF;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xFFFF;\
    if (&(a) != mxu_xr) (a) = _ch * _bh;\
    if (&(d) != mxu_xr) (d) = _cl * _bl;\
} while (0)

#define D16MAC_AA_LW(a, b, c, d)\
do {\
    short _bl = (b) & 0xFFFF;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xFFFF;\
    if (&(a) != mxu_xr) (a) += _ch * _bl;\
    if (&(d) != mxu_xr) (d) += _cl * _bl;\
} while (0)

#define D16MUL_HW(a, b, c, d)\
do {\
    short _bh = (b) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xFFFF;\
    if (&(a) != mxu_xr) (a) = _ch * _bh;\
    if (&(d) != mxu_xr) (d) = _cl * _bh;\
} while (0)

#define D16MAC_AA_HW(a, b, c, d)\
do {\
    short _bh = (b) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xFFFF;\
    if (&(a) != mxu_xr) (a) += _ch * _bh;\
    if (&(d) != mxu_xr) (d) += _cl * _bh;\
} while (0)
/* D32SLL: dual 32-bit logical left shift -- a = b << sft, d = c << sft.
 * The brace wrapper lost in the damaged copy is restored as do{}while(0)
 * and all arguments are parenthesized (macro hygiene). */
#define D32SLL(a, b, c, d, sft)\
do {\
    if (&(a) != mxu_xr) (a) = (b) << (sft);\
    if (&(d) != mxu_xr) (d) = (c) << (sft);\
} while (0)

/* D32SARL: arithmetic-shift b and c right by sft, then pack the two results
 * as 16-bit halves into a (b's result in the high half).
 * NOTE(review): right-shifting a negative long is implementation-defined;
 * the original code relies on the target's arithmetic-shift behavior. */
#define D32SARL(a, b, c, sft)\
do {\
    if (&(a) != mxu_xr) \
        (a) = (((long)(b) >> (sft)) << 16) | (((long)(c) >> (sft)) & 0xFFFF);\
} while (0)
/* S32SFL: byte shuffle of the two source words b and c into a and d, with
 * the shuffle selected by `ptn` (compared against ptn0..ptn3).
 * NOTE(review): truncated in this copy -- the surrounding braces, the ptn0
 * branch, and all four branch bodies that assign a3..a0 / d3..d0 are
 * missing; only the byte extraction and the final repacking survive.
 * Restore from the original jz_mxu.h before use. */
235 #define S32SFL(a, b, c, d, ptn) \
237 unsigned char b3 = (unsigned char)((unsigned long)b >> 24);\
238 unsigned char b2 = (unsigned char)((unsigned long)b >> 16);\
239 unsigned char b1 = (unsigned char)((unsigned long)b >> 8);\
240 unsigned char b0 = (unsigned char)((unsigned long)b >> 0);\
241 unsigned char c3 = (unsigned char)((unsigned long)c >> 24);\
242 unsigned char c2 = (unsigned char)((unsigned long)c >> 16);\
243 unsigned char c1 = (unsigned char)((unsigned long)c >> 8);\
244 unsigned char c0 = (unsigned char)((unsigned long)c >> 0);\
245 unsigned char a3, a2, a1, a0, d3, d2, d1, d0;\
257 else if (ptn1 == ptn)\
268 else if (ptn2 == ptn)\
279 else if (ptn3 == ptn)\
290 if (&a != mxu_xr) a = ((unsigned long)a3 << 24) | ((unsigned long)a2 << 16) | ((unsigned long)a1 << 8) | (unsigned long)a0;\
291 if (&d != mxu_xr) d = ((unsigned long)d3 << 24) | ((unsigned long)d2 << 16) | ((unsigned long)d1 << 8) | (unsigned long)d0;\
/* D32SAR: dual 32-bit arithmetic right shift -- a = b >> sft, d = c >> sft
 * (sign-propagating; implementation-defined for negative values, as in the
 * original target code). */
#define D32SAR(a, b, c, d, sft)\
do {\
    if (&(a) != mxu_xr) (a) = (long)(b) >> (sft);\
    if (&(d) != mxu_xr) (d) = (long)(c) >> (sft);\
} while (0)

/* D32SLR: dual 32-bit logical right shift (zero-filling).
 * NOTE(review): assumes 32-bit long as on the MIPS target; on an LP64 host
 * the (unsigned long) cast sign-extends negative inputs first. */
#define D32SLR(a, b, c, d, sft)\
do {\
    if (&(a) != mxu_xr) (a) = (unsigned long)(b) >> (sft);\
    if (&(d) != mxu_xr) (d) = (unsigned long)(c) >> (sft);\
} while (0)
/* Q16SLL: quad 16-bit logical left shift -- each halfword of b shifts into
 * the matching half of a, each halfword of c into the matching half of d.
 * Fixes over the damaged copy: d was built from an undeclared `dh` and from
 * b's low half instead of c's halves; the local declarations are restored. */
#define Q16SLL(a, b, c, d, sft)\
do {\
    short _bh = (b) >> 16;\
    short _bl = (b) & 0xffff;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xffff;\
    if (&(a) != mxu_xr) (a) = ((_bh << (sft)) << 16) | (((long)_bl << (sft)) & 0xffff);\
    if (&(d) != mxu_xr) (d) = ((_ch << (sft)) << 16) | (((long)_cl << (sft)) & 0xffff);\
} while (0)

/* Q16SAR: quad 16-bit arithmetic right shift, same operand layout as
 * Q16SLL.  Fixes: `b1` was a typo for `bl` (undeclared identifier) and the
 * bh/ch declarations were missing. */
#define Q16SAR(a, b, c, d, sft)\
do {\
    short _bh = (b) >> 16;\
    short _bl = (b) & 0xffff;\
    short _ch = (c) >> 16;\
    short _cl = (c) & 0xffff;\
    if (&(a) != mxu_xr) (a) = (((short)(_bh >> (sft))) << 16) | ((long)((short)(_bl >> (sft))) & 0xffff);\
    if (&(d) != mxu_xr) (d) = (((short)(_ch >> (sft))) << 16) | ((long)((short)(_cl >> (sft))) & 0xffff);\
} while (0)
/* D32ACC / D32ADD (C_VERSION): dual 32-bit add/subtract.
 *   D32ACC_xy: destinations also fold in their own previous value
 *              (a = a + b ? c, d = d + b ? c).
 *   D32ADD_xy: plain a = b ? c, d = b ? c.
 * First suffix letter gives the operation for a, second for d (A = add,
 * S = subtract).  The `_a/_b/_c/_d` snapshot declarations lost in the
 * damaged copy are restored -- they must be read before any destination is
 * written, since a or d may alias b or c. */

#define D32ACC_AA(a, b, c, d)\
do {\
    long _a = (a), _b = (b), _c = (c), _d = (d);\
    if (&(a) != mxu_xr) (a) = _a + _b + _c;\
    if (&(d) != mxu_xr) (d) = _d + _b + _c;\
} while (0)

#define D32ACC_AS(a, b, c, d)\
do {\
    long _a = (a), _b = (b), _c = (c), _d = (d);\
    if (&(a) != mxu_xr) (a) = _a + _b + _c;\
    if (&(d) != mxu_xr) (d) = _d + _b - _c;\
} while (0)

#define D32ADD_AS(a, b, c, d)\
do {\
    long _b = (b), _c = (c);\
    if (&(a) != mxu_xr) (a) = _b + _c;\
    if (&(d) != mxu_xr) (d) = _b - _c;\
} while (0)

#define D32ADD_SS(a, b, c, d)\
do {\
    long _b = (b), _c = (c);\
    if (&(a) != mxu_xr) (a) = _b - _c;\
    if (&(d) != mxu_xr) (d) = _b - _c;\
} while (0)

#define D32ADD_AA(a, b, c, d)\
do {\
    long _b = (b), _c = (c);\
    if (&(a) != mxu_xr) (a) = _b + _c;\
    if (&(d) != mxu_xr) (d) = _b + _c;\
} while (0)
/* D16MADL_AA_WW: dual 16-bit multiply, add the LOW 16 bits of each product
 * into the matching halfword of a, and pack the result into d.
 * The L32/R32 declarations and product lines lost in the damaged copy are
 * restored; locals keep the original _-prefixed naming. */
#define D16MADL_AA_WW(a, b, c, d) \
do {\
    short _ah = (a) >> 16;\
    short _al = ((a) << 16) >> 16;\
    short _bh = (b) >> 16;\
    short _bl = ((b) << 16) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32, R32;\
    L32 = _bh * _ch;\
    R32 = _bl * _cl;\
    _ah += (L32 << 16) >> 16; \
    _al += (R32 << 16) >> 16; \
    if (&(d) != mxu_xr) (d) = (_ah << 16) + (_al & 0xffff);\
} while (0)
/* D16MACF_AA_WW: fractional (Q15-style) dual 16x16 multiply with rounding;
 * packs the two rounded products into a.
 * NOTE(review): truncated in this copy -- the declarations of L32/R32 (and
 * possibly lines folding in the accumulator halves, as "MAC" suggests) are
 * missing; the surviving code only doubles and rounds the raw products.
 * Verify against the original jz_mxu.h before use. */
385 #define D16MACF_AA_WW(a, b, c, d) \
387 short _bh = b >> 16;\
388 short _bl = (b << 16) >> 16;\
389 short _ch = c >> 16;\
390 short _cl = (c << 16) >> 16;\
392 L32 = (_bh * _ch) << 1;\
393 R32 = (_bl * _cl) << 1; \
396 if (&a != mxu_xr) a = ((((L32 >> 15) + 1) >> 1) << 16) + ((((R32 >> 15) + 1) >> 1) & 0xffff);\
/* D16MAC_xy_zW family (C_VERSION): dual 16-bit multiply-accumulate.
 *
 * Third suffix letter = operand pattern, applied to b (consistent with the
 * intact D16MUL/D16MAC C versions earlier in this file):
 *   WW: L32 = bh*ch, R32 = bl*cl
 *   HW: L32 = bh*ch, R32 = bh*cl   (b's high half for both)
 *   LW: L32 = bl*ch, R32 = bl*cl   (b's low  half for both)
 * First/second suffix letters: A adds / S subtracts the product into a / d
 * respectively.  The local declarations and product lines lost in the
 * damaged copy are restored. */

#define D16MAC_AA_WW(a, b, c, d) \
do {\
    short _bh = (b) >> 16;\
    short _bl = ((b) << 16) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bh * _ch;\
    long R32 = _bl * _cl;\
    if (&(a) != mxu_xr) (a) = (a) + L32;\
    if (&(d) != mxu_xr) (d) = (d) + R32;\
} while (0)

#define D16MAC_SS_WW(a, b, c, d) \
do {\
    short _bh = (b) >> 16;\
    short _bl = ((b) << 16) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bh * _ch;\
    long R32 = _bl * _cl;\
    if (&(a) != mxu_xr) (a) = (a) - L32;\
    if (&(d) != mxu_xr) (d) = (d) - R32;\
} while (0)

#define D16MAC_SA_HW(a, b, c, d) \
do {\
    short _bh = (b) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bh * _ch;\
    long R32 = _bh * _cl;\
    if (&(a) != mxu_xr) (a) = (a) - L32;\
    if (&(d) != mxu_xr) (d) = (d) + R32;\
} while (0)

#define D16MAC_SS_HW(a, b, c, d) \
do {\
    short _bh = (b) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bh * _ch;\
    long R32 = _bh * _cl;\
    if (&(a) != mxu_xr) (a) = (a) - L32;\
    if (&(d) != mxu_xr) (d) = (d) - R32;\
} while (0)

#define D16MAC_AS_HW(a, b, c, d) \
do {\
    short _bh = (b) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bh * _ch;\
    long R32 = _bh * _cl;\
    if (&(a) != mxu_xr) (a) = (a) + L32;\
    if (&(d) != mxu_xr) (d) = (d) - R32;\
} while (0)

#define D16MAC_AS_LW(a, b, c, d) \
do {\
    short _bl = ((b) << 16) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bl * _ch;\
    long R32 = _bl * _cl;\
    if (&(a) != mxu_xr) (a) = (a) + L32;\
    if (&(d) != mxu_xr) (d) = (d) - R32;\
} while (0)

#define D16MAC_SA_LW(a, b, c, d) \
do {\
    short _bl = ((b) << 16) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bl * _ch;\
    long R32 = _bl * _cl;\
    if (&(a) != mxu_xr) (a) = (a) - L32;\
    if (&(d) != mxu_xr) (d) = (d) + R32;\
} while (0)

#define D16MAC_SS_LW(a, b, c, d) \
do {\
    short _bl = ((b) << 16) >> 16;\
    short _ch = (c) >> 16;\
    short _cl = ((c) << 16) >> 16;\
    long L32 = _bl * _ch;\
    long R32 = _bl * _cl;\
    if (&(a) != mxu_xr) (a) = (a) - L32;\
    if (&(d) != mxu_xr) (d) = (d) - R32;\
} while (0)
/* Q8ADDE_AA: quad 8-bit add with extension to 16 bits --
 *   xra = (b3+c3):(b2+c2), xrd = (b1+c1):(b0+c0)  (packed 16-bit halves).
 * The four sum lines lost in the damaged copy are restored. */
#define Q8ADDE_AA(xra, xrb, xrc, xrd) \
do {\
    unsigned char _b3 = (unsigned char)((unsigned long)(xrb) >> 24);\
    unsigned char _b2 = (unsigned char)((unsigned long)(xrb) >> 16);\
    unsigned char _b1 = (unsigned char)((unsigned long)(xrb) >> 8);\
    unsigned char _b0 = (unsigned char)((unsigned long)(xrb) >> 0);\
    unsigned char _c3 = (unsigned char)((unsigned long)(xrc) >> 24);\
    unsigned char _c2 = (unsigned char)((unsigned long)(xrc) >> 16);\
    unsigned char _c1 = (unsigned char)((unsigned long)(xrc) >> 8);\
    unsigned char _c0 = (unsigned char)((unsigned long)(xrc) >> 0);\
    short _ah = _b3 + _c3;\
    short _al = _b2 + _c2;\
    short _dh = _b1 + _c1;\
    short _dl = _b0 + _c0;\
    if (&(xra) != mxu_xr) (xra) = ((unsigned long)_ah << 16) | (unsigned short)_al;\
    if (&(xrd) != mxu_xr) (xrd) = ((unsigned long)_dh << 16) | (unsigned short)_dl;\
} while (0)
/* Q16SAT: saturate four signed 16-bit halfwords (two from xrb, two from
 * xrc) to the unsigned byte range [0,255] and pack them into xra as
 * bh:bl:ch:cl, most- to least-significant byte.
 * The negative clamps (x < 0 -> 0) lost in the damaged copy are restored --
 * without them "saturate" only clipped the top of the range. */
#define Q16SAT(xra, xrb, xrc) \
do {\
    short _bh = (xrb) >> 16;\
    short _bl = (xrb) & 0xFFFF;\
    short _ch = (xrc) >> 16;\
    short _cl = (xrc) & 0xFFFF;\
    if (_bh > 255) _bh = 255;\
    if (_bh < 0)   _bh = 0;\
    if (_bl > 255) _bl = 255;\
    if (_bl < 0)   _bl = 0;\
    if (_ch > 255) _ch = 255;\
    if (_ch < 0)   _ch = 0;\
    if (_cl > 255) _cl = 255;\
    if (_cl < 0)   _cl = 0;\
    if (&(xra) != mxu_xr) \
        (xra) = ((unsigned)_bh << 24) | ((unsigned)_bl << 16) | ((unsigned)_ch << 8) | (unsigned)_cl;\
} while (0)
/* Q8SAD: sum of absolute differences of the four byte lanes of xrb and xrc.
 * xra receives the fresh sum; xrd accumulates it (+=).
 * Restored the do{}while(0) wrapper lost in the damaged copy; locals are
 * _-prefixed to avoid capturing caller identifiers. */
#define Q8SAD(xra, xrb, xrc, xrd) \
do {\
    short _b3 = (unsigned char)((unsigned long)(xrb) >> 24);\
    short _b2 = (unsigned char)((unsigned long)(xrb) >> 16);\
    short _b1 = (unsigned char)((unsigned long)(xrb) >> 8);\
    short _b0 = (unsigned char)((unsigned long)(xrb) >> 0);\
    short _c3 = (unsigned char)((unsigned long)(xrc) >> 24);\
    short _c2 = (unsigned char)((unsigned long)(xrc) >> 16);\
    short _c1 = (unsigned char)((unsigned long)(xrc) >> 8);\
    short _c0 = (unsigned char)((unsigned long)(xrc) >> 0);\
    int _i0, _i1, _i2, _i3;\
    _i3 = labs(_b3 - _c3);\
    _i2 = labs(_b2 - _c2);\
    _i1 = labs(_b1 - _c1);\
    _i0 = labs(_b0 - _c0);\
    if (&(xra) != mxu_xr) (xra) = _i0 + _i1 + _i2 + _i3;\
    if (&(xrd) != mxu_xr) (xrd) += _i0 + _i1 + _i2 + _i3;\
} while (0)
/* Q8AVGR: quad 8-bit average with rounding -- each byte lane of xra becomes
 * (b + c + 1) >> 1 for the corresponding lanes of xrb and xrc.
 * Restored the do{}while(0) wrapper lost in the damaged copy; locals are
 * _-prefixed to avoid capturing caller identifiers. */
#define Q8AVGR(xra, xrb, xrc) \
do {\
    short _b3 = (unsigned char)((unsigned long)(xrb) >> 24);\
    short _b2 = (unsigned char)((unsigned long)(xrb) >> 16);\
    short _b1 = (unsigned char)((unsigned long)(xrb) >> 8);\
    short _b0 = (unsigned char)((unsigned long)(xrb) >> 0);\
    short _c3 = (unsigned char)((unsigned long)(xrc) >> 24);\
    short _c2 = (unsigned char)((unsigned long)(xrc) >> 16);\
    short _c1 = (unsigned char)((unsigned long)(xrc) >> 8);\
    short _c0 = (unsigned char)((unsigned long)(xrc) >> 0);\
    unsigned char _a3, _a2, _a1, _a0;\
    _a3 = (unsigned char)((_b3 + _c3 + 1) >> 1);\
    _a2 = (unsigned char)((_b2 + _c2 + 1) >> 1);\
    _a1 = (unsigned char)((_b1 + _c1 + 1) >> 1);\
    _a0 = (unsigned char)((_b0 + _c0 + 1) >> 1);\
    if (&(xra) != mxu_xr) \
        (xra) = ((unsigned long)_a3 << 24) | ((unsigned long)_a2 << 16) | ((unsigned long)_a1 << 8) | (unsigned long)_a0;\
} while (0)
/* S32ALN: align -- selects a 32-bit window from the 64-bit concatenation of
 * xrb and xrc, with the byte offset chosen by rs.
 * NOTE(review): truncated in this copy -- the if/else framework testing rs
 * is missing, leaving only the five candidate assignments (byte shifts
 * 0..4); the exact rs-value-to-shift mapping cannot be confirmed here.
 * Restore from the original jz_mxu.h before use. */
578 #define S32ALN(xra, xrb, xrc, rs) \
582 if (&xra != mxu_xr) xra = xrb;\
586 if (&xra != mxu_xr) xra = (xrb << 8) | ((unsigned long)xrc >> 24);\
590 if (&xra != mxu_xr) xra = (xrb << 16) | ((unsigned long)xrc >> 16);\
594 if (&xra != mxu_xr) xra = (xrb << 24) | ((unsigned long)xrc >> 8);\
598 if (&xra != mxu_xr) xra = xrc;\
/* ------------------------------------------------------------------------
 * Real-hardware branch: each macro emits the corresponding MXU instruction
 * as inline assembly (the mnemonics are translated to raw opcodes by the
 * mxu_as wrapper script mentioned in the file header).
 * NOTE(review): every asm macro below is truncated in this copy -- the
 * brace / do-while wrappers and the output-operand lines of each asm
 * statement are missing; only the mnemonic strings and the input
 * constraint lists survive.  Restore from the original jz_mxu.h.
 * ------------------------------------------------------------------------ */
602 #else /* C_VERSION */
604 /***********************************LD/SD***********************************/
605 #define S32LDD(xra,rb,s12) \
607 __asm__ __volatile ("S32LDD xr%0,%z1,%2" \
609 :"K"(xra),"d" (rb),"I"(s12)); \
612 #define S32STD(xra,rb,s12) \
614 __asm__ __volatile ("S32STD xr%0,%z1,%2" \
616 :"K"(xra),"d" (rb),"I"(s12):"memory"); \
619 #define S32LDDV(xra,rb,rc,strd2) \
621 __asm__ __volatile ("S32LDDV xr%0,%z1,%z2,%3" \
623 :"K"(xra),"d" (rb),"d"(rc),"K"(strd2)); \
626 #define S32STDV(xra,rb,rc,strd2) \
628 __asm__ __volatile ("S32STDV xr%0,%z1,%z2,%3" \
630 :"K"(xra),"d" (rb),"d"(rc),"K"(strd2):"memory"); \
633 #define S32LDI(xra,rb,s12) \
635 __asm__ __volatile ("S32LDI xr%1,%z0,%2" \
637 :"K"(xra),"I"(s12)); \
640 #define S32SDI(xra,rb,s12) \
642 __asm__ __volatile ("S32SDI xr%1,%z0,%2" \
644 :"K"(xra),"I"(s12):"memory"); \
647 #define S32LDIV(xra,rb,rc,strd2) \
649 __asm__ __volatile ("S32LDIV xr%1,%z0,%z2,%3" \
651 :"K"(xra),"d"(rc),"K"(strd2)); \
654 #define S32SDIV(xra,rb,rc,strd2) \
656 __asm__ __volatile ("S32SDIV xr%1,%z0,%z2,%3" \
658 :"K"(xra),"d"(rc),"K"(strd2):"memory"); \
661 /***********************************D16MUL***********************************/
662 #define D16MUL_WW(xra,xrb,xrc,xrd) \
664 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,WW" \
666 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
669 #define D16MUL_LW(xra,xrb,xrc,xrd) \
671 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,LW" \
673 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
676 #define D16MUL_HW(xra,xrb,xrc,xrd) \
678 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,HW" \
680 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
683 #define D16MUL_XW(xra,xrb,xrc,xrd) \
685 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,XW" \
687 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
690 /**********************************D16MULF*******************************/
691 #define D16MULF_WW(xra,xrb,xrc) \
693 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,WW" \
695 :"K"(xra),"K"(xrb),"K"(xrc)); \
698 #define D16MULF_LW(xra,xrb,xrc) \
700 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,LW" \
702 :"K"(xra),"K"(xrb),"K"(xrc)); \
705 #define D16MULF_HW(xra,xrb,xrc) \
707 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,HW" \
709 :"K"(xra),"K"(xrb),"K"(xrc)); \
712 #define D16MULF_XW(xra,xrb,xrc) \
714 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,XW" \
716 :"K"(xra),"K"(xrb),"K"(xrc)); \
719 /***********************************D16MAC********************************/
720 #define D16MAC_AA_WW(xra,xrb,xrc,xrd) \
722 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,WW" \
724 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
727 #define D16MAC_AA_LW(xra,xrb,xrc,xrd) \
729 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,LW" \
731 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
734 #define D16MAC_AA_HW(xra,xrb,xrc,xrd) \
736 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,HW" \
738 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
741 #define D16MAC_AA_XW(xra,xrb,xrc,xrd) \
743 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,XW" \
745 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
748 #define D16MAC_AS_WW(xra,xrb,xrc,xrd) \
750 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,WW" \
752 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
755 #define D16MAC_AS_LW(xra,xrb,xrc,xrd) \
757 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,LW" \
759 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
762 #define D16MAC_AS_HW(xra,xrb,xrc,xrd) \
764 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,HW" \
766 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
769 #define D16MAC_AS_XW(xra,xrb,xrc,xrd) \
771 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,XW" \
773 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
776 #define D16MAC_SA_WW(xra,xrb,xrc,xrd) \
778 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,WW" \
780 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
783 #define D16MAC_SA_LW(xra,xrb,xrc,xrd) \
785 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,LW" \
787 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
790 #define D16MAC_SA_HW(xra,xrb,xrc,xrd) \
792 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,HW" \
794 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
797 #define D16MAC_SA_XW(xra,xrb,xrc,xrd) \
799 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,XW" \
801 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
804 #define D16MAC_SS_WW(xra,xrb,xrc,xrd) \
806 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,WW" \
808 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
811 #define D16MAC_SS_LW(xra,xrb,xrc,xrd) \
813 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,LW" \
815 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
818 #define D16MAC_SS_HW(xra,xrb,xrc,xrd) \
820 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,HW" \
822 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
825 #define D16MAC_SS_XW(xra,xrb,xrc,xrd) \
827 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,XW" \
829 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
832 /**********************************D16MACF*******************************/
833 #define D16MACF_AA_WW(xra,xrb,xrc,xrd) \
835 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,WW" \
837 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
840 #define D16MACF_AA_LW(xra,xrb,xrc,xrd) \
842 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,LW" \
844 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
847 #define D16MACF_AA_HW(xra,xrb,xrc,xrd) \
849 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,HW" \
851 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
854 #define D16MACF_AA_XW(xra,xrb,xrc,xrd) \
856 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,XW" \
858 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
861 #define D16MACF_AS_WW(xra,xrb,xrc,xrd) \
863 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,WW" \
865 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
868 #define D16MACF_AS_LW(xra,xrb,xrc,xrd) \
870 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,LW" \
872 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
875 #define D16MACF_AS_HW(xra,xrb,xrc,xrd) \
877 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,HW" \
879 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
882 #define D16MACF_AS_XW(xra,xrb,xrc,xrd) \
884 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,XW" \
886 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
889 #define D16MACF_SA_WW(xra,xrb,xrc,xrd) \
891 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,WW" \
893 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
896 #define D16MACF_SA_LW(xra,xrb,xrc,xrd) \
898 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,LW" \
900 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
903 #define D16MACF_SA_HW(xra,xrb,xrc,xrd) \
905 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,HW" \
907 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
910 #define D16MACF_SA_XW(xra,xrb,xrc,xrd) \
912 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,XW" \
914 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
917 #define D16MACF_SS_WW(xra,xrb,xrc,xrd) \
919 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,WW" \
921 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
924 #define D16MACF_SS_LW(xra,xrb,xrc,xrd) \
926 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,LW" \
928 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
931 #define D16MACF_SS_HW(xra,xrb,xrc,xrd) \
933 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,HW" \
935 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
938 #define D16MACF_SS_XW(xra,xrb,xrc,xrd) \
940 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,XW" \
942 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
945 /**********************************D16MADL*******************************/
946 #define D16MADL_AA_WW(xra,xrb,xrc,xrd) \
948 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,WW" \
950 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
953 #define D16MADL_AA_LW(xra,xrb,xrc,xrd) \
955 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,LW" \
957 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
960 #define D16MADL_AA_HW(xra,xrb,xrc,xrd) \
962 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,HW" \
964 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
967 #define D16MADL_AA_XW(xra,xrb,xrc,xrd) \
969 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,XW" \
971 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
974 #define D16MADL_AS_WW(xra,xrb,xrc,xrd) \
976 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,WW" \
978 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
981 #define D16MADL_AS_LW(xra,xrb,xrc,xrd) \
983 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,LW" \
985 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
988 #define D16MADL_AS_HW(xra,xrb,xrc,xrd) \
990 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,HW" \
992 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
995 #define D16MADL_AS_XW(xra,xrb,xrc,xrd) \
997 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,XW" \
999 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1002 #define D16MADL_SA_WW(xra,xrb,xrc,xrd) \
1004 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,WW" \
1006 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1009 #define D16MADL_SA_LW(xra,xrb,xrc,xrd) \
1011 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,LW" \
1013 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1016 #define D16MADL_SA_HW(xra,xrb,xrc,xrd) \
1018 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,HW" \
1020 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1023 #define D16MADL_SA_XW(xra,xrb,xrc,xrd) \
1025 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,XW" \
1027 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1030 #define D16MADL_SS_WW(xra,xrb,xrc,xrd) \
1032 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,WW" \
1034 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1037 #define D16MADL_SS_LW(xra,xrb,xrc,xrd) \
1039 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,LW" \
1041 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1044 #define D16MADL_SS_HW(xra,xrb,xrc,xrd) \
1046 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,HW" \
1048 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1051 #define D16MADL_SS_XW(xra,xrb,xrc,xrd) \
1053 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,XW" \
1055 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1058 /***********************************S16MAD*******************************/
1059 #define S16MAD_A_HH(xra,xrb,xrc,xrd) \
1061 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,0" \
1063 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1066 #define S16MAD_A_LL(xra,xrb,xrc,xrd) \
1068 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,1" \
1070 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1073 #define S16MAD_A_HL(xra,xrb,xrc,xrd) \
1075 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,2" \
1077 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1080 #define S16MAD_A_LH(xra,xrb,xrc,xrd) \
1082 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,3" \
1084 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1087 #define S16MAD_S_HH(xra,xrb,xrc,xrd) \
1089 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,0" \
1091 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1094 #define S16MAD_S_LL(xra,xrb,xrc,xrd) \
1096 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,1" \
1098 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1101 #define S16MAD_S_HL(xra,xrb,xrc,xrd) \
1103 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,2" \
1105 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1108 #define S16MAD_S_LH(xra,xrb,xrc,xrd) \
1110 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,3" \
1112 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1115 /***********************************Q8MUL********************************/
1116 #define Q8MUL(xra,xrb,xrc,xrd) \
1118 __asm__ __volatile ("Q8MUL xr%0,xr%1,xr%2,xr%3" \
1120 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1123 /***********************************Q8MAC********************************/
1124 #define Q8MAC_AA(xra,xrb,xrc,xrd) \
1126 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,AA" \
1128 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1131 #define Q8MAC_AS(xra,xrb,xrc,xrd) \
1133 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,AS" \
1135 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1138 #define Q8MAC_SA(xra,xrb,xrc,xrd) \
1140 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,SA" \
1142 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1145 #define Q8MAC_SS(xra,xrb,xrc,xrd) \
1147 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,SS" \
1149 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1152 /***********************************Q8MADL********************************/
1153 #define Q8MADL_AA(xra,xrb,xrc,xrd) \
1155 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,AA" \
1157 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1160 #define Q8MADL_AS(xra,xrb,xrc,xrd) \
1162 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,AS" \
1164 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1167 #define Q8MADL_SA(xra,xrb,xrc,xrd) \
1169 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,SA" \
1171 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1174 #define Q8MADL_SS(xra,xrb,xrc,xrd) \
1176 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,SS" \
1178 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1181 /***********************************D32ADD********************************/
1182 #define D32ADD_AA(xra,xrb,xrc,xrd) \
1184 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,AA" \
1186 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1189 #define D32ADD_AS(xra,xrb,xrc,xrd) \
1191 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,AS" \
1193 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1196 #define D32ADD_SA(xra,xrb,xrc,xrd) \
1198 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,SA" \
1200 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1203 #define D32ADD_SS(xra,xrb,xrc,xrd) \
1205 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,SS" \
1207 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1210 /***********************************D32ACC********************************/
1211 #define D32ACC_AA(xra,xrb,xrc,xrd) \
1213 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,AA" \
1215 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1218 #define D32ACC_AS(xra,xrb,xrc,xrd) \
1220 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,AS" \
1222 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1225 #define D32ACC_SA(xra,xrb,xrc,xrd) \
1227 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,SA" \
1229 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1232 #define D32ACC_SS(xra,xrb,xrc,xrd) \
1234 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,SS" \
1236 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1239 /***********************************S32CPS********************************/
1240 #define S32CPS(xra,xrb,xrc) \
1242 __asm__ __volatile ("S32CPS xr%0,xr%1,xr%2" \
1244 :"K"(xra),"K"(xrb),"K"(xrc)); \
1247 #define S32ABS(xra,xrb) \
1249 __asm__ __volatile ("S32CPS xr%0,xr%1,xr%2" \
1251 :"K"(xra),"K"(xrb),"K"(xrb)); \
1254 /***********************************Q16ADD********************************/
1255 #define Q16ADD_AA_WW(xra,xrb,xrc,xrd) \
1257 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,WW" \
1259 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1262 #define Q16ADD_AA_LW(xra,xrb,xrc,xrd) \
1264 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,LW" \
1266 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1269 #define Q16ADD_AA_HW(xra,xrb,xrc,xrd) \
1271 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,HW" \
1273 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1276 #define Q16ADD_AA_XW(xra,xrb,xrc,xrd) \
1278 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,XW" \
1280 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1282 #define Q16ADD_AS_WW(xra,xrb,xrc,xrd) \
1284 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,WW" \
1286 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1289 #define Q16ADD_AS_LW(xra,xrb,xrc,xrd) \
1291 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,LW" \
1293 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1296 #define Q16ADD_AS_HW(xra,xrb,xrc,xrd) \
1298 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,HW" \
1300 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1303 #define Q16ADD_AS_XW(xra,xrb,xrc,xrd) \
1305 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,XW" \
1307 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1310 #define Q16ADD_SA_WW(xra,xrb,xrc,xrd) \
1312 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,WW" \
1314 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1317 #define Q16ADD_SA_LW(xra,xrb,xrc,xrd) \
1319 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,LW" \
1321 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1324 #define Q16ADD_SA_HW(xra,xrb,xrc,xrd) \
1326 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,HW" \
1328 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1331 #define Q16ADD_SA_XW(xra,xrb,xrc,xrd) \
1333 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,XW" \
1335 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1338 #define Q16ADD_SS_WW(xra,xrb,xrc,xrd) \
1340 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,WW" \
1342 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1345 #define Q16ADD_SS_LW(xra,xrb,xrc,xrd) \
1347 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,LW" \
1349 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1352 #define Q16ADD_SS_HW(xra,xrb,xrc,xrd) \
1354 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,HW" \
1356 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1359 #define Q16ADD_SS_XW(xra,xrb,xrc,xrd) \
1361 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,XW" \
1363 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1366 /***********************************Q16ACC********************************/
1367 #define Q16ACC_AA(xra,xrb,xrc,xrd) \
1369 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,AA" \
1371 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1374 #define Q16ACC_AS(xra,xrb,xrc,xrd) \
1376 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,AS" \
1378 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1381 #define Q16ACC_SA(xra,xrb,xrc,xrd) \
1383 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,SA" \
1385 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1388 #define Q16ACC_SS(xra,xrb,xrc,xrd) \
1390 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,SS" \
1392 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1395 /***********************************D16CPS********************************/
1396 #define D16CPS(xra,xrb,xrc) \
1398 __asm__ __volatile ("D16CPS xr%0,xr%1,xr%2" \
1400 :"K"(xra),"K"(xrb),"K"(xrc)); \
1403 #define D16ABS(xra,xrb) \
1405 __asm__ __volatile ("D16CPS xr%0,xr%1,xr%2" \
1407 :"K"(xra),"K"(xrb),"K"(xrb)); \
1410 /*******************************D16AVG/D16AVGR*****************************/
1411 #define D16AVG(xra,xrb,xrc) \
1413 __asm__ __volatile ("D16AVG xr%0,xr%1,xr%2" \
1415 :"K"(xra),"K"(xrb),"K"(xrc)); \
1417 #define D16AVGR(xra,xrb,xrc) \
1419 __asm__ __volatile ("D16AVGR xr%0,xr%1,xr%2" \
1421 :"K"(xra),"K"(xrb),"K"(xrc)); \
1424 /************************************Q8ADD********************************/
1425 #define Q8ADD_AA(xra,xrb,xrc) \
1427 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,AA" \
1429 :"K"(xra),"K"(xrb),"K"(xrc)); \
1432 #define Q8ADD_AS(xra,xrb,xrc) \
1434 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,AS" \
1436 :"K"(xra),"K"(xrb),"K"(xrc)); \
1439 #define Q8ADD_SA(xra,xrb,xrc) \
1441 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,SA" \
1443 :"K"(xra),"K"(xrb),"K"(xrc)); \
1446 #define Q8ADD_SS(xra,xrb,xrc) \
1448 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,SS" \
1450 :"K"(xra),"K"(xrb),"K"(xrc)); \
1453 /************************************Q8ADDE********************************/
1454 #define Q8ADDE_AA(xra,xrb,xrc,xrd) \
1456 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,AA" \
1458 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1461 #define Q8ADDE_AS(xra,xrb,xrc,xrd) \
1463 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,AS" \
1465 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1468 #define Q8ADDE_SA(xra,xrb,xrc,xrd) \
1470 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,SA" \
1472 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1475 #define Q8ADDE_SS(xra,xrb,xrc,xrd) \
1477 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,SS" \
1479 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
/* Q8ACCE: accumulating variant of Q8ADDE -- the widened 16-bit results
 * are presumably added into the existing contents of the xra/xrd pair
 * rather than overwriting them.
 * NOTE(review): inferred from the ACC mnemonic; verify against ISA doc. */
1482 /************************************Q8ACCE********************************/
1483 #define Q8ACCE_AA(xra,xrb,xrc,xrd) \
1485 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,AA" \
1487 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1490 #define Q8ACCE_AS(xra,xrb,xrc,xrd) \
1492 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,AS" \
1494 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1497 #define Q8ACCE_SA(xra,xrb,xrc,xrd) \
1499 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,SA" \
1501 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1504 #define Q8ACCE_SS(xra,xrb,xrc,xrd) \
1506 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,SS" \
1508 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
/* Q8ABD: per-byte absolute difference, presumably xra = |xrb - xrc| for
 * each of the four 8-bit lanes (inferred from mnemonic -- verify). */
1511 /************************************Q8ABD********************************/
1512 #define Q8ABD(xra,xrb,xrc) \
1514 __asm__ __volatile ("Q8ABD xr%0,xr%1,xr%2" \
1516 :"K"(xra),"K"(xrb),"K"(xrc)); \
/* Q8SLT: per-byte "set on less than" -- each lane of xra presumably holds
 * the result of comparing the corresponding xrb/xrc bytes (inferred from
 * the SLT mnemonic, as in MIPS slt -- verify). */
1519 /************************************Q8SLT********************************/
1520 #define Q8SLT(xra,xrb,xrc) \
1522 __asm__ __volatile ("Q8SLT xr%0,xr%1,xr%2" \
1524 :"K"(xra),"K"(xrb),"K"(xrc)); \
/* Q8SAD: sum of absolute differences of the four byte lanes of xrb/xrc,
 * involving xrd as a fourth operand (presumably the accumulator).
 * NOTE(review): exact operand roles inferred from the SAD mnemonic;
 * verify against the MXU ISA manual. */
1527 /************************************Q8SAD********************************/
1528 #define Q8SAD(xra,xrb,xrc,xrd) \
1530 __asm__ __volatile ("Q8SAD xr%0,xr%1,xr%2,xr%3" \
1532 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
/* Per-byte averages: xra = avg(xrb, xrc) for each 8-bit lane; Q8AVGR is
 * presumably the rounding variant (R suffix) -- verify, as for D16AVGR. */
1535 /********************************Q8AVG/Q8AVGR*****************************/
1536 #define Q8AVG(xra,xrb,xrc) \
1538 __asm__ __volatile ("Q8AVG xr%0,xr%1,xr%2" \
1540 :"K"(xra),"K"(xrb),"K"(xrc)); \
1542 #define Q8AVGR(xra,xrb,xrc) \
1544 __asm__ __volatile ("Q8AVGR xr%0,xr%1,xr%2" \
1546 :"K"(xra),"K"(xrb),"K"(xrc)); \
/* 32-bit shift group.
 * D32SLL / D32SLR / D32SAR: shift xrb and xrc into xra and xrd by the
 *   4-bit immediate SFT4 (logical left / logical right / arithmetic right,
 *   per the SLL/SLR/SAR naming).
 * D32SARL: arithmetic right shift of xrb/xrc with the results packed into
 *   xra (L suffix presumably = "and link/pack" -- verify).
 * D32SLLV / D32SLRV / D32SARV: variable-shift forms; the amount comes from
 *   GPR rb ("d" constraint = MIPS general register; the %z modifier prints
 *   $0 when the operand is the constant 0).
 * D32SARW: arithmetic right shift involving the xrb/xrc pair and a GPR
 *   shift amount, result in xra (W suffix semantics not visible -- verify).
 * NOTE(review): per-operand roles are inferred from mnemonics and operand
 * order; confirm against the Ingenic MXU ISA manual. */
1549 /**********************************D32SHIFT******************************/
1550 #define D32SLL(xra,xrb,xrc,xrd,SFT4) \
1552 __asm__ __volatile ("D32SLL xr%0,xr%1,xr%2,xr%3,%4" \
1554 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1557 #define D32SLR(xra,xrb,xrc,xrd,SFT4) \
1559 __asm__ __volatile ("D32SLR xr%0,xr%1,xr%2,xr%3,%4" \
1561 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1564 #define D32SAR(xra,xrb,xrc,xrd,SFT4) \
1566 __asm__ __volatile ("D32SAR xr%0,xr%1,xr%2,xr%3,%4" \
1568 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1571 #define D32SARL(xra,xrb,xrc,SFT4) \
1573 __asm__ __volatile ("D32SARL xr%0,xr%1,xr%2,%3" \
1575 :"K"(xra),"K"(xrb),"K"(xrc),"K"(SFT4)); \
1578 #define D32SLLV(xra,xrd,rb) \
1580 __asm__ __volatile ("D32SLLV xr%0,xr%1,%z2" \
1582 :"K"(xra),"K"(xrd),"d"(rb)); \
1585 #define D32SLRV(xra,xrd,rb) \
1587 __asm__ __volatile ("D32SLRV xr%0,xr%1,%z2" \
1589 :"K"(xra),"K"(xrd),"d"(rb)); \
1592 #define D32SARV(xra,xrd,rb) \
1594 __asm__ __volatile ("D32SARV xr%0,xr%1,%z2" \
1596 :"K"(xra),"K"(xrd),"d"(rb)); \
1599 #define D32SARW(xra,xrb,xrc,rb) \
1601 __asm__ __volatile ("D32SARW xr%0,xr%1,xr%2,%3" \
1603 :"K"(xra),"K"(xrb),"K"(xrc),"d"(rb)); \
/* Per-halfword (4 x 16-bit) shift group, mirroring the D32 shifts above:
 * Q16SLL/Q16SLR/Q16SAR take a 4-bit immediate shift count SFT4 and operate
 * on xrb/xrc into xra/xrd; the *V forms take the shift amount from GPR rb.
 * NOTE(review): lane semantics inferred from the Q16 prefix by analogy
 * with D32SHIFT; confirm against the MXU ISA manual. */
1606 /**********************************Q16SHIFT******************************/
1607 #define Q16SLL(xra,xrb,xrc,xrd,SFT4) \
1609 __asm__ __volatile ("Q16SLL xr%0,xr%1,xr%2,xr%3,%4" \
1611 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1614 #define Q16SLR(xra,xrb,xrc,xrd,SFT4) \
1616 __asm__ __volatile ("Q16SLR xr%0,xr%1,xr%2,xr%3,%4" \
1618 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1621 #define Q16SAR(xra,xrb,xrc,xrd,SFT4) \
1623 __asm__ __volatile ("Q16SAR xr%0,xr%1,xr%2,xr%3,%4" \
1625 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1628 #define Q16SLLV(xra,xrd,rb) \
1630 __asm__ __volatile ("Q16SLLV xr%0,xr%1,%z2" \
1632 :"K"(xra),"K"(xrd),"d"(rb)); \
1635 #define Q16SLRV(xra,xrd,rb) \
1637 __asm__ __volatile ("Q16SLRV xr%0,xr%1,%z2" \
1639 :"K"(xra),"K"(xrd),"d"(rb)); \
1642 #define Q16SARV(xra,xrd,rb) \
1644 __asm__ __volatile ("Q16SARV xr%0,xr%1,%z2" \
1646 :"K"(xra),"K"(xrd),"d"(rb)); \
/* Lane-wise maximum/minimum: xra = max/min(xrb, xrc) per lane.
 * S32 = one 32-bit value, D16 = two 16-bit halves, Q8 = four 8-bit bytes
 * (lane widths follow the file's S32/D16/Q8 naming convention). */
1649 /*********************************MAX/MIN*********************************/
1650 #define S32MAX(xra,xrb,xrc) \
1652 __asm__ __volatile ("S32MAX xr%0,xr%1,xr%2" \
1654 :"K"(xra),"K"(xrb),"K"(xrc)); \
1657 #define S32MIN(xra,xrb,xrc) \
1659 __asm__ __volatile ("S32MIN xr%0,xr%1,xr%2" \
1661 :"K"(xra),"K"(xrb),"K"(xrc)); \
1664 #define D16MAX(xra,xrb,xrc) \
1666 __asm__ __volatile ("D16MAX xr%0,xr%1,xr%2" \
1668 :"K"(xra),"K"(xrb),"K"(xrc)); \
1671 #define D16MIN(xra,xrb,xrc) \
1673 __asm__ __volatile ("D16MIN xr%0,xr%1,xr%2" \
1675 :"K"(xra),"K"(xrb),"K"(xrc)); \
1678 #define Q8MAX(xra,xrb,xrc) \
1680 __asm__ __volatile ("Q8MAX xr%0,xr%1,xr%2" \
1682 :"K"(xra),"K"(xrb),"K"(xrc)); \
1685 #define Q8MIN(xra,xrb,xrc) \
1687 __asm__ __volatile ("Q8MIN xr%0,xr%1,xr%2" \
1689 :"K"(xra),"K"(xrb),"K"(xrc)); \
/* Moves between MIPS GPRs and MXU registers.
 * S32I2M: move GPR rb into MXU register xra ("integer to MXU").
 * S32M2I: move MXU register xra out to a GPR ("MXU to integer"); its
 * output-operand and result lines are not visible in this extract --
 * presumably a statement expression returning the value (see the
 * C_VERSION S32I2M/S32M2I macros earlier in this header); confirm in the
 * full file before editing. */
1692 /*************************************MOVE********************************/
1693 #define S32I2M(xra,rb) \
1695 __asm__ __volatile ("S32I2M xr%0,%z1" \
1697 :"K"(xra),"d"(rb)); \
1700 #define S32M2I(xra) \
1703 __asm__ __volatile ("S32M2I xr%1, %0" \
/* S32SFL: byte shuffle of xrb/xrc into xra/xrd; optn2 selects one of the
 * shuffle patterns, emitted as "ptn<N>" in the assembly text.
 * NOTE(review): exact per-pattern byte mapping not visible here; confirm
 * against the MXU ISA manual (patterns ptn0..ptn3 are used by
 * REVERSE_LD32 below for byte reversal). */
1709 /*********************************S32SFL**********************************/
1710 #define S32SFL(xra,xrb,xrc,xrd,optn2) \
1712 __asm__ __volatile ("S32SFL xr%0,xr%1,xr%2,xr%3,ptn%4" \
1714 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(optn2)); \
/* S32ALN: align -- presumably extracts a 32-bit word from the xrb:xrc
 * pair at a position selected by GPR rs (inferred from the ALN mnemonic;
 * verify against the MXU ISA manual). */
1717 /*********************************S32ALN**********************************/
1718 #define S32ALN(xra,xrb,xrc,rs) \
1720 __asm__ __volatile ("S32ALN xr%0,xr%1,xr%2,%z3" \
1722 :"K"(xra),"K"(xrb),"K"(xrc),"d"(rs)); \
/* Q16SAT: saturate the 16-bit lanes of xrb/xrc and pack the results into
 * xra (presumably 16-bit -> unsigned 8-bit saturation; inferred from the
 * SAT mnemonic -- verify). */
1725 /*********************************Q16SAT**********************************/
1726 #define Q16SAT(xra,xrb,xrc) \
1728 __asm__ __volatile ("Q16SAT xr%0,xr%1,xr%2" \
1730 :"K"(xra),"K"(xrb),"K"(xrc)); \
/* Op codes for the MIPS CACHE instruction (used by cache_op() below).
 * Low 2 bits select the cache (0 = I, 1 = D), upper bits the operation;
 * the values match the standard MIPS encodings (cf. Linux cacheops.h). */
1736 #define Index_Invalidate_I 0x00 /* invalidate I-line selected by index */
1737 #define Index_Writeback_Inv_D 0x01 /* writeback + invalidate D-line by index */
1738 #define Index_Load_Tag_I 0x04 /* read I-line tag into CP0 TagLo/TagHi */
1739 #define Index_Load_Tag_D 0x05 /* read D-line tag into CP0 TagLo/TagHi */
1740 #define Index_Store_Tag_I 0x08 /* write I-line tag from CP0 TagLo/TagHi */
1741 #define Index_Store_Tag_D 0x09 /* write D-line tag from CP0 TagLo/TagHi */
1742 #define Hit_Invalidate_I 0x10 /* invalidate I-line if address hits */
1743 #define Hit_Invalidate_D 0x11 /* invalidate D-line if address hits */
1744 #define Hit_Writeback_Inv_D 0x15 /* writeback + invalidate D-line on hit */
1745 #define Hit_Writeback_I 0x18 /* writeback I-line on hit */
1746 #define Hit_Writeback_D 0x19 /* writeback D-line on hit */
/* Hint values for the MIPS PREF instruction (used by mips_pref/i_pref). */
1751 #define PrefLoadStreamed 4 /* load, data used once (don't retain) */
1752 #define PrefStoreStreamed 5 /* store, data used once (don't retain) */
1753 #define PrefLoadRetained 6 /* load, data reused (keep in cache) */
1754 #define PrefStoreRetained 7 /* store, data reused (keep in cache) */
1755 #define PrefWBInval 25 /* writeback-invalidate the line */
1756 #define PrefNudge 25 /* MIPS calls hint 25 both WBInval and Nudge;
                            the duplicate value is intentional */
1757 #define PrefPreForStore 30 /* PrepareForStore: allocate line w/o fetch */
/* mips_pref: emit a PREF with compile-time hint `op' at offset(base),
 * inside a .set noreorder region.  The closing ".set reorder" / operand
 * section lines are not visible in this extract -- confirm in the full
 * file before editing. */
1759 #define mips_pref(base, offset, op) \
1760 __asm__ __volatile__( \
1761 " .set noreorder \n" \
1762 " pref %1, %2(%0) \n" \
1765 : "r" (base), "i" (op), "i" (offset))
/* cache_op: issue a CACHE instruction of type `op' (one of the opcode
 * constants above) on the line containing addr.  The "m" operand lets the
 * assembler form the addressing expression; dereferencing addr as
 * unsigned char* keeps the operand size byte-wide.  Lines between the
 * visible ones are not in this extract -- confirm in the full file. */
1767 #define cache_op(op, addr) \
1768 __asm__ __volatile__( \
1769 " .set noreorder \n" \
1770 " cache %0, %1 \n" \
1773 : "i" (op), "m" (*(unsigned char *)(addr)))
/* i_pref: statement-expression PREF with immediate hint/offset and a
 * "memory" clobber, so the compiler will not reorder memory accesses
 * across the prefetch (unlike mips_pref above, which has no clobber). */
1775 #define i_pref(hint,base,offset) \
1776 ({ __asm__ __volatile__("pref %0,%2(%1)"::"i"(hint),"r"(base),"i"(offset):"memory");})
/* Helper for unaligned 32-bit memory access.  Reading/writing through a
 * packed single-member struct makes the compiler emit whatever unaligned-
 * safe access sequence the target requires, instead of a plain word
 * load/store that may trap on MIPS.  (Classic FFmpeg-style idiom.) */
struct unaligned_32
{
    unsigned int l;
} __attribute__((packed));

/* Load a host-endian 32-bit value from the possibly unaligned address a. */
#define LD32(a)    (((const struct unaligned_32 *) (a))->l)
/* Store the 32-bit value b to the possibly unaligned address a.
 * The whole expansion is parenthesized (macro hygiene fix) so ST32 acts
 * as a single expression regardless of surrounding operators; it yields
 * the stored value, like any C assignment. */
#define ST32(a, b) ((((struct unaligned_32 *) (a))->l) = (b))
/* REVERSE_LD32: load a 32-bit word from offset s12 relative to GPR rb
 * (S32LDD), then run it through three S32SFL shuffle steps (ptn0, ptn3,
 * ptn2) -- presumably producing a byte-reversed (endianness-swapped)
 * value.  NOTE(review): the output-operand lines are missing from this
 * extract, so the %N numbering and which of xra/xrb finally holds the
 * result cannot be determined here; confirm in the full file and the MXU
 * ISA manual before relying on this description. */
1782 #define REVERSE_LD32(xra, xrb, rb, s12) \
1785 __asm__ __volatile ("S32LDD xr%1,%z3,%4\n\t" \
1786 "S32SFL xr%1,xr%1, xr%1, xr%2, ptn0\n\t" \
1787 "S32SFL xr%1,xr%2, xr%1, xr%2, ptn3\n\t" \
1788 "S32SFL xr%1,xr%2, xr%1, xr%2, ptn2\n\t" \
1791 :"K"(xra), "K"(xrb), "d"(rb), "I"(s12)); \
/* IU_CLZ: count leading zeros of GPR rb via the MIPS32 clz instruction
 * (a plain integer-unit op, not an MXU one).  The statement-expression /
 * output-operand lines are not visible in this extract -- presumably it
 * returns the count via a "=d" output; confirm in the full file. */
1795 #define IU_CLZ(rb) \
1798 __asm__ __volatile ("clz %0, %1" \
1804 #endif /* C_VERSION */
1806 #endif /* JZ_MXU_H_ */