From 4c7cbb8a62fc7441b80cc2b584ea87f38c49b9fe Mon Sep 17 00:00:00 2001
From: mcuelenaere
Date: Wed, 31 Dec 2008 01:57:07 +0000
Subject: [PATCH] Ingenic Jz4740:

* Clean up header file a bit
* Add information about the IPU
* Add original license
* Add Ingenic Media Extension Instruction Set header file

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19621 a1c6a512-1295-4272-9138-f99709370657
---
 firmware/export/jz4740.h |  309 ++++++--
 firmware/export/jz_mxu.h | 1806 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 2064 insertions(+), 51 deletions(-)
 create mode 100644 firmware/export/jz_mxu.h

diff --git a/firmware/export/jz4740.h b/firmware/export/jz4740.h
index 7fbfba283..4b6008a9a 100644
--- a/firmware/export/jz4740.h
+++ b/firmware/export/jz4740.h
@@ -1,3 +1,39 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |    _//  _ \_/ ___\|  |/ /|  __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    <  | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/     \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Maurus Cuelenaere
+ * Copyright (C) 2006-2007 by Ingenic Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/*
+ * linux/include/asm-mips/mach-jz4740/jz4740.h
+ *
+ * JZ4740 common definition.
+ *
+ * Copyright (C) 2006 - 2007 Ingenic Semiconductor Inc.
+ *
+ * Author:
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
 /*
  * Include file for Ingenic Semiconductor's JZ4740 CPU.
*/ @@ -6,50 +42,50 @@ #ifndef __ASSEMBLY__ -#define REG8(addr) (*(volatile unsigned char *)(addr)) +#define REG8(addr) (*(volatile unsigned char *)(addr)) #define REG16(addr) (*(volatile unsigned short *)(addr)) -#define REG32(addr) (*(volatile unsigned int *)(addr)) +#define REG32(addr) (*(volatile unsigned int *)(addr)) #endif /* !ASSEMBLY */ -//---------------------------------------------------------------------- -// Boot ROM Specification -// +/************************************************************************* + * Boot ROM Specification + */ /* NOR Boot config */ #define JZ4740_NORBOOT_8BIT 0x00000000 /* 8-bit data bus flash */ -#define JZ4740_NORBOOT_16BIT 0x10101010 /* 16-bit data bus flash */ -#define JZ4740_NORBOOT_32BIT 0x20202020 /* 32-bit data bus flash */ +#define JZ4740_NORBOOT_16BIT 0x10101010 /* 16-bit data bus flash */ +#define JZ4740_NORBOOT_32BIT 0x20202020 /* 32-bit data bus flash */ /* NAND Boot config */ #define JZ4740_NANDBOOT_B8R3 0xffffffff /* 8-bit bus & 3 row cycles */ #define JZ4740_NANDBOOT_B8R2 0xf0f0f0f0 /* 8-bit bus & 2 row cycles */ -#define JZ4740_NANDBOOT_B16R3 0x0f0f0f0f /* 16-bit bus & 3 row cycles */ -#define JZ4740_NANDBOOT_B16R2 0x00000000 /* 16-bit bus & 2 row cycles */ +#define JZ4740_NANDBOOT_B16R3 0x0f0f0f0f /* 16-bit bus & 3 row cycles */ +#define JZ4740_NANDBOOT_B16R2 0x00000000 /* 16-bit bus & 2 row cycles */ -//---------------------------------------------------------------------- -// Register Definitions -// +/************************************************************************* + * Register Definitions + */ #define CPM_BASE 0xB0000000 -#define INTC_BASE 0xB0001000 +#define INTC_BASE 0xB0001000 #define TCU_BASE 0xB0002000 #define WDT_BASE 0xB0002000 #define RTC_BASE 0xB0003000 -#define GPIO_BASE 0xB0010000 +#define GPIO_BASE 0xB0010000 #define AIC_BASE 0xB0020000 -#define ICDC_BASE 0xB0020000 +#define ICDC_BASE 0xB0020000 #define MSC_BASE 0xB0021000 -#define UART0_BASE 0xB0030000 +#define UART0_BASE 0xB0030000 #define I2C_BASE 0xB0042000 #define SSI_BASE 0xB0043000 -#define SADC_BASE 0xB0070000 +#define SADC_BASE 0xB0070000 #define EMC_BASE 0xB3010000 -#define DMAC_BASE 0xB3020000 +#define DMAC_BASE 0xB3020000 #define UHC_BASE 0xB3030000 #define UDC_BASE 0xB3040000 #define LCD_BASE 0xB3050000 -#define SLCD_BASE 0xB3050000 +#define SLCD_BASE 0xB3050000 #define CIM_BASE 0xB3060000 #define ETH_BASE 0xB3100000 @@ -59,43 +95,43 @@ *************************************************************************/ #define INTC_ISR (INTC_BASE + 0x00) #define INTC_IMR (INTC_BASE + 0x04) -#define INTC_IMSR (INTC_BASE + 0x08) -#define INTC_IMCR (INTC_BASE + 0x0c) +#define INTC_IMSR (INTC_BASE + 0x08) +#define INTC_IMCR (INTC_BASE + 0x0c) #define INTC_IPR (INTC_BASE + 0x10) #define REG_INTC_ISR REG32(INTC_ISR) #define REG_INTC_IMR REG32(INTC_IMR) -#define REG_INTC_IMSR REG32(INTC_IMSR) -#define REG_INTC_IMCR REG32(INTC_IMCR) +#define REG_INTC_IMSR REG32(INTC_IMSR) +#define REG_INTC_IMCR REG32(INTC_IMCR) #define REG_INTC_IPR REG32(INTC_IPR) // 1st-level interrupts #define IRQ_I2C 1 -#define IRQ_EMC 2 +#define IRQ_EMC 2 #define IRQ_UHC 3 -#define IRQ_UART0 9 -#define IRQ_SADC 12 +#define IRQ_UART0 9 +#define IRQ_SADC 12 #define IRQ_MSC 14 #define IRQ_RTC 15 #define IRQ_SSI 16 #define IRQ_CIM 17 #define IRQ_AIC 18 #define IRQ_ETH 19 -#define IRQ_DMAC 20 -#define IRQ_TCU2 21 -#define IRQ_TCU1 22 -#define IRQ_TCU0 23 -#define IRQ_UDC 24 -#define IRQ_GPIO3 25 -#define IRQ_GPIO2 26 -#define IRQ_GPIO1 27 -#define IRQ_GPIO0 28 +#define IRQ_DMAC 20 +#define 
IRQ_TCU2 21 +#define IRQ_TCU1 22 +#define IRQ_TCU0 23 +#define IRQ_UDC 24 +#define IRQ_GPIO3 25 +#define IRQ_GPIO2 26 +#define IRQ_GPIO1 27 +#define IRQ_GPIO0 28 #define IRQ_IPU 29 #define IRQ_LCD 30 // 2nd-level interrupts -#define IRQ_DMA_0 32 /* 32 to 37 for DMAC channel 0 to 5 */ -#define IRQ_GPIO_0 48 /* 48 to 175 for GPIO pin 0 to 127 */ +#define IRQ_DMA_0 32 /* 32 to 37 for DMAC channel 0 to 5 */ +#define IRQ_GPIO_0 48 /* 48 to 175 for GPIO pin 0 to 127 */ /************************************************************************* @@ -103,15 +139,15 @@ *************************************************************************/ #define RTC_RCR (RTC_BASE + 0x00) /* RTC Control Register */ #define RTC_RSR (RTC_BASE + 0x04) /* RTC Second Register */ -#define RTC_RSAR (RTC_BASE + 0x08) /* RTC Second Alarm Register */ +#define RTC_RSAR (RTC_BASE + 0x08) /* RTC Second Alarm Register */ #define RTC_RGR (RTC_BASE + 0x0c) /* RTC Regulator Register */ #define RTC_HCR (RTC_BASE + 0x20) /* Hibernate Control Register */ -#define RTC_HWFCR (RTC_BASE + 0x24) /* Hibernate Wakeup Filter Counter Reg */ -#define RTC_HRCR (RTC_BASE + 0x28) /* Hibernate Reset Counter Register */ -#define RTC_HWCR (RTC_BASE + 0x2c) /* Hibernate Wakeup Control Register */ -#define RTC_HWRSR (RTC_BASE + 0x30) /* Hibernate Wakeup Status Register */ -#define RTC_HSPR (RTC_BASE + 0x34) /* Hibernate Scratch Pattern Register */ +#define RTC_HWFCR (RTC_BASE + 0x24) /* Hibernate Wakeup Filter Counter Reg */ +#define RTC_HRCR (RTC_BASE + 0x28) /* Hibernate Reset Counter Register */ +#define RTC_HWCR (RTC_BASE + 0x2c) /* Hibernate Wakeup Control Register */ +#define RTC_HWRSR (RTC_BASE + 0x30) /* Hibernate Wakeup Status Register */ +#define RTC_HSPR (RTC_BASE + 0x34) /* Hibernate Scratch Pattern Register */ #define REG_RTC_RCR REG32(RTC_RCR) #define REG_RTC_RSR REG32(RTC_RSR) @@ -1447,14 +1483,14 @@ #define SSI_CR1_RTRG_120 (15<< SSI_CR1_RTRG_BIT) #define SSI_CR1_FLEN_BIT 4 #define SSI_CR1_FLEN_MASK (0xf << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_2BIT (0x0 << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_3BIT (0x1 << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_4BIT (0x2 << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_5BIT (0x3 << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_6BIT (0x4 << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_7BIT (0x5 << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_8BIT (0x6 << SSI_CR1_FLEN_BIT) - #define SSI_CR1_FLEN_9BIT (0x7 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_2BIT (0x0 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_3BIT (0x1 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_4BIT (0x2 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_5BIT (0x3 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_6BIT (0x4 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_7BIT (0x5 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_8BIT (0x6 << SSI_CR1_FLEN_BIT) + #define SSI_CR1_FLEN_9BIT (0x7 << SSI_CR1_FLEN_BIT) #define SSI_CR1_FLEN_10BIT (0x8 << SSI_CR1_FLEN_BIT) #define SSI_CR1_FLEN_11BIT (0x9 << SSI_CR1_FLEN_BIT) #define SSI_CR1_FLEN_12BIT (0xA << SSI_CR1_FLEN_BIT) @@ -4945,4 +4981,175 @@ do{ \ #endif /* !__ASSEMBLY__ */ + +#ifndef _IPU_H_ +#define _IPU_H_ + +// IPU_REG_BASE +#define IPU_P_BASE 0x13080000 +#define IPU__OFFSET 0x13080000 +#define IPU__SIZE 0x00001000 + +struct ipu_module +{ + unsigned int reg_ctrl; // 0x0 + unsigned int reg_status; // 0x4 + unsigned int reg_d_fmt; // 0x8 + unsigned int reg_y_addr; // 0xc + unsigned int reg_u_addr; // 0x10 + unsigned int reg_v_addr; // 0x14 + unsigned int reg_in_fm_gs; // 0x18 + unsigned int reg_y_stride; // 0x1c + unsigned 
int reg_uv_stride; // 0x20
+ unsigned int reg_out_addr; // 0x24
+ unsigned int reg_out_gs; // 0x28
+ unsigned int reg_out_stride; // 0x2c
+ unsigned int rsz_coef_index; // 0x30
+ unsigned int reg_csc_c0_coef; // 0x34
+ unsigned int reg_csc_c1_coef; // 0x38
+ unsigned int reg_csc_c2_coef; // 0x3c
+ unsigned int reg_csc_c3_coef; // 0x40
+ unsigned int reg_csc_c4_coef; // 0x44
+ unsigned int hrsz_coef_lut[20]; // 0x48
+ unsigned int vrsz_coef_lut[20]; // 0x98
+};
+
+typedef struct
+{
+ unsigned int coef;
+ unsigned short int in_n;
+ unsigned short int out_n;
+} rsz_lut;
+
+struct Ration2m
+{
+ float ratio;
+ int n, m;
+};
+
+
+// Register offset
+#define REG_CTRL 0x0
+#define REG_STATUS 0x4
+#define REG_D_FMT 0x8
+#define REG_Y_ADDR 0xc
+#define REG_U_ADDR 0x10
+#define REG_V_ADDR 0x14
+#define REG_IN_FM_GS 0x18
+#define REG_Y_STRIDE 0x1c
+#define REG_UV_STRIDE 0x20
+#define REG_OUT_ADDR 0x24
+#define REG_OUT_GS 0x28
+#define REG_OUT_STRIDE 0x2c
+#define REG_RSZ_COEF_INDEX 0x30
+#define REG_CSC_C0_COEF 0x34
+#define REG_CSC_C1_COEF 0x38
+#define REG_CSC_C2_COEF 0x3c
+#define REG_CSC_C3_COEF 0x40
+#define REG_CSC_C4_COEF 0x44
+#define HRSZ_LUT_BASE 0x48
+#define VRSZ_LUT_BASE 0x98
+
+// REG_CTRL field define
+#define IPU_EN (1 << 0)
+#define RSZ_EN (1 << 1)
+#define FM_IRQ_EN (1 << 2)
+#define IPU_RESET (1 << 3)
+#define H_UP_SCALE (1 << 8)
+#define V_UP_SCALE (1 << 9)
+#define H_SCALE_SHIFT (8)
+#define V_SCALE_SHIFT (9)
+
+// REG_STATUS field define
+#define OUT_END (1 << 0)
+
+// REG_D_FMT field define
+#define INFMT_YUV420 (0 << 0)
+#define INFMT_YUV422 (1 << 0)
+#define INFMT_YUV444 (2 << 0)
+#define INFMT_YUV411 (3 << 0)
+#define INFMT_YCbCr420 (4 << 0)
+#define INFMT_YCbCr422 (5 << 0)
+#define INFMT_YCbCr444 (6 << 0)
+#define INFMT_YCbCr411 (7 << 0)
+
+#define OUTFMT_RGB555 (0 << 16)
+#define OUTFMT_RGB565 (1 << 16)
+#define OUTFMT_RGB888 (2 << 16)
+
+// REG_IN_FM_GS field define
+#define IN_FM_W(val) ((val) << 16)
+#define IN_FM_H(val) ((val) << 0)
+
+// REG_OUT_GS field define
+#define OUT_FM_W(val) ((val) << 16)
+#define OUT_FM_H(val) ((val) << 0)
+
+// REG_UV_STRIDE field define
+#define U_STRIDE(val) ((val) << 16)
+#define V_STRIDE(val) ((val) << 0)
+
+
+#define VE_IDX_SFT 0
+#define HE_IDX_SFT 16
+
+// RSZ_LUT_FIELD
+#define OUT_N_SFT 0
+#define OUT_N_MSK 0x1
+#define IN_N_SFT 1
+#define IN_N_MSK 0x1
+#define W_COEF_SFT 2
+#define W_COEF_MSK 0xFF
+
+// functions for REG_CTRL
+#define stop_ipu(IPU_V_BASE) \
+ REG32(IPU_V_BASE + REG_CTRL) &= ~IPU_EN;
+
+#define run_ipu(IPU_V_BASE) \
+ REG32(IPU_V_BASE + REG_CTRL) |= IPU_EN;
+
+#define reset_ipu(IPU_V_BASE) \
+ REG32(IPU_V_BASE + REG_CTRL) |= IPU_RESET;
+
+#define disable_irq(IPU_V_BASE) \
+ REG32(IPU_V_BASE + REG_CTRL) &= ~FM_IRQ_EN;
+
+#define disable_rsize(IPU_V_BASE) \
+ REG32(IPU_V_BASE + REG_CTRL) &= ~RSZ_EN;
+
+#define enable_rsize(IPU_V_BASE) \
+ REG32(IPU_V_BASE + REG_CTRL) |= RSZ_EN;
+
+#define ipu_is_enable(IPU_V_BASE) \
+ (REG32(IPU_V_BASE + REG_CTRL) & IPU_EN)
+
+// functions for REG_STATUS
+#define clear_end_flag(IPU_V_BASE) \
+ REG32(IPU_V_BASE + REG_STATUS) &= ~OUT_END;
+
+#define polling_end_flag(IPU_V_BASE) \
+ (REG32(IPU_V_BASE + REG_STATUS) & OUT_END)
+
+/* parameter
+ R = 1.164 * (Y - 16) + 1.596 * (cr - 128) {C0, C1}
+ G = 1.164 * (Y - 16) - 0.392 * (cb - 128) - 0.813 * (cr - 128) {C0, C2, C3}
+ B = 1.164 * (Y - 16) + 2.017 * (cb - 128) {C0, C4}
+*/
+
+#if 1
+#define YUV_CSC_C0 0x4A8 /* 1.164 * 1024 */
+#define YUV_CSC_C1 0x662 /* 1.596 * 1024 */
+#define YUV_CSC_C2 0x191 /* 0.392 * 1024 */
+#define 
YUV_CSC_C3 0x341 /* 0.813 * 1024 */ +#define YUV_CSC_C4 0x811 /* 2.017 * 1024 */ +#else +#define YUV_CSC_C0 0x400 +#define YUV_CSC_C1 0x59C +#define YUV_CSC_C2 0x161 +#define YUV_CSC_C3 0x2DC +#define YUV_CSC_C4 0x718 +#endif + +#endif /* _IPU_H_ */ + #endif /* __JZ4740_H__ */ diff --git a/firmware/export/jz_mxu.h b/firmware/export/jz_mxu.h new file mode 100644 index 000000000..b833aedce --- /dev/null +++ b/firmware/export/jz_mxu.h @@ -0,0 +1,1806 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2008 by Maurus Cuelenaere + * Copyright (C) 2006-2007 by Ingenic Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +/* Jz47xx Ingenic Media Extension Instruction Set + + These are ~60 SIMD instructions for the Jz47xx MIPS core. + + To compile assembly files using these instructions, they + must be piped through a bash script called mxu_as. +*/ + +#ifndef JZ_MXU_H_ +#define JZ_MXU_H_ + +#define ptn0 0 +#define ptn1 1 +#define ptn2 2 +#define ptn3 3 + +#ifdef C_VERSION + +/* MXU registers */ + +#ifndef MXU_REGS_USE_ARRAY + +#define xr0 0 +static int xr1, xr2, xr3, xr4, xr5, xr6, xr7, xr8, xr9; +static int xr10, xr11, xr12, xr13, xr14, xr15, xr16; + +#else + +static int mxu_xr[17] = {0}; + +#define xr0 mxu_xr[ 0] +#define xr1 mxu_xr[ 1] +#define xr2 mxu_xr[ 2] +#define xr3 mxu_xr[ 3] +#define xr4 mxu_xr[ 4] +#define xr5 mxu_xr[ 5] +#define xr6 mxu_xr[ 6] +#define xr7 mxu_xr[ 7] +#define xr8 mxu_xr[ 8] +#define xr9 mxu_xr[ 9] +#define xr10 mxu_xr[10] +#define xr11 mxu_xr[11] +#define xr12 mxu_xr[12] +#define xr13 mxu_xr[13] +#define xr14 mxu_xr[14] +#define xr15 mxu_xr[15] +#define xr16 mxu_xr[16] + +#endif + +#else /* C_VERSION */ + +#define xr0 0 +#define xr1 1 +#define xr2 2 +#define xr3 3 +#define xr4 4 +#define xr5 5 +#define xr6 6 +#define xr7 7 +#define xr8 8 +#define xr9 9 +#define xr10 10 +#define xr11 11 +#define xr12 12 +#define xr13 13 +#define xr14 14 +#define xr15 15 +#define xr16 16 + +#endif /* C_VERSION */ + +#ifdef C_VERSION + +#define S32I2M(xr, r) if (&xr != mxu_xr) xr = r +#define S32M2I(xr) xr +#define S32LDD(xr, p, o) if (&xr != mxu_xr) xr = *(long*)((unsigned long)p + o) +#define S32STD(xr, p, o) *(long*)((unsigned long)p + o) = xr + +#define S32LDDV(xr, p, o, s) if (&xr != mxu_xr) xr = *(long*)((unsigned long)p + ((o) << s)) +#define S32STDV(xr, p, o, s) *(long*)((unsigned long)p + ((o) << s)) = xr + +#define S32LDIV(xra, rb, rc, strd2) \ +{\ + if (&xra != mxu_xr) xra = *(long*)((unsigned long)rb + ((rc) << strd2));\ + rb = (char*)rb + ((rc) << strd2);\ +} + +#define S32SDIV(xra, rb, rc, strd2) \ +{\ + *(long*)((unsigned long)rb + ((rc) << strd2)) = xra;\ + rb = (char*)rb + ((rc) << strd2);\ +} + +#define S32LDI(xra, rb, o) \ +{\ + if (&xra != mxu_xr) xra = *(long*)((unsigned long)rb + o);\ + rb = (char*)rb + o;\ +} + +#define S32SDI(xra, 
rb, o) \ +{\ + *(long*)((unsigned long)rb + o) = xra;\ + rb = (char*)rb + o;\ +} + +#define S32LDIV(xra, rb, rc, strd2) \ +{\ + if (&xra != mxu_xr) xra = *(long*)((unsigned long)rb + ((rc) << strd2));\ + rb = (char*)rb + ((rc) << strd2);\ +} + +#define S32SDIV(xra, rb, rc, strd2) \ +{\ + *(long*)((unsigned long)rb + ((rc) << strd2)) = xra;\ + rb = (char*)rb + ((rc) << strd2);\ +} + +#define Q16ADD_AS_WW(a, b, c, d) \ +{\ + short bh = b >> 16;\ + short bl = b & 0xFFFF;\ + short ch = c >> 16;\ + short cl = c & 0xFFFF;\ + int ah = bh + ch;\ + int al = bl + cl;\ + int dh = bh - ch;\ + int dl = bl - cl;\ + if (&a != mxu_xr) a = (ah << 16) | (al & 0xFFFF);\ + if (&d != mxu_xr) d = (dh << 16) | (dl & 0xFFFF);\ +} + +#define Q16ADD_AS_XW(a, b, c, d) \ +{\ + short bh = b >> 16;\ + short bl = b & 0xFFFF;\ + short ch = c >> 16;\ + short cl = c & 0xFFFF;\ + int ah = bl + ch;\ + int al = bh + cl;\ + int dh = bl - ch;\ + int dl = bh - cl;\ + if (&a != mxu_xr) a = (ah << 16) | (al & 0xFFFF);\ + if (&d != mxu_xr) d = (dh << 16) | (dl & 0xFFFF);\ +} + +#define Q16ADD_AA_WW(a, b, c, d) \ +{\ + short bh = b >> 16;\ + short bl = b & 0xFFFF;\ + short ch = c >> 16;\ + short cl = c & 0xFFFF;\ + int ah = bh + ch;\ + int al = bl + cl;\ + if (&a != mxu_xr) a = (ah << 16) | (al & 0xFFFF);\ + if (&d != mxu_xr) d = (ah << 16) | (al & 0xFFFF);\ +} + +#define D16MUL_LW(a, b, c, d)\ +{\ + short bl = b & 0xFFFF;\ + short cl = c & 0xFFFF;\ + short ch = c >> 16;\ + if (&a != mxu_xr) a = ch * bl;\ + if (&d != mxu_xr) d = cl * bl;\ +} + +#define D16MUL_WW(a, b, c, d)\ +{\ + short bh = b >> 16;\ + short bl = b & 0xFFFF;\ + short ch = c >> 16;\ + short cl = c & 0xFFFF;\ + if (&a != mxu_xr) a = ch * bh;\ + if (&d != mxu_xr) d = cl * bl;\ +} + +#define D16MAC_AA_LW(a, b, c, d)\ +{\ + short bl = b & 0xFFFF;\ + short cl = c & 0xFFFF;\ + short ch = c >> 16;\ + if (&a != mxu_xr) a += ch * bl;\ + if (&d != mxu_xr) d += cl * bl;\ +} + +#define D16MUL_HW(a, b, c, d)\ +{\ + short bh = b >> 16;\ + short cl = c & 0xFFFF;\ + short ch = c >> 16;\ + if (&a != mxu_xr) a = ch * bh;\ + if (&d != mxu_xr) d = cl * bh;\ +} + +#define D16MAC_AA_HW(a, b, c, d)\ +{\ + short bh = b >> 16;\ + short cl = c & 0xFFFF;\ + short ch = c >> 16;\ + if (&a != mxu_xr) a += ch * bh;\ + if (&d != mxu_xr) d += cl * bh;\ +} + +#define D32SLL(a, b, c, d, sft)\ +{\ + if (&a != mxu_xr) a = b << sft;\ + if (&d != mxu_xr) d = c << sft;\ +} + +#define D32SARL(a, b, c, sft) if (&a != mxu_xr) a = (((long)b >> sft) << 16) | (((long)c >> sft) & 0xFFFF) + +#define S32SFL(a, b, c, d, ptn) \ +{\ + unsigned char b3 = (unsigned char)((unsigned long)b >> 24);\ + unsigned char b2 = (unsigned char)((unsigned long)b >> 16);\ + unsigned char b1 = (unsigned char)((unsigned long)b >> 8);\ + unsigned char b0 = (unsigned char)((unsigned long)b >> 0);\ + unsigned char c3 = (unsigned char)((unsigned long)c >> 24);\ + unsigned char c2 = (unsigned char)((unsigned long)c >> 16);\ + unsigned char c1 = (unsigned char)((unsigned long)c >> 8);\ + unsigned char c0 = (unsigned char)((unsigned long)c >> 0);\ + unsigned char a3, a2, a1, a0, d3, d2, d1, d0;\ + if (ptn0 == ptn) \ + {\ + a3 = b3;\ + a2 = c3;\ + a1 = b2;\ + a0 = c2;\ + d3 = b1;\ + d2 = c1;\ + d1 = b0;\ + d0 = c0;\ + }\ + else if (ptn1 == ptn)\ + {\ + a3 = b3;\ + a2 = b1;\ + a1 = c3;\ + a0 = c1;\ + d3 = b2;\ + d2 = b0;\ + d1 = c2;\ + d0 = c0;\ + }\ + else if (ptn2 == ptn)\ + {\ + a3 = b3;\ + a2 = c3;\ + a1 = b1;\ + a0 = c1;\ + d3 = b2;\ + d2 = c2;\ + d1 = b0;\ + d0 = c0;\ + }\ + else if (ptn3 == ptn)\ + {\ + a3 = b3;\ + a2 = b2;\ + a1 = c3;\ 
+ a0 = c2;\
+ d3 = b1;\
+ d2 = b0;\
+ d1 = c1;\
+ d0 = c0;\
+ }\
+ if (&a != mxu_xr) a = ((unsigned long)a3 << 24) | ((unsigned long)a2 << 16) | ((unsigned long)a1 << 8) | (unsigned long)a0;\
+ if (&d != mxu_xr) d = ((unsigned long)d3 << 24) | ((unsigned long)d2 << 16) | ((unsigned long)d1 << 8) | (unsigned long)d0;\
+}
+
+#define D32SAR(a, b, c, d, sft)\
+{\
+ if (&a != mxu_xr) a = (long)b >> sft;\
+ if (&d != mxu_xr) d = (long)c >> sft;\
+}
+
+#define D32SLR(a, b, c, d, sft)\
+{\
+ if (&a != mxu_xr) a = (unsigned long)b >> sft;\
+ if (&d != mxu_xr) d = (unsigned long)c >> sft;\
+}
+#define Q16SLL(a,b,c,d,sft)\
+{\
+ short bh=b>>16;\
+ short bl=b&0xffff;\
+ short ch=c>>16;\
+ short cl=c&0xffff;\
+ if(&a!=mxu_xr) a=((bh<<sft)<<16)|((long)(bl<<sft) & 0xffff);\
+ if(&d!=mxu_xr) d=((ch<<sft)<<16)|((long)(cl<<sft) & 0xffff);\
+}
+
+#define Q16SAR(a,b,c,d,sft)\
+{\
+ short bh = b >> 16;\
+ short bl = b & 0xffff;\
+ short ch = c >> 16;\
+ short cl = c & 0xffff;\
+ if(&a!=mxu_xr) a=(((short)bh>>sft)<<16)|((long)((short)bl>>sft) & 0xffff);\
+ if(&d!=mxu_xr) d=(((short)ch>>sft)<<16)|((long)((short)cl>>sft) & 0xffff);\
+}
+
+#define D32ACC_AA(a, b, c, d)\
+{\
+ int _b = b;\
+ int _c = c;\
+ int _a = a;\
+ int _d = d;\
+ if (&a != mxu_xr) a = _a + _b + _c;\
+ if (&d != mxu_xr) d = _d + _b + _c;\
+}
+
+#define D32ACC_AS(a, b, c, d)\
+{\
+ int _b = b;\
+ int _c = c;\
+ int _a = a;\
+ int _d = d;\
+ if (&a != mxu_xr) a = _a + _b + _c;\
+ if (&d != mxu_xr) d = _d + _b - _c;\
+}
+
+#define D32ADD_AS(a, b, c, d)\
+{\
+ int _b = b;\
+ int _c = c;\
+ if (&a != mxu_xr) a = _b + _c;\
+ if (&d != mxu_xr) d = _b - _c;\
+}
+
+#define D32ADD_SS(a, b, c, d)\
+{\
+ int _b = b;\
+ int _c = c;\
+ if (&a != mxu_xr) a = _b - _c;\
+ if (&d != mxu_xr) d = _b - _c;\
+}
+
+#define D32ADD_AA(a, b, c, d)\
+{\
+ int _b = b;\
+ int _c = c;\
+ if (&a != mxu_xr) a = _b + _c;\
+ if (&d != mxu_xr) d = _b + _c;\
+}
+
+#define D16MADL_AA_WW(a, b, c, d) \
+ do { \
+ short _ah = a >> 16;\
+ short _al = (a << 16) >> 16;\
+ short _bh = b >> 16;\
+ short _bl = (b << 16) >> 16;\
+ short _ch = c >> 16;\
+ short _cl = (c << 16) >> 16;\
+ int L32, R32; \
+ L32 = _bh * _ch;\
+ R32 = _bl * _cl; \
+ _ah += (L32 << 16) >> 16; \
+ _al += (R32 << 16) >> 16; \
+ if (&d != mxu_xr) d = (_ah << 16) + (_al & 0xffff);\
+ } while (0)
+
+#define D16MACF_AA_WW(a, b, c, d) \
+ do { \
+ short _bh = b >> 16;\
+ short _bl = (b << 16) >> 16;\
+ short _ch = c >> 16;\
+ short _cl = (c << 16) >> 16;\
+ int L32, R32; \
+ L32 = (_bh * _ch) << 1;\
+ R32 = (_bl * _cl) << 1; \
+ L32 = a + L32; \
+ R32 = d + R32; \
+ if (&a != mxu_xr) a = ((((L32 >> 15) + 1) >> 1) << 16) + ((((R32 >> 15) + 1) >> 1) & 0xffff);\
+ } while (0)
+
+#define D16MAC_AA_WW(a, b, c, d) \
+do { \
+ short _bh = b >> 16;\
+ short _bl = (b << 16) >> 16;\
+ short _ch = c >> 16;\
+ short _cl = (c << 16) >> 16;\
+ int L32, R32; \
+ L32 = (_bh * _ch);\
+ R32 = (_bl * _cl); \
+ if (&a != mxu_xr) a = a + L32;\
+ if (&d != mxu_xr) d = d + R32;\
+ } while (0)
+
+#define D16MAC_SS_WW(a, b, c, d) \
+do { \
+ short _bh = b >> 16;\
+ short _bl = (b << 16) >> 16;\
+ short _ch = c >> 16;\
+ short _cl = (c << 16) >> 16;\
+ int L32, R32; \
+ L32 = (_bh * _ch);\
+ R32 = (_bl * _cl); \
+ if (&a != mxu_xr) a = a - L32;\
+ if (&d != mxu_xr) d = d - R32;\
+ } while (0)
+
+#define D16MAC_SA_HW(a, b, c, d) \
+do { \
+ short _bh = b >> 16;\
+ short _bl = (b << 16) >> 16;\
+ short _ch = c >> 16;\
+ short _cl = (c << 16) >> 16;\
+ int L32, R32; \
+ L32 = (_bh * _ch);\
+ R32 = (_bh * _cl); \
+ if (&a != mxu_xr) a = a - L32;\
+ if (&d != mxu_xr) d = d + R32;\
+ } while (0)
+
+#define D16MAC_SS_HW(a, b, c, d) \
+do { \
+ short _bh = b >> 16;\
+ short _bl = (b << 16) >> 16;\
+ short _ch
= c >> 16;\ + short _cl = (c << 16) >> 16;\ + int L32, R32; \ + L32 = (_bh * _ch);\ + R32 = (_bh * _cl); \ + if (&a != mxu_xr) a = a - L32;\ + if (&d != mxu_xr) d = d - R32;\ + } while (0) + +#define D16MAC_AS_HW(a, b, c, d) \ +do { \ + short _bh = b >> 16;\ + short _bl = (b << 16) >> 16;\ + short _ch = c >> 16;\ + short _cl = (c << 16) >> 16;\ + int L32, R32; \ + L32 = (_bh * _ch);\ + R32 = (_bh * _cl); \ + if (&a != mxu_xr) a = a + L32;\ + if (&d != mxu_xr) d = d - R32;\ + } while (0) + +#define D16MAC_AS_LW(a, b, c, d) \ +do { \ + short _bh = b >> 16;\ + short _bl = (b << 16) >> 16;\ + short _ch = c >> 16;\ + short _cl = (c << 16) >> 16;\ + int L32, R32; \ + L32 = (_bl * _ch);\ + R32 = (_bl * _cl); \ + if (&a != mxu_xr) a = a + L32;\ + if (&d != mxu_xr) d = d - R32;\ + } while (0) + + +#define D16MAC_SA_LW(a, b, c, d) \ +do { \ + short _bh = b >> 16;\ + short _bl = (b << 16) >> 16;\ + short _ch = c >> 16;\ + short _cl = (c << 16) >> 16;\ + int L32, R32; \ + L32 = (_bl * _ch);\ + R32 = (_bl * _cl); \ + if (&a != mxu_xr) a = a - L32;\ + if (&d != mxu_xr) d = d + R32;\ + } while (0) + +#define D16MAC_SS_LW(a, b, c, d) \ +do { \ + short _bh = b >> 16;\ + short _bl = (b << 16) >> 16;\ + short _ch = c >> 16;\ + short _cl = (c << 16) >> 16;\ + int L32, R32; \ + L32 = (_bl * _ch);\ + R32 = (_bl * _cl); \ + if (&a != mxu_xr) a = a - L32;\ + if (&d != mxu_xr) d = d - R32;\ + } while (0) + + +#define Q8ADDE_AA(xra, xrb, xrc, xrd) \ +{\ + unsigned char b3 = (unsigned char)((unsigned long)xrb >> 24);\ + unsigned char b2 = (unsigned char)((unsigned long)xrb >> 16);\ + unsigned char b1 = (unsigned char)((unsigned long)xrb >> 8);\ + unsigned char b0 = (unsigned char)((unsigned long)xrb >> 0);\ + unsigned char c3 = (unsigned char)((unsigned long)xrc >> 24);\ + unsigned char c2 = (unsigned char)((unsigned long)xrc >> 16);\ + unsigned char c1 = (unsigned char)((unsigned long)xrc >> 8);\ + unsigned char c0 = (unsigned char)((unsigned long)xrc >> 0);\ + short ah, al, dh, dl;\ + ah = b3 + c3;\ + al = b2 + c2;\ + dh = b1 + c1;\ + dl = b0 + c0;\ + if (&xra != mxu_xr) xra = ((unsigned long)ah << 16) | (unsigned short)al;\ + if (&xrd != mxu_xr) xrd = ((unsigned long)dh << 16) | (unsigned short)dl;\ +} + +#define Q16SAT(xra, xrb, xrc) \ +{\ + short bh = xrb >> 16;\ + short bl = xrb & 0xFFFF;\ + short ch = xrc >> 16;\ + short cl = xrc & 0xFFFF;\ + if (bh > 255) bh = 255;\ + if (bh < 0) bh = 0;\ + if (bl > 255) bl = 255;\ + if (bl < 0) bl = 0;\ + if (ch > 255) ch = 255;\ + if (ch < 0) ch = 0;\ + if (cl > 255) cl = 255;\ + if (cl < 0) cl = 0;\ + if (&xra != mxu_xr) xra = ((unsigned)bh << 24) | ((unsigned)bl << 16) | ((unsigned)ch << 8) | (unsigned)cl;\ +} + +#define Q8SAD(xra, xrb, xrc, xrd) \ +{\ + short b3 = (unsigned char)((unsigned long)xrb >> 24);\ + short b2 = (unsigned char)((unsigned long)xrb >> 16);\ + short b1 = (unsigned char)((unsigned long)xrb >> 8);\ + short b0 = (unsigned char)((unsigned long)xrb >> 0);\ + short c3 = (unsigned char)((unsigned long)xrc >> 24);\ + short c2 = (unsigned char)((unsigned long)xrc >> 16);\ + short c1 = (unsigned char)((unsigned long)xrc >> 8);\ + short c0 = (unsigned char)((unsigned long)xrc >> 0);\ + int int0, int1, int2, int3;\ + int3 = labs(b3 - c3);\ + int2 = labs(b2 - c2);\ + int1 = labs(b1 - c1);\ + int0 = labs(b0 - c0);\ + if (&xra != mxu_xr) xra = int0 + int1 + int2 + int3;\ + if (&xrd != mxu_xr) xrd += int0 + int1 + int2 + int3;\ +} + +#define Q8AVGR(xra, xrb, xrc) \ +{\ + short b3 = (unsigned char)((unsigned long)xrb >> 24);\ + short b2 = (unsigned char)((unsigned 
long)xrb >> 16);\ + short b1 = (unsigned char)((unsigned long)xrb >> 8);\ + short b0 = (unsigned char)((unsigned long)xrb >> 0);\ + short c3 = (unsigned char)((unsigned long)xrc >> 24);\ + short c2 = (unsigned char)((unsigned long)xrc >> 16);\ + short c1 = (unsigned char)((unsigned long)xrc >> 8);\ + short c0 = (unsigned char)((unsigned long)xrc >> 0);\ + unsigned char a3, a2, a1, a0;\ + a3 = (unsigned char)((b3 + c3 + 1) >> 1);\ + a2 = (unsigned char)((b2 + c2 + 1) >> 1);\ + a1 = (unsigned char)((b1 + c1 + 1) >> 1);\ + a0 = (unsigned char)((b0 + c0 + 1) >> 1);\ + if (&xra != mxu_xr) xra = ((unsigned long)a3 << 24) | ((unsigned long)a2 << 16) | ((unsigned long)a1 << 8) | (unsigned long)a0;\ +} + +#define S32ALN(xra, xrb, xrc, rs) \ +{\ + if (0 == rs)\ + {\ + if (&xra != mxu_xr) xra = xrb;\ + }\ + else if (1 == rs)\ + {\ + if (&xra != mxu_xr) xra = (xrb << 8) | ((unsigned long)xrc >> 24);\ + }\ + else if (2 == rs)\ + {\ + if (&xra != mxu_xr) xra = (xrb << 16) | ((unsigned long)xrc >> 16);\ + }\ + else if (3 == rs)\ + {\ + if (&xra != mxu_xr) xra = (xrb << 24) | ((unsigned long)xrc >> 8);\ + }\ + else if (4 == rs)\ + {\ + if (&xra != mxu_xr) xra = xrc;\ + }\ +} + +#else /* C_VERSION */ + +/***********************************LD/SD***********************************/ +#define S32LDD(xra,rb,s12) \ + do { \ + __asm__ __volatile ("S32LDD xr%0,%z1,%2" \ + : \ + :"K"(xra),"d" (rb),"I"(s12)); \ + } while (0) + +#define S32STD(xra,rb,s12) \ + do { \ + __asm__ __volatile ("S32STD xr%0,%z1,%2" \ + : \ + :"K"(xra),"d" (rb),"I"(s12):"memory"); \ + } while (0) + +#define S32LDDV(xra,rb,rc,strd2) \ + do { \ + __asm__ __volatile ("S32LDDV xr%0,%z1,%z2,%3" \ + : \ + :"K"(xra),"d" (rb),"d"(rc),"K"(strd2)); \ + } while (0) + +#define S32STDV(xra,rb,rc,strd2) \ + do { \ + __asm__ __volatile ("S32STDV xr%0,%z1,%z2,%3" \ + : \ + :"K"(xra),"d" (rb),"d"(rc),"K"(strd2):"memory"); \ + } while (0) + +#define S32LDI(xra,rb,s12) \ + do { \ + __asm__ __volatile ("S32LDI xr%1,%z0,%2" \ + :"+d" (rb) \ + :"K"(xra),"I"(s12)); \ + } while (0) + +#define S32SDI(xra,rb,s12) \ + do { \ + __asm__ __volatile ("S32SDI xr%1,%z0,%2" \ + :"+d" (rb) \ + :"K"(xra),"I"(s12):"memory"); \ + } while (0) + +#define S32LDIV(xra,rb,rc,strd2) \ + do { \ + __asm__ __volatile ("S32LDIV xr%1,%z0,%z2,%3" \ + :"+d" (rb) \ + :"K"(xra),"d"(rc),"K"(strd2)); \ + } while (0) + +#define S32SDIV(xra,rb,rc,strd2) \ + do { \ + __asm__ __volatile ("S32SDIV xr%1,%z0,%z2,%3" \ + :"+d" (rb) \ + :"K"(xra),"d"(rc),"K"(strd2):"memory"); \ + } while (0) + +/***********************************D16MUL***********************************/ +#define D16MUL_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MUL_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MUL_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MUL_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/**********************************D16MULF*******************************/ +#define D16MULF_WW(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define D16MULF_LW(xra,xrb,xrc) \ + do { \ + 
__asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define D16MULF_HW(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define D16MULF_XW(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +/***********************************D16MAC********************************/ +#define D16MAC_AA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_AA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_AA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_AA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_AS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_AS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_AS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_AS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MAC_SS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/**********************************D16MACF*******************************/ +#define D16MACF_AA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define 
D16MACF_AA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_AA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_AA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_AS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_AS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_AS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_AS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MACF_SS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/**********************************D16MADL*******************************/ +#define D16MADL_AA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_AA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_AA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_AA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_AS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,WW" \ + : \ + 
:"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_AS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_AS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_AS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D16MADL_SS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************S16MAD*******************************/ +#define S16MAD_A_HH(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,0" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define S16MAD_A_LL(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,1" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define S16MAD_A_HL(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define S16MAD_A_LH(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,3" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define S16MAD_S_HH(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,0" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define S16MAD_S_LL(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,1" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define S16MAD_S_HL(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define S16MAD_S_LH(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,3" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************Q8MUL********************************/ +#define Q8MUL(xra,xrb,xrc,xrd) \ + do { 
\ + __asm__ __volatile ("Q8MUL xr%0,xr%1,xr%2,xr%3" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************Q8MAC********************************/ +#define Q8MAC_AA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8MAC_AS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8MAC_SA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8MAC_SS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************Q8MADL********************************/ +#define Q8MADL_AA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8MADL_AS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8MADL_SA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8MADL_SS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************D32ADD********************************/ +#define D32ADD_AA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D32ADD_AS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D32ADD_SA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D32ADD_SS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************D32ACC********************************/ +#define D32ACC_AA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D32ACC_AS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D32ACC_SA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define D32ACC_SS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************S32CPS********************************/ +#define S32CPS(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("S32CPS xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define S32ABS(xra,xrb) \ + do { \ + __asm__ __volatile ("S32CPS xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrb)); \ + } while (0) + +/***********************************Q16ADD********************************/ +#define 
Q16ADD_AA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_AA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_AA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_AA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) +#define Q16ADD_AS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_AS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_AS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_AS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SA_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SA_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SA_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SA_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SS_WW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,WW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SS_LW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,LW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SS_HW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,HW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ADD_SS_XW(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,XW" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/***********************************Q16ACC********************************/ +#define Q16ACC_AA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ACC_AS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ACC_SA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q16ACC_SS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + 
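+/*
+ * Usage sketch (illustrative; not from the original Ingenic sources):
+ * the load/store macros and the SIMD arithmetic macros combine roughly
+ * like this to average two rows of 8-bit pixels.  The names src1, src2,
+ * dst and width are hypothetical.  S32LDI/S32SDI access rb + offset and
+ * then write the incremented address back to rb, so the row pointers
+ * start one word before the first pixel.  A file containing such a loop
+ * must be piped through the mxu_as script (see the top of this header).
+ *
+ *     int n;
+ *     for (n = 0; n < width; n += 4)
+ *     {
+ *         S32LDI(xr1, src1, 4);   // xr1 = *(long *)(src1 + 4), src1 += 4
+ *         S32LDI(xr2, src2, 4);   // xr2 = *(long *)(src2 + 4), src2 += 4
+ *         Q8AVGR(xr3, xr1, xr2);  // per-byte rounded average
+ *         S32SDI(xr3, dst, 4);    // *(long *)(dst + 4) = xr3, dst += 4
+ *     }
+ */
+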
+/***********************************D16CPS********************************/ +#define D16CPS(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16CPS xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define D16ABS(xra,xrb) \ + do { \ + __asm__ __volatile ("D16CPS xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrb)); \ + } while (0) + +/*******************************D16AVG/D16AVGR*****************************/ +#define D16AVG(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16AVG xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) +#define D16AVGR(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16AVGR xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +/************************************Q8ADD********************************/ +#define Q8ADD_AA(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define Q8ADD_AS(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define Q8ADD_SA(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define Q8ADD_SS(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +/************************************Q8ADDE********************************/ +#define Q8ADDE_AA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8ADDE_AS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8ADDE_SA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8ADDE_SS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/************************************Q8ACCE********************************/ +#define Q8ACCE_AA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,AA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8ACCE_AS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,AS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8ACCE_SA(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,SA" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +#define Q8ACCE_SS(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,SS" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ + } while (0) + +/************************************Q8ABD********************************/ +#define Q8ABD(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8ABD xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +/************************************Q8SLT********************************/ +#define Q8SLT(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8SLT xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +/************************************Q8SAD********************************/ +#define Q8SAD(xra,xrb,xrc,xrd) \ + do { \ + __asm__ __volatile ("Q8SAD xr%0,xr%1,xr%2,xr%3" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \ 
+ } while (0) + +/********************************Q8AVG/Q8AVGR*****************************/ +#define Q8AVG(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8AVG xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) +#define Q8AVGR(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8AVGR xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +/**********************************D32SHIFT******************************/ +#define D32SLL(xra,xrb,xrc,xrd,SFT4) \ + do { \ + __asm__ __volatile ("D32SLL xr%0,xr%1,xr%2,xr%3,%4" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \ + } while (0) + +#define D32SLR(xra,xrb,xrc,xrd,SFT4) \ + do { \ + __asm__ __volatile ("D32SLR xr%0,xr%1,xr%2,xr%3,%4" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \ + } while (0) + +#define D32SAR(xra,xrb,xrc,xrd,SFT4) \ + do { \ + __asm__ __volatile ("D32SAR xr%0,xr%1,xr%2,xr%3,%4" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \ + } while (0) + +#define D32SARL(xra,xrb,xrc,SFT4) \ + do { \ + __asm__ __volatile ("D32SARL xr%0,xr%1,xr%2,%3" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(SFT4)); \ + } while (0) + +#define D32SLLV(xra,xrd,rb) \ + do { \ + __asm__ __volatile ("D32SLLV xr%0,xr%1,%z2" \ + : \ + :"K"(xra),"K"(xrd),"d"(rb)); \ + } while (0) + +#define D32SLRV(xra,xrd,rb) \ + do { \ + __asm__ __volatile ("D32SLRV xr%0,xr%1,%z2" \ + : \ + :"K"(xra),"K"(xrd),"d"(rb)); \ + } while (0) + +#define D32SARV(xra,xrd,rb) \ + do { \ + __asm__ __volatile ("D32SARV xr%0,xr%1,%z2" \ + : \ + :"K"(xra),"K"(xrd),"d"(rb)); \ + } while (0) + +#define D32SARW(xra,xrb,xrc,rb) \ + do { \ + __asm__ __volatile ("D32SARW xr%0,xr%1,xr%2,%3" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"d"(rb)); \ + } while (0) + +/**********************************Q16SHIFT******************************/ +#define Q16SLL(xra,xrb,xrc,xrd,SFT4) \ + do { \ + __asm__ __volatile ("Q16SLL xr%0,xr%1,xr%2,xr%3,%4" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \ + } while (0) + +#define Q16SLR(xra,xrb,xrc,xrd,SFT4) \ + do { \ + __asm__ __volatile ("Q16SLR xr%0,xr%1,xr%2,xr%3,%4" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \ + } while (0) + +#define Q16SAR(xra,xrb,xrc,xrd,SFT4) \ + do { \ + __asm__ __volatile ("Q16SAR xr%0,xr%1,xr%2,xr%3,%4" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \ + } while (0) + +#define Q16SLLV(xra,xrd,rb) \ + do { \ + __asm__ __volatile ("Q16SLLV xr%0,xr%1,%z2" \ + : \ + :"K"(xra),"K"(xrd),"d"(rb)); \ + } while (0) + +#define Q16SLRV(xra,xrd,rb) \ + do { \ + __asm__ __volatile ("Q16SLRV xr%0,xr%1,%z2" \ + : \ + :"K"(xra),"K"(xrd),"d"(rb)); \ + } while (0) + +#define Q16SARV(xra,xrd,rb) \ + do { \ + __asm__ __volatile ("Q16SARV xr%0,xr%1,%z2" \ + : \ + :"K"(xra),"K"(xrd),"d"(rb)); \ + } while (0) + +/*********************************MAX/MIN*********************************/ +#define S32MAX(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("S32MAX xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define S32MIN(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("S32MIN xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define D16MAX(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16MAX xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define D16MIN(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("D16MIN xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define Q8MAX(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8MAX xr%0,xr%1,xr%2" \ + : \ + 
:"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +#define Q8MIN(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q8MIN xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +/*************************************MOVE********************************/ +#define S32I2M(xra,rb) \ + do { \ + __asm__ __volatile ("S32I2M xr%0,%z1" \ + : \ + :"K"(xra),"d"(rb)); \ + } while (0) + +#define S32M2I(xra) \ +__extension__ ({ \ + int __d; \ + __asm__ __volatile ("S32M2I xr%1, %0" \ + :"=d"(__d) \ + :"K"(xra)); \ + __d; \ +}) + +/*********************************S32SFL**********************************/ +#define S32SFL(xra,xrb,xrc,xrd,optn2) \ + do { \ + __asm__ __volatile ("S32SFL xr%0,xr%1,xr%2,xr%3,ptn%4" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(optn2)); \ + } while (0) + +/*********************************S32ALN**********************************/ +#define S32ALN(xra,xrb,xrc,rs) \ + do { \ + __asm__ __volatile ("S32ALN xr%0,xr%1,xr%2,%z3" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc),"d"(rs)); \ + } while (0) + +/*********************************Q16SAT**********************************/ +#define Q16SAT(xra,xrb,xrc) \ + do { \ + __asm__ __volatile ("Q16SAT xr%0,xr%1,xr%2" \ + : \ + :"K"(xra),"K"(xrb),"K"(xrc)); \ + } while (0) + +// cache ops + +// cache +#define Index_Invalidate_I 0x00 +#define Index_Writeback_Inv_D 0x01 +#define Index_Load_Tag_I 0x04 +#define Index_Load_Tag_D 0x05 +#define Index_Store_Tag_I 0x08 +#define Index_Store_Tag_D 0x09 +#define Hit_Invalidate_I 0x10 +#define Hit_Invalidate_D 0x11 +#define Hit_Writeback_Inv_D 0x15 +#define Hit_Writeback_I 0x18 +#define Hit_Writeback_D 0x19 + +// pref +#define PrefLoad 0 +#define PrefStore 1 +#define PrefLoadStreamed 4 +#define PrefStoreStreamed 5 +#define PrefLoadRetained 6 +#define PrefStoreRetained 7 +#define PrefWBInval 25 +#define PrefNudge 25 +#define PrefPreForStore 30 + +#define mips_pref(base, offset, op) \ + __asm__ __volatile__( \ + " .set noreorder \n" \ + " pref %1, %2(%0) \n" \ + " .set reorder" \ + : \ + : "r" (base), "i" (op), "i" (offset)) + +#define cache_op(op, addr) \ + __asm__ __volatile__( \ + " .set noreorder \n" \ + " cache %0, %1 \n" \ + " .set reorder" \ + : \ + : "i" (op), "m" (*(unsigned char *)(addr))) + +#define i_pref(hint,base,offset) \ + ({ __asm__ __volatile__("pref %0,%2(%1)"::"i"(hint),"r"(base),"i"(offset):"memory");}) + +struct unaligned_32 { unsigned int l; } __attribute__((packed)); +#define LD32(a) (((const struct unaligned_32 *) (a))->l) +#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) + +#define REVERSE_LD32(xra, xrb, rb, s12) \ +__extension__ ({ \ + int __d; \ + __asm__ __volatile ("S32LDD xr%1,%z3,%4\n\t" \ + "S32SFL xr%1,xr%1, xr%1, xr%2, ptn0\n\t" \ + "S32SFL xr%1,xr%2, xr%1, xr%2, ptn3\n\t" \ + "S32SFL xr%1,xr%2, xr%1, xr%2, ptn2\n\t" \ + "S32M2I xr%1,%0" \ + :"=d"(__d) \ + :"K"(xra), "K"(xrb), "d"(rb), "I"(s12)); \ + __d; \ +}) + +#define IU_CLZ(rb) \ +__extension__ ({ \ + int __d; \ + __asm__ __volatile ("clz %0, %1" \ + :"=d"(__d) \ + :"d"(rb)); \ + __d; \ +}) + +#endif /* C_VERSION */ + +#endif /* JZ_MXU_H_ */ -- 2.11.4.GIT