/*
 *  Header file for wrappers around MSA instructions assembler invocations
 *
 *  Copyright (C) 2018  Wave Computing, Inc.
 *  Copyright (C) 2018  Aleksandar Markovic <amarkovic@wavecomp.com>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 */
22 #ifndef WRAPPERS_MSA_H
23 #define WRAPPERS_MSA_H
/*
 * DO_MSA__WD__WS(suffix, mnemonic)
 *
 * Defines do_msa_<suffix>(input, output), a wrapper around a single
 * two-operand MSA instruction of the form "<mnemonic> wd, ws".
 *
 *   input  - address the source vector is loaded from (ld.d into $w11)
 *   output - address the result vector is stored to (st.d from $w10)
 *
 * `mnemonic` is stringified into the asm template, so it must be spelled
 * without whitespace between its tokens (e.g. nloc.b, not nloc .b).
 *
 * $t0 is used as the address scratch register and is therefore listed as
 * clobbered; "memory" is listed because the asm reads and writes through
 * the two pointers behind the compiler's back.
 */
#define DO_MSA__WD__WS(suffix, mnemonic)                               \
static inline void do_msa_##suffix(void *input, void *output)          \
{                                                                      \
   __asm__ volatile (                                                  \
      "move $t0, %0\n\t"                                               \
      "ld.d $w11, 0($t0)\n\t"                                          \
      #mnemonic " $w10, $w11\n\t"                                      \
      "move $t0, %1\n\t"                                               \
      "st.d $w10, 0($t0)\n\t"                                          \
      :                                                                \
      : "r" (input), "r" (output)                                      \
      : "t0", "memory"                                                 \
   );                                                                  \
}
41 DO_MSA__WD__WS(NLOC_B
, nloc
.b
)
42 DO_MSA__WD__WS(NLOC_H
, nloc
.h
)
43 DO_MSA__WD__WS(NLOC_W
, nloc
.w
)
44 DO_MSA__WD__WS(NLOC_D
, nloc
.d
)
46 DO_MSA__WD__WS(NLZC_B
, nlzc
.b
)
47 DO_MSA__WD__WS(NLZC_H
, nlzc
.h
)
48 DO_MSA__WD__WS(NLZC_W
, nlzc
.w
)
49 DO_MSA__WD__WS(NLZC_D
, nlzc
.d
)
51 DO_MSA__WD__WS(PCNT_B
, pcnt
.b
)
52 DO_MSA__WD__WS(PCNT_H
, pcnt
.h
)
53 DO_MSA__WD__WS(PCNT_W
, pcnt
.w
)
54 DO_MSA__WD__WS(PCNT_D
, pcnt
.d
)
/*
 * DO_MSA__WD__WS_WT(suffix, mnemonic)
 *
 * Defines do_msa_<suffix>(input1, input2, output), a wrapper around a
 * single three-operand MSA instruction of the form "<mnemonic> wd, ws, wt".
 *
 *   input1 - address the first source vector is loaded from (into $w11)
 *   input2 - address the second source vector is loaded from (into $w12)
 *   output - address the result vector is stored to (from $w10)
 *
 * `mnemonic` is stringified into the asm template, so it must be spelled
 * without whitespace between its tokens (e.g. ilvev.b, not ilvev .b).
 *
 * $t0 is used as the address scratch register and is therefore listed as
 * clobbered; "memory" is listed because the asm reads and writes through
 * the three pointers behind the compiler's back.
 */
#define DO_MSA__WD__WS_WT(suffix, mnemonic)                            \
static inline void do_msa_##suffix(void *input1, void *input2,         \
                                   void *output)                       \
{                                                                      \
   __asm__ volatile (                                                  \
      "move $t0, %0\n\t"                                               \
      "ld.d $w11, 0($t0)\n\t"                                          \
      "move $t0, %1\n\t"                                               \
      "ld.d $w12, 0($t0)\n\t"                                          \
      #mnemonic " $w10, $w11, $w12\n\t"                                \
      "move $t0, %2\n\t"                                               \
      "st.d $w10, 0($t0)\n\t"                                          \
      :                                                                \
      : "r" (input1), "r" (input2), "r" (output)                       \
      : "t0", "memory"                                                 \
   );                                                                  \
}
75 DO_MSA__WD__WS_WT(ILVEV_B
, ilvev
.b
)
76 DO_MSA__WD__WS_WT(ILVEV_H
, ilvev
.h
)
77 DO_MSA__WD__WS_WT(ILVEV_W
, ilvev
.w
)
78 DO_MSA__WD__WS_WT(ILVEV_D
, ilvev
.d
)
80 DO_MSA__WD__WS_WT(ILVOD_B
, ilvod
.b
)
81 DO_MSA__WD__WS_WT(ILVOD_H
, ilvod
.h
)
82 DO_MSA__WD__WS_WT(ILVOD_W
, ilvod
.w
)
83 DO_MSA__WD__WS_WT(ILVOD_D
, ilvod
.d
)
85 DO_MSA__WD__WS_WT(ILVL_B
, ilvl
.b
)
86 DO_MSA__WD__WS_WT(ILVL_H
, ilvl
.h
)
87 DO_MSA__WD__WS_WT(ILVL_W
, ilvl
.w
)
88 DO_MSA__WD__WS_WT(ILVL_D
, ilvl
.d
)
90 DO_MSA__WD__WS_WT(ILVR_B
, ilvr
.b
)
91 DO_MSA__WD__WS_WT(ILVR_H
, ilvr
.h
)
92 DO_MSA__WD__WS_WT(ILVR_W
, ilvr
.w
)
93 DO_MSA__WD__WS_WT(ILVR_D
, ilvr
.d
)
95 DO_MSA__WD__WS_WT(AND_V
, and.v
)
96 DO_MSA__WD__WS_WT(NOR_V
, nor
.v
)
97 DO_MSA__WD__WS_WT(OR_V
, or.v
)
98 DO_MSA__WD__WS_WT(XOR_V
, xor.v
)
100 DO_MSA__WD__WS_WT(CEQ_B
, ceq
.b
)
101 DO_MSA__WD__WS_WT(CEQ_H
, ceq
.h
)
102 DO_MSA__WD__WS_WT(CEQ_W
, ceq
.w
)
103 DO_MSA__WD__WS_WT(CEQ_D
, ceq
.d
)
105 DO_MSA__WD__WS_WT(CLE_S_B
, cle_s
.b
)
106 DO_MSA__WD__WS_WT(CLE_S_H
, cle_s
.h
)
107 DO_MSA__WD__WS_WT(CLE_S_W
, cle_s
.w
)
108 DO_MSA__WD__WS_WT(CLE_S_D
, cle_s
.d
)
110 DO_MSA__WD__WS_WT(CLE_U_B
, cle_u
.b
)
111 DO_MSA__WD__WS_WT(CLE_U_H
, cle_u
.h
)
112 DO_MSA__WD__WS_WT(CLE_U_W
, cle_u
.w
)
113 DO_MSA__WD__WS_WT(CLE_U_D
, cle_u
.d
)
115 DO_MSA__WD__WS_WT(CLT_S_B
, clt_s
.b
)
116 DO_MSA__WD__WS_WT(CLT_S_H
, clt_s
.h
)
117 DO_MSA__WD__WS_WT(CLT_S_W
, clt_s
.w
)
118 DO_MSA__WD__WS_WT(CLT_S_D
, clt_s
.d
)
120 DO_MSA__WD__WS_WT(CLT_U_B
, clt_u
.b
)
121 DO_MSA__WD__WS_WT(CLT_U_H
, clt_u
.h
)
122 DO_MSA__WD__WS_WT(CLT_U_W
, clt_u
.w
)
123 DO_MSA__WD__WS_WT(CLT_U_D
, clt_u
.d
)
125 DO_MSA__WD__WS_WT(MAX_A_B
, max_a
.b
)
126 DO_MSA__WD__WS_WT(MAX_A_H
, max_a
.h
)
127 DO_MSA__WD__WS_WT(MAX_A_W
, max_a
.w
)
128 DO_MSA__WD__WS_WT(MAX_A_D
, max_a
.d
)
130 DO_MSA__WD__WS_WT(MIN_A_B
, min_a
.b
)
131 DO_MSA__WD__WS_WT(MIN_A_H
, min_a
.h
)
132 DO_MSA__WD__WS_WT(MIN_A_W
, min_a
.w
)
133 DO_MSA__WD__WS_WT(MIN_A_D
, min_a
.d
)
135 DO_MSA__WD__WS_WT(MAX_S_B
, max_s
.b
)
136 DO_MSA__WD__WS_WT(MAX_S_H
, max_s
.h
)
137 DO_MSA__WD__WS_WT(MAX_S_W
, max_s
.w
)
138 DO_MSA__WD__WS_WT(MAX_S_D
, max_s
.d
)
140 DO_MSA__WD__WS_WT(MIN_S_B
, min_s
.b
)
141 DO_MSA__WD__WS_WT(MIN_S_H
, min_s
.h
)
142 DO_MSA__WD__WS_WT(MIN_S_W
, min_s
.w
)
143 DO_MSA__WD__WS_WT(MIN_S_D
, min_s
.d
)
145 DO_MSA__WD__WS_WT(MAX_U_B
, max_u
.b
)
146 DO_MSA__WD__WS_WT(MAX_U_H
, max_u
.h
)
147 DO_MSA__WD__WS_WT(MAX_U_W
, max_u
.w
)
148 DO_MSA__WD__WS_WT(MAX_U_D
, max_u
.d
)
150 DO_MSA__WD__WS_WT(MIN_U_B
, min_u
.b
)
151 DO_MSA__WD__WS_WT(MIN_U_H
, min_u
.h
)
152 DO_MSA__WD__WS_WT(MIN_U_W
, min_u
.w
)
153 DO_MSA__WD__WS_WT(MIN_U_D
, min_u
.d
)
155 DO_MSA__WD__WS_WT(BCLR_B
, bclr
.b
)
156 DO_MSA__WD__WS_WT(BCLR_H
, bclr
.h
)
157 DO_MSA__WD__WS_WT(BCLR_W
, bclr
.w
)
158 DO_MSA__WD__WS_WT(BCLR_D
, bclr
.d
)
160 DO_MSA__WD__WS_WT(BSET_B
, bset
.b
)
161 DO_MSA__WD__WS_WT(BSET_H
, bset
.h
)
162 DO_MSA__WD__WS_WT(BSET_W
, bset
.w
)
163 DO_MSA__WD__WS_WT(BSET_D
, bset
.d
)
165 DO_MSA__WD__WS_WT(BNEG_B
, bneg
.b
)
166 DO_MSA__WD__WS_WT(BNEG_H
, bneg
.h
)
167 DO_MSA__WD__WS_WT(BNEG_W
, bneg
.w
)
168 DO_MSA__WD__WS_WT(BNEG_D
, bneg
.d
)
170 DO_MSA__WD__WS_WT(PCKEV_B
, pckev
.b
)
171 DO_MSA__WD__WS_WT(PCKEV_H
, pckev
.h
)
172 DO_MSA__WD__WS_WT(PCKEV_W
, pckev
.w
)
173 DO_MSA__WD__WS_WT(PCKEV_D
, pckev
.d
)
175 DO_MSA__WD__WS_WT(PCKOD_B
, pckod
.b
)
176 DO_MSA__WD__WS_WT(PCKOD_H
, pckod
.h
)
177 DO_MSA__WD__WS_WT(PCKOD_W
, pckod
.w
)
178 DO_MSA__WD__WS_WT(PCKOD_D
, pckod
.d
)
180 DO_MSA__WD__WS_WT(VSHF_B
, vshf
.b
)
181 DO_MSA__WD__WS_WT(VSHF_H
, vshf
.h
)
182 DO_MSA__WD__WS_WT(VSHF_W
, vshf
.w
)
183 DO_MSA__WD__WS_WT(VSHF_D
, vshf
.d
)
185 DO_MSA__WD__WS_WT(SLL_B
, sll
.b
)
186 DO_MSA__WD__WS_WT(SLL_H
, sll
.h
)
187 DO_MSA__WD__WS_WT(SLL_W
, sll
.w
)
188 DO_MSA__WD__WS_WT(SLL_D
, sll
.d
)
190 DO_MSA__WD__WS_WT(SRA_B
, sra
.b
)
191 DO_MSA__WD__WS_WT(SRA_H
, sra
.h
)
192 DO_MSA__WD__WS_WT(SRA_W
, sra
.w
)
193 DO_MSA__WD__WS_WT(SRA_D
, sra
.d
)
195 DO_MSA__WD__WS_WT(SRAR_B
, srar
.b
)
196 DO_MSA__WD__WS_WT(SRAR_H
, srar
.h
)
197 DO_MSA__WD__WS_WT(SRAR_W
, srar
.w
)
198 DO_MSA__WD__WS_WT(SRAR_D
, srar
.d
)
200 DO_MSA__WD__WS_WT(SRL_B
, srl
.b
)
201 DO_MSA__WD__WS_WT(SRL_H
, srl
.h
)
202 DO_MSA__WD__WS_WT(SRL_W
, srl
.w
)
203 DO_MSA__WD__WS_WT(SRL_D
, srl
.d
)
205 DO_MSA__WD__WS_WT(SRLR_B
, srlr
.b
)
206 DO_MSA__WD__WS_WT(SRLR_H
, srlr
.h
)
207 DO_MSA__WD__WS_WT(SRLR_W
, srlr
.w
)
208 DO_MSA__WD__WS_WT(SRLR_D
, srlr
.d
)
210 DO_MSA__WD__WS_WT(BMNZ_V
, bmnz
.v
)
211 DO_MSA__WD__WS_WT(BMZ_V
, bmz
.v
)
213 DO_MSA__WD__WS_WT(FMAX_W
, fmax
.w
)
214 DO_MSA__WD__WS_WT(FMAX_D
, fmax
.d
)
216 DO_MSA__WD__WS_WT(FMAX_A_W
, fmax_a
.w
)
217 DO_MSA__WD__WS_WT(FMAX_A_D
, fmax_a
.d
)
219 DO_MSA__WD__WS_WT(FMIN_W
, fmin
.w
)
220 DO_MSA__WD__WS_WT(FMIN_D
, fmin
.d
)
222 DO_MSA__WD__WS_WT(FMIN_A_W
, fmin_a
.w
)
223 DO_MSA__WD__WS_WT(FMIN_A_D
, fmin_a
.d
)