2 * Copyright 2010 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
24 * Authors: Dave Airlie
30 #include "evergreend.h"
31 #include "evergreen_reg_safe.h"
32 #include "cayman_reg_safe.h"
/* Simple min/max helpers. Arguments may be evaluated twice — callers must
 * not pass expressions with side effects.
 */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
/* Forward declaration — defined later in this file. */
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
					  struct radeon_cs_reloc **cs_reloc);
40 struct evergreen_cs_track
{
47 u32 cb_color_base_last
[12];
48 struct radeon_bo
*cb_color_bo
[12];
49 u32 cb_color_bo_offset
[12];
50 struct radeon_bo
*cb_color_fmask_bo
[8];
51 struct radeon_bo
*cb_color_cmask_bo
[8];
52 u32 cb_color_info
[12];
53 u32 cb_color_view
[12];
54 u32 cb_color_pitch_idx
[12];
55 u32 cb_color_slice_idx
[12];
56 u32 cb_color_dim_idx
[12];
58 u32 cb_color_pitch
[12];
59 u32 cb_color_slice
[12];
60 u32 cb_color_attrib
[12];
61 u32 cb_color_cmask_slice
[8];
62 u32 cb_color_fmask_slice
[8];
65 u32 vgt_strmout_config
;
66 u32 vgt_strmout_buffer_config
;
67 struct radeon_bo
*vgt_strmout_bo
[4];
68 u64 vgt_strmout_bo_mc
[4];
69 u32 vgt_strmout_bo_offset
[4];
70 u32 vgt_strmout_size
[4];
75 u32 db_depth_size_idx
;
79 u32 db_z_write_offset
;
80 struct radeon_bo
*db_z_read_bo
;
81 struct radeon_bo
*db_z_write_bo
;
85 u32 db_s_write_offset
;
86 struct radeon_bo
*db_s_read_bo
;
87 struct radeon_bo
*db_s_write_bo
;
90 static u32
evergreen_cs_get_aray_mode(u32 tiling_flags
)
92 if (tiling_flags
& RADEON_TILING_MACRO
)
93 return ARRAY_2D_TILED_THIN1
;
94 else if (tiling_flags
& RADEON_TILING_MICRO
)
95 return ARRAY_1D_TILED_THIN1
;
97 return ARRAY_LINEAR_GENERAL
;
100 static u32
evergreen_cs_get_num_banks(u32 nbanks
)
104 return ADDR_SURF_2_BANK
;
106 return ADDR_SURF_4_BANK
;
109 return ADDR_SURF_8_BANK
;
111 return ADDR_SURF_16_BANK
;
115 static void evergreen_cs_track_init(struct evergreen_cs_track
*track
)
119 for (i
= 0; i
< 8; i
++) {
120 track
->cb_color_fmask_bo
[i
] = NULL
;
121 track
->cb_color_cmask_bo
[i
] = NULL
;
122 track
->cb_color_cmask_slice
[i
] = 0;
123 track
->cb_color_fmask_slice
[i
] = 0;
126 for (i
= 0; i
< 12; i
++) {
127 track
->cb_color_base_last
[i
] = 0;
128 track
->cb_color_bo
[i
] = NULL
;
129 track
->cb_color_bo_offset
[i
] = 0xFFFFFFFF;
130 track
->cb_color_info
[i
] = 0;
131 track
->cb_color_view
[i
] = 0xFFFFFFFF;
132 track
->cb_color_pitch_idx
[i
] = 0;
133 track
->cb_color_slice_idx
[i
] = 0;
134 track
->cb_color_dim
[i
] = 0;
135 track
->cb_color_pitch
[i
] = 0;
136 track
->cb_color_slice
[i
] = 0;
137 track
->cb_color_dim
[i
] = 0;
139 track
->cb_target_mask
= 0xFFFFFFFF;
140 track
->cb_shader_mask
= 0xFFFFFFFF;
142 track
->db_depth_view
= 0xFFFFC000;
143 track
->db_depth_size
= 0xFFFFFFFF;
144 track
->db_depth_size_idx
= 0;
145 track
->db_depth_control
= 0xFFFFFFFF;
146 track
->db_z_info
= 0xFFFFFFFF;
147 track
->db_z_idx
= 0xFFFFFFFF;
148 track
->db_z_read_offset
= 0xFFFFFFFF;
149 track
->db_z_write_offset
= 0xFFFFFFFF;
150 track
->db_z_read_bo
= NULL
;
151 track
->db_z_write_bo
= NULL
;
152 track
->db_s_info
= 0xFFFFFFFF;
153 track
->db_s_idx
= 0xFFFFFFFF;
154 track
->db_s_read_offset
= 0xFFFFFFFF;
155 track
->db_s_write_offset
= 0xFFFFFFFF;
156 track
->db_s_read_bo
= NULL
;
157 track
->db_s_write_bo
= NULL
;
159 for (i
= 0; i
< 4; i
++) {
160 track
->vgt_strmout_size
[i
] = 0;
161 track
->vgt_strmout_bo
[i
] = NULL
;
162 track
->vgt_strmout_bo_offset
[i
] = 0xFFFFFFFF;
163 track
->vgt_strmout_bo_mc
[i
] = 0xFFFFFFFF;
168 /* value gathered from cs */
184 unsigned long base_align
;
187 static int evergreen_surface_check_linear(struct radeon_cs_parser
*p
,
188 struct eg_surface
*surf
,
191 surf
->layer_size
= surf
->nbx
* surf
->nby
* surf
->bpe
* surf
->nsamples
;
192 surf
->base_align
= surf
->bpe
;
198 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser
*p
,
199 struct eg_surface
*surf
,
202 struct evergreen_cs_track
*track
= p
->track
;
205 palign
= MAX(64, track
->group_size
/ surf
->bpe
);
206 surf
->layer_size
= surf
->nbx
* surf
->nby
* surf
->bpe
* surf
->nsamples
;
207 surf
->base_align
= track
->group_size
;
208 surf
->palign
= palign
;
210 if (surf
->nbx
& (palign
- 1)) {
212 dev_warn(p
->dev
, "%s:%d %s pitch %d invalid must be aligned with %d\n",
213 __func__
, __LINE__
, prefix
, surf
->nbx
, palign
);
220 static int evergreen_surface_check_1d(struct radeon_cs_parser
*p
,
221 struct eg_surface
*surf
,
224 struct evergreen_cs_track
*track
= p
->track
;
227 palign
= track
->group_size
/ (8 * surf
->bpe
* surf
->nsamples
);
228 palign
= MAX(8, palign
);
229 surf
->layer_size
= surf
->nbx
* surf
->nby
* surf
->bpe
;
230 surf
->base_align
= track
->group_size
;
231 surf
->palign
= palign
;
233 if ((surf
->nbx
& (palign
- 1))) {
235 dev_warn(p
->dev
, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
236 __func__
, __LINE__
, prefix
, surf
->nbx
, palign
,
237 track
->group_size
, surf
->bpe
, surf
->nsamples
);
241 if ((surf
->nby
& (8 - 1))) {
243 dev_warn(p
->dev
, "%s:%d %s height %d invalid must be aligned with 8\n",
244 __func__
, __LINE__
, prefix
, surf
->nby
);
251 static int evergreen_surface_check_2d(struct radeon_cs_parser
*p
,
252 struct eg_surface
*surf
,
255 struct evergreen_cs_track
*track
= p
->track
;
256 unsigned palign
, halign
, tileb
, slice_pt
;
258 tileb
= 64 * surf
->bpe
* surf
->nsamples
;
259 palign
= track
->group_size
/ (8 * surf
->bpe
* surf
->nsamples
);
260 palign
= MAX(8, palign
);
262 if (tileb
> surf
->tsplit
) {
263 slice_pt
= tileb
/ surf
->tsplit
;
265 tileb
= tileb
/ slice_pt
;
266 /* macro tile width & height */
267 palign
= (8 * surf
->bankw
* track
->npipes
) * surf
->mtilea
;
268 halign
= (8 * surf
->bankh
* surf
->nbanks
) / surf
->mtilea
;
269 surf
->layer_size
= surf
->nbx
* surf
->nby
* surf
->bpe
* slice_pt
;
270 surf
->base_align
= (palign
/ 8) * (halign
/ 8) * tileb
;
271 surf
->palign
= palign
;
272 surf
->halign
= halign
;
274 if ((surf
->nbx
& (palign
- 1))) {
276 dev_warn(p
->dev
, "%s:%d %s pitch %d invalid must be aligned with %d\n",
277 __func__
, __LINE__
, prefix
, surf
->nbx
, palign
);
281 if ((surf
->nby
& (halign
- 1))) {
283 dev_warn(p
->dev
, "%s:%d %s height %d invalid must be aligned with %d\n",
284 __func__
, __LINE__
, prefix
, surf
->nby
, halign
);
292 static int evergreen_surface_check(struct radeon_cs_parser
*p
,
293 struct eg_surface
*surf
,
296 /* some common value computed here */
297 surf
->bpe
= r600_fmt_get_blocksize(surf
->format
);
299 switch (surf
->mode
) {
300 case ARRAY_LINEAR_GENERAL
:
301 return evergreen_surface_check_linear(p
, surf
, prefix
);
302 case ARRAY_LINEAR_ALIGNED
:
303 return evergreen_surface_check_linear_aligned(p
, surf
, prefix
);
304 case ARRAY_1D_TILED_THIN1
:
305 return evergreen_surface_check_1d(p
, surf
, prefix
);
306 case ARRAY_2D_TILED_THIN1
:
307 return evergreen_surface_check_2d(p
, surf
, prefix
);
309 dev_warn(p
->dev
, "%s:%d invalid array mode %d\n",
310 __func__
, __LINE__
, surf
->mode
);
316 static int evergreen_surface_value_conv_check(struct radeon_cs_parser
*p
,
317 struct eg_surface
*surf
,
320 switch (surf
->mode
) {
321 case ARRAY_2D_TILED_THIN1
:
323 case ARRAY_LINEAR_GENERAL
:
324 case ARRAY_LINEAR_ALIGNED
:
325 case ARRAY_1D_TILED_THIN1
:
328 dev_warn(p
->dev
, "%s:%d invalid array mode %d\n",
329 __func__
, __LINE__
, surf
->mode
);
333 switch (surf
->nbanks
) {
334 case 0: surf
->nbanks
= 2; break;
335 case 1: surf
->nbanks
= 4; break;
336 case 2: surf
->nbanks
= 8; break;
337 case 3: surf
->nbanks
= 16; break;
339 dev_warn(p
->dev
, "%s:%d %s invalid number of banks %d\n",
340 __func__
, __LINE__
, prefix
, surf
->nbanks
);
343 switch (surf
->bankw
) {
344 case 0: surf
->bankw
= 1; break;
345 case 1: surf
->bankw
= 2; break;
346 case 2: surf
->bankw
= 4; break;
347 case 3: surf
->bankw
= 8; break;
349 dev_warn(p
->dev
, "%s:%d %s invalid bankw %d\n",
350 __func__
, __LINE__
, prefix
, surf
->bankw
);
353 switch (surf
->bankh
) {
354 case 0: surf
->bankh
= 1; break;
355 case 1: surf
->bankh
= 2; break;
356 case 2: surf
->bankh
= 4; break;
357 case 3: surf
->bankh
= 8; break;
359 dev_warn(p
->dev
, "%s:%d %s invalid bankh %d\n",
360 __func__
, __LINE__
, prefix
, surf
->bankh
);
363 switch (surf
->mtilea
) {
364 case 0: surf
->mtilea
= 1; break;
365 case 1: surf
->mtilea
= 2; break;
366 case 2: surf
->mtilea
= 4; break;
367 case 3: surf
->mtilea
= 8; break;
369 dev_warn(p
->dev
, "%s:%d %s invalid macro tile aspect %d\n",
370 __func__
, __LINE__
, prefix
, surf
->mtilea
);
373 switch (surf
->tsplit
) {
374 case 0: surf
->tsplit
= 64; break;
375 case 1: surf
->tsplit
= 128; break;
376 case 2: surf
->tsplit
= 256; break;
377 case 3: surf
->tsplit
= 512; break;
378 case 4: surf
->tsplit
= 1024; break;
379 case 5: surf
->tsplit
= 2048; break;
380 case 6: surf
->tsplit
= 4096; break;
382 dev_warn(p
->dev
, "%s:%d %s invalid tile split %d\n",
383 __func__
, __LINE__
, prefix
, surf
->tsplit
);
389 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser
*p
, unsigned id
)
391 struct evergreen_cs_track
*track
= p
->track
;
392 struct eg_surface surf
;
393 unsigned pitch
, slice
, mslice
;
394 unsigned long offset
;
397 mslice
= G_028C6C_SLICE_MAX(track
->cb_color_view
[id
]) + 1;
398 pitch
= track
->cb_color_pitch
[id
];
399 slice
= track
->cb_color_slice
[id
];
400 surf
.nbx
= (pitch
+ 1) * 8;
401 surf
.nby
= ((slice
+ 1) * 64) / surf
.nbx
;
402 surf
.mode
= G_028C70_ARRAY_MODE(track
->cb_color_info
[id
]);
403 surf
.format
= G_028C70_FORMAT(track
->cb_color_info
[id
]);
404 surf
.tsplit
= G_028C74_TILE_SPLIT(track
->cb_color_attrib
[id
]);
405 surf
.nbanks
= G_028C74_NUM_BANKS(track
->cb_color_attrib
[id
]);
406 surf
.bankw
= G_028C74_BANK_WIDTH(track
->cb_color_attrib
[id
]);
407 surf
.bankh
= G_028C74_BANK_HEIGHT(track
->cb_color_attrib
[id
]);
408 surf
.mtilea
= G_028C74_MACRO_TILE_ASPECT(track
->cb_color_attrib
[id
]);
411 if (!r600_fmt_is_valid_color(surf
.format
)) {
412 dev_warn(p
->dev
, "%s:%d cb invalid format %d for %d (0x%08x)\n",
413 __func__
, __LINE__
, surf
.format
,
414 id
, track
->cb_color_info
[id
]);
418 r
= evergreen_surface_value_conv_check(p
, &surf
, "cb");
423 r
= evergreen_surface_check(p
, &surf
, "cb");
425 dev_warn(p
->dev
, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
426 __func__
, __LINE__
, id
, track
->cb_color_pitch
[id
],
427 track
->cb_color_slice
[id
], track
->cb_color_attrib
[id
],
428 track
->cb_color_info
[id
]);
432 offset
= track
->cb_color_bo_offset
[id
] << 8;
433 if (offset
& (surf
.base_align
- 1)) {
434 dev_warn(p
->dev
, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
435 __func__
, __LINE__
, id
, offset
, surf
.base_align
);
439 offset
+= surf
.layer_size
* mslice
;
440 if (offset
> radeon_bo_size(track
->cb_color_bo
[id
])) {
441 dev_warn(p
->dev
, "%s:%d cb[%d] bo too small (layer size %d, "
442 "offset %d, max layer %d, bo size %ld, slice %d)\n",
443 __func__
, __LINE__
, id
, surf
.layer_size
,
444 track
->cb_color_bo_offset
[id
] << 8, mslice
,
445 radeon_bo_size(track
->cb_color_bo
[id
]), slice
);
446 dev_warn(p
->dev
, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
447 __func__
, __LINE__
, surf
.nbx
, surf
.nby
,
448 surf
.mode
, surf
.bpe
, surf
.nsamples
,
449 surf
.bankw
, surf
.bankh
,
450 surf
.tsplit
, surf
.mtilea
);
457 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser
*p
)
459 struct evergreen_cs_track
*track
= p
->track
;
460 struct eg_surface surf
;
461 unsigned pitch
, slice
, mslice
;
462 unsigned long offset
;
465 mslice
= G_028008_SLICE_MAX(track
->db_depth_view
) + 1;
466 pitch
= G_028058_PITCH_TILE_MAX(track
->db_depth_size
);
467 slice
= track
->db_depth_slice
;
468 surf
.nbx
= (pitch
+ 1) * 8;
469 surf
.nby
= ((slice
+ 1) * 64) / surf
.nbx
;
470 surf
.mode
= G_028040_ARRAY_MODE(track
->db_z_info
);
471 surf
.format
= G_028044_FORMAT(track
->db_s_info
);
472 surf
.tsplit
= G_028044_TILE_SPLIT(track
->db_s_info
);
473 surf
.nbanks
= G_028040_NUM_BANKS(track
->db_z_info
);
474 surf
.bankw
= G_028040_BANK_WIDTH(track
->db_z_info
);
475 surf
.bankh
= G_028040_BANK_HEIGHT(track
->db_z_info
);
476 surf
.mtilea
= G_028040_MACRO_TILE_ASPECT(track
->db_z_info
);
479 if (surf
.format
!= 1) {
480 dev_warn(p
->dev
, "%s:%d stencil invalid format %d\n",
481 __func__
, __LINE__
, surf
.format
);
484 /* replace by color format so we can use same code */
485 surf
.format
= V_028C70_COLOR_8
;
487 r
= evergreen_surface_value_conv_check(p
, &surf
, "stencil");
492 r
= evergreen_surface_check(p
, &surf
, NULL
);
494 /* old userspace doesn't compute proper depth/stencil alignment
495 * check that alignment against a bigger byte per elements and
496 * only report if that alignment is wrong too.
498 surf
.format
= V_028C70_COLOR_8_8_8_8
;
499 r
= evergreen_surface_check(p
, &surf
, "stencil");
501 dev_warn(p
->dev
, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
502 __func__
, __LINE__
, track
->db_depth_size
,
503 track
->db_depth_slice
, track
->db_s_info
, track
->db_z_info
);
508 offset
= track
->db_s_read_offset
<< 8;
509 if (offset
& (surf
.base_align
- 1)) {
510 dev_warn(p
->dev
, "%s:%d stencil read bo base %ld not aligned with %ld\n",
511 __func__
, __LINE__
, offset
, surf
.base_align
);
514 offset
+= surf
.layer_size
* mslice
;
515 if (offset
> radeon_bo_size(track
->db_s_read_bo
)) {
516 dev_warn(p
->dev
, "%s:%d stencil read bo too small (layer size %d, "
517 "offset %ld, max layer %d, bo size %ld)\n",
518 __func__
, __LINE__
, surf
.layer_size
,
519 (unsigned long)track
->db_s_read_offset
<< 8, mslice
,
520 radeon_bo_size(track
->db_s_read_bo
));
521 dev_warn(p
->dev
, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
522 __func__
, __LINE__
, track
->db_depth_size
,
523 track
->db_depth_slice
, track
->db_s_info
, track
->db_z_info
);
527 offset
= track
->db_s_write_offset
<< 8;
528 if (offset
& (surf
.base_align
- 1)) {
529 dev_warn(p
->dev
, "%s:%d stencil write bo base %ld not aligned with %ld\n",
530 __func__
, __LINE__
, offset
, surf
.base_align
);
533 offset
+= surf
.layer_size
* mslice
;
534 if (offset
> radeon_bo_size(track
->db_s_write_bo
)) {
535 dev_warn(p
->dev
, "%s:%d stencil write bo too small (layer size %d, "
536 "offset %ld, max layer %d, bo size %ld)\n",
537 __func__
, __LINE__
, surf
.layer_size
,
538 (unsigned long)track
->db_s_write_offset
<< 8, mslice
,
539 radeon_bo_size(track
->db_s_write_bo
));
546 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser
*p
)
548 struct evergreen_cs_track
*track
= p
->track
;
549 struct eg_surface surf
;
550 unsigned pitch
, slice
, mslice
;
551 unsigned long offset
;
554 mslice
= G_028008_SLICE_MAX(track
->db_depth_view
) + 1;
555 pitch
= G_028058_PITCH_TILE_MAX(track
->db_depth_size
);
556 slice
= track
->db_depth_slice
;
557 surf
.nbx
= (pitch
+ 1) * 8;
558 surf
.nby
= ((slice
+ 1) * 64) / surf
.nbx
;
559 surf
.mode
= G_028040_ARRAY_MODE(track
->db_z_info
);
560 surf
.format
= G_028040_FORMAT(track
->db_z_info
);
561 surf
.tsplit
= G_028040_TILE_SPLIT(track
->db_z_info
);
562 surf
.nbanks
= G_028040_NUM_BANKS(track
->db_z_info
);
563 surf
.bankw
= G_028040_BANK_WIDTH(track
->db_z_info
);
564 surf
.bankh
= G_028040_BANK_HEIGHT(track
->db_z_info
);
565 surf
.mtilea
= G_028040_MACRO_TILE_ASPECT(track
->db_z_info
);
568 switch (surf
.format
) {
570 surf
.format
= V_028C70_COLOR_16
;
573 case V_028040_Z_32_FLOAT
:
574 surf
.format
= V_028C70_COLOR_8_8_8_8
;
577 dev_warn(p
->dev
, "%s:%d depth invalid format %d\n",
578 __func__
, __LINE__
, surf
.format
);
582 r
= evergreen_surface_value_conv_check(p
, &surf
, "depth");
584 dev_warn(p
->dev
, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
585 __func__
, __LINE__
, track
->db_depth_size
,
586 track
->db_depth_slice
, track
->db_z_info
);
590 r
= evergreen_surface_check(p
, &surf
, "depth");
592 dev_warn(p
->dev
, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
593 __func__
, __LINE__
, track
->db_depth_size
,
594 track
->db_depth_slice
, track
->db_z_info
);
598 offset
= track
->db_z_read_offset
<< 8;
599 if (offset
& (surf
.base_align
- 1)) {
600 dev_warn(p
->dev
, "%s:%d stencil read bo base %ld not aligned with %ld\n",
601 __func__
, __LINE__
, offset
, surf
.base_align
);
604 offset
+= surf
.layer_size
* mslice
;
605 if (offset
> radeon_bo_size(track
->db_z_read_bo
)) {
606 dev_warn(p
->dev
, "%s:%d depth read bo too small (layer size %d, "
607 "offset %ld, max layer %d, bo size %ld)\n",
608 __func__
, __LINE__
, surf
.layer_size
,
609 (unsigned long)track
->db_z_read_offset
<< 8, mslice
,
610 radeon_bo_size(track
->db_z_read_bo
));
614 offset
= track
->db_z_write_offset
<< 8;
615 if (offset
& (surf
.base_align
- 1)) {
616 dev_warn(p
->dev
, "%s:%d stencil write bo base %ld not aligned with %ld\n",
617 __func__
, __LINE__
, offset
, surf
.base_align
);
620 offset
+= surf
.layer_size
* mslice
;
621 if (offset
> radeon_bo_size(track
->db_z_write_bo
)) {
622 dev_warn(p
->dev
, "%s:%d depth write bo too small (layer size %d, "
623 "offset %ld, max layer %d, bo size %ld)\n",
624 __func__
, __LINE__
, surf
.layer_size
,
625 (unsigned long)track
->db_z_write_offset
<< 8, mslice
,
626 radeon_bo_size(track
->db_z_write_bo
));
633 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser
*p
,
634 struct radeon_bo
*texture
,
635 struct radeon_bo
*mipmap
,
638 struct eg_surface surf
;
639 unsigned long toffset
, moffset
;
640 unsigned dim
, llevel
, mslice
, width
, height
, depth
, i
;
644 texdw
[0] = radeon_get_ib_value(p
, idx
+ 0);
645 texdw
[1] = radeon_get_ib_value(p
, idx
+ 1);
646 texdw
[2] = radeon_get_ib_value(p
, idx
+ 2);
647 texdw
[3] = radeon_get_ib_value(p
, idx
+ 3);
648 texdw
[4] = radeon_get_ib_value(p
, idx
+ 4);
649 texdw
[5] = radeon_get_ib_value(p
, idx
+ 5);
650 texdw
[6] = radeon_get_ib_value(p
, idx
+ 6);
651 texdw
[7] = radeon_get_ib_value(p
, idx
+ 7);
652 dim
= G_030000_DIM(texdw
[0]);
653 llevel
= G_030014_LAST_LEVEL(texdw
[5]);
654 mslice
= G_030014_LAST_ARRAY(texdw
[5]) + 1;
655 width
= G_030000_TEX_WIDTH(texdw
[0]) + 1;
656 height
= G_030004_TEX_HEIGHT(texdw
[1]) + 1;
657 depth
= G_030004_TEX_DEPTH(texdw
[1]) + 1;
658 surf
.format
= G_03001C_DATA_FORMAT(texdw
[7]);
659 surf
.nbx
= (G_030000_PITCH(texdw
[0]) + 1) * 8;
660 surf
.nbx
= r600_fmt_get_nblocksx(surf
.format
, surf
.nbx
);
661 surf
.nby
= r600_fmt_get_nblocksy(surf
.format
, height
);
662 surf
.mode
= G_030004_ARRAY_MODE(texdw
[1]);
663 surf
.tsplit
= G_030018_TILE_SPLIT(texdw
[6]);
664 surf
.nbanks
= G_03001C_NUM_BANKS(texdw
[7]);
665 surf
.bankw
= G_03001C_BANK_WIDTH(texdw
[7]);
666 surf
.bankh
= G_03001C_BANK_HEIGHT(texdw
[7]);
667 surf
.mtilea
= G_03001C_MACRO_TILE_ASPECT(texdw
[7]);
669 toffset
= texdw
[2] << 8;
670 moffset
= texdw
[3] << 8;
672 if (!r600_fmt_is_valid_texture(surf
.format
, p
->family
)) {
673 dev_warn(p
->dev
, "%s:%d texture invalid format %d\n",
674 __func__
, __LINE__
, surf
.format
);
678 case V_030000_SQ_TEX_DIM_1D
:
679 case V_030000_SQ_TEX_DIM_2D
:
680 case V_030000_SQ_TEX_DIM_CUBEMAP
:
681 case V_030000_SQ_TEX_DIM_1D_ARRAY
:
682 case V_030000_SQ_TEX_DIM_2D_ARRAY
:
684 case V_030000_SQ_TEX_DIM_3D
:
687 dev_warn(p
->dev
, "%s:%d texture invalid dimension %d\n",
688 __func__
, __LINE__
, dim
);
692 r
= evergreen_surface_value_conv_check(p
, &surf
, "texture");
698 evergreen_surface_check(p
, &surf
, NULL
);
699 surf
.nby
= ALIGN(surf
.nby
, surf
.halign
);
701 r
= evergreen_surface_check(p
, &surf
, "texture");
703 dev_warn(p
->dev
, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
704 __func__
, __LINE__
, texdw
[0], texdw
[1], texdw
[4],
705 texdw
[5], texdw
[6], texdw
[7]);
709 /* check texture size */
710 if (toffset
& (surf
.base_align
- 1)) {
711 dev_warn(p
->dev
, "%s:%d texture bo base %ld not aligned with %ld\n",
712 __func__
, __LINE__
, toffset
, surf
.base_align
);
715 if (moffset
& (surf
.base_align
- 1)) {
716 dev_warn(p
->dev
, "%s:%d mipmap bo base %ld not aligned with %ld\n",
717 __func__
, __LINE__
, moffset
, surf
.base_align
);
720 if (dim
== SQ_TEX_DIM_3D
) {
721 toffset
+= surf
.layer_size
* depth
;
723 toffset
+= surf
.layer_size
* mslice
;
725 if (toffset
> radeon_bo_size(texture
)) {
726 dev_warn(p
->dev
, "%s:%d texture bo too small (layer size %d, "
727 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
728 __func__
, __LINE__
, surf
.layer_size
,
729 (unsigned long)texdw
[2] << 8, mslice
,
730 depth
, radeon_bo_size(texture
),
735 /* check mipmap size */
736 for (i
= 1; i
<= llevel
; i
++) {
739 w
= r600_mip_minify(width
, i
);
740 h
= r600_mip_minify(height
, i
);
741 d
= r600_mip_minify(depth
, i
);
742 surf
.nbx
= r600_fmt_get_nblocksx(surf
.format
, w
);
743 surf
.nby
= r600_fmt_get_nblocksy(surf
.format
, h
);
746 case ARRAY_2D_TILED_THIN1
:
747 if (surf
.nbx
< surf
.palign
|| surf
.nby
< surf
.halign
) {
748 surf
.mode
= ARRAY_1D_TILED_THIN1
;
750 /* recompute alignment */
751 evergreen_surface_check(p
, &surf
, NULL
);
753 case ARRAY_LINEAR_GENERAL
:
754 case ARRAY_LINEAR_ALIGNED
:
755 case ARRAY_1D_TILED_THIN1
:
758 dev_warn(p
->dev
, "%s:%d invalid array mode %d\n",
759 __func__
, __LINE__
, surf
.mode
);
762 surf
.nbx
= ALIGN(surf
.nbx
, surf
.palign
);
763 surf
.nby
= ALIGN(surf
.nby
, surf
.halign
);
765 r
= evergreen_surface_check(p
, &surf
, "mipmap");
770 if (dim
== SQ_TEX_DIM_3D
) {
771 moffset
+= surf
.layer_size
* d
;
773 moffset
+= surf
.layer_size
* mslice
;
775 if (moffset
> radeon_bo_size(mipmap
)) {
776 dev_warn(p
->dev
, "%s:%d mipmap [%d] bo too small (layer size %d, "
777 "offset %ld, coffset %ld, max layer %d, depth %d, "
778 "bo size %ld) level0 (%d %d %d)\n",
779 __func__
, __LINE__
, i
, surf
.layer_size
,
780 (unsigned long)texdw
[3] << 8, moffset
, mslice
,
781 d
, radeon_bo_size(mipmap
),
782 width
, height
, depth
);
783 dev_warn(p
->dev
, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
784 __func__
, __LINE__
, surf
.nbx
, surf
.nby
,
785 surf
.mode
, surf
.bpe
, surf
.nsamples
,
786 surf
.bankw
, surf
.bankh
,
787 surf
.tsplit
, surf
.mtilea
);
795 static int evergreen_cs_track_check(struct radeon_cs_parser
*p
)
797 struct evergreen_cs_track
*track
= p
->track
;
801 /* check streamout */
802 for (i
= 0; i
< 4; i
++) {
803 if (track
->vgt_strmout_config
& (1 << i
)) {
804 for (j
= 0; j
< 4; j
++) {
805 if ((track
->vgt_strmout_buffer_config
>> (i
* 4)) & (1 << j
)) {
806 if (track
->vgt_strmout_bo
[j
]) {
807 u64 offset
= (u64
)track
->vgt_strmout_bo_offset
[j
] +
808 (u64
)track
->vgt_strmout_size
[j
];
809 if (offset
> radeon_bo_size(track
->vgt_strmout_bo
[i
])) {
810 DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
812 radeon_bo_size(track
->vgt_strmout_bo
[j
]));
816 dev_warn(p
->dev
, "No buffer for streamout %d\n", j
);
824 /* check that we have a cb for each enabled target
826 tmp
= track
->cb_target_mask
;
827 for (i
= 0; i
< 8; i
++) {
828 if ((tmp
>> (i
* 4)) & 0xF) {
829 /* at least one component is enabled */
830 if (track
->cb_color_bo
[i
] == NULL
) {
831 dev_warn(p
->dev
, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
832 __func__
, __LINE__
, track
->cb_target_mask
, track
->cb_shader_mask
, i
);
836 r
= evergreen_cs_track_validate_cb(p
, i
);
843 /* Check stencil buffer */
844 if (G_028800_STENCIL_ENABLE(track
->db_depth_control
)) {
845 r
= evergreen_cs_track_validate_stencil(p
);
849 /* Check depth buffer */
850 if (G_028800_Z_WRITE_ENABLE(track
->db_depth_control
)) {
851 r
= evergreen_cs_track_validate_depth(p
);
860 * evergreen_cs_packet_parse() - parse cp packet and point ib index to next packet
861 * @parser: parser structure holding parsing context.
862 * @pkt: where to store packet informations
864 * Assume that chunk_ib_index is properly set. Will return -EINVAL
865 * if packet is bigger than remaining ib size. or if packets is unknown.
867 int evergreen_cs_packet_parse(struct radeon_cs_parser
*p
,
868 struct radeon_cs_packet
*pkt
,
871 struct radeon_cs_chunk
*ib_chunk
= &p
->chunks
[p
->chunk_ib_idx
];
874 if (idx
>= ib_chunk
->length_dw
) {
875 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
876 idx
, ib_chunk
->length_dw
);
879 header
= radeon_get_ib_value(p
, idx
);
881 pkt
->type
= CP_PACKET_GET_TYPE(header
);
882 pkt
->count
= CP_PACKET_GET_COUNT(header
);
886 pkt
->reg
= CP_PACKET0_GET_REG(header
);
889 pkt
->opcode
= CP_PACKET3_GET_OPCODE(header
);
895 DRM_ERROR("Unknown packet type %d at %d !\n", pkt
->type
, idx
);
898 if ((pkt
->count
+ 1 + pkt
->idx
) >= ib_chunk
->length_dw
) {
899 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
900 pkt
->idx
, pkt
->type
, pkt
->count
, ib_chunk
->length_dw
);
907 * evergreen_cs_packet_next_reloc() - parse next packet which should be reloc packet3
908 * @parser: parser structure holding parsing context.
909 * @data: pointer to relocation data
910 * @offset_start: starting offset
911 * @offset_mask: offset mask (to align start offset on)
912 * @reloc: reloc informations
914 * Check next packet is relocation packet3, do bo validation and compute
915 * GPU offset using the provided start.
917 static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser
*p
,
918 struct radeon_cs_reloc
**cs_reloc
)
920 struct radeon_cs_chunk
*relocs_chunk
;
921 struct radeon_cs_packet p3reloc
;
925 if (p
->chunk_relocs_idx
== -1) {
926 DRM_ERROR("No relocation chunk !\n");
930 relocs_chunk
= &p
->chunks
[p
->chunk_relocs_idx
];
931 r
= evergreen_cs_packet_parse(p
, &p3reloc
, p
->idx
);
935 p
->idx
+= p3reloc
.count
+ 2;
936 if (p3reloc
.type
!= PACKET_TYPE3
|| p3reloc
.opcode
!= PACKET3_NOP
) {
937 DRM_ERROR("No packet3 for relocation for packet at %d.\n",
941 idx
= radeon_get_ib_value(p
, p3reloc
.idx
+ 1);
942 if (idx
>= relocs_chunk
->length_dw
) {
943 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
944 idx
, relocs_chunk
->length_dw
);
947 /* FIXME: we assume reloc size is 4 dwords */
948 *cs_reloc
= p
->relocs_ptr
[(idx
/ 4)];
953 * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
954 * @parser: parser structure holding parsing context.
956 * Userspace sends a special sequence for VLINE waits.
957 * PACKET0 - VLINE_START_END + value
958 * PACKET3 - WAIT_REG_MEM poll vline status reg
959 * RELOC (P3) - crtc_id in reloc.
961 * This function parses this and relocates the VLINE START END
962 * and WAIT_REG_MEM packets to the correct crtc.
963 * It also detects a switched off crtc and nulls out the
966 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser
*p
)
968 struct drm_mode_object
*obj
;
969 struct drm_crtc
*crtc
;
970 struct radeon_crtc
*radeon_crtc
;
971 struct radeon_cs_packet p3reloc
, wait_reg_mem
;
974 uint32_t header
, h_idx
, reg
, wait_reg_mem_info
;
975 volatile uint32_t *ib
;
979 /* parse the WAIT_REG_MEM */
980 r
= evergreen_cs_packet_parse(p
, &wait_reg_mem
, p
->idx
);
984 /* check its a WAIT_REG_MEM */
985 if (wait_reg_mem
.type
!= PACKET_TYPE3
||
986 wait_reg_mem
.opcode
!= PACKET3_WAIT_REG_MEM
) {
987 DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
991 wait_reg_mem_info
= radeon_get_ib_value(p
, wait_reg_mem
.idx
+ 1);
992 /* bit 4 is reg (0) or mem (1) */
993 if (wait_reg_mem_info
& 0x10) {
994 DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
997 /* waiting for value to be equal */
998 if ((wait_reg_mem_info
& 0x7) != 0x3) {
999 DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
1002 if ((radeon_get_ib_value(p
, wait_reg_mem
.idx
+ 2) << 2) != EVERGREEN_VLINE_STATUS
) {
1003 DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
1007 if (radeon_get_ib_value(p
, wait_reg_mem
.idx
+ 5) != EVERGREEN_VLINE_STAT
) {
1008 DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
1012 /* jump over the NOP */
1013 r
= evergreen_cs_packet_parse(p
, &p3reloc
, p
->idx
+ wait_reg_mem
.count
+ 2);
1018 p
->idx
+= wait_reg_mem
.count
+ 2;
1019 p
->idx
+= p3reloc
.count
+ 2;
1021 header
= radeon_get_ib_value(p
, h_idx
);
1022 crtc_id
= radeon_get_ib_value(p
, h_idx
+ 2 + 7 + 1);
1023 reg
= CP_PACKET0_GET_REG(header
);
1024 obj
= drm_mode_object_find(p
->rdev
->ddev
, crtc_id
, DRM_MODE_OBJECT_CRTC
);
1026 DRM_ERROR("cannot find crtc %d\n", crtc_id
);
1029 crtc
= obj_to_crtc(obj
);
1030 radeon_crtc
= to_radeon_crtc(crtc
);
1031 crtc_id
= radeon_crtc
->crtc_id
;
1033 if (!crtc
->enabled
) {
1034 /* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
1035 ib
[h_idx
+ 2] = PACKET2(0);
1036 ib
[h_idx
+ 3] = PACKET2(0);
1037 ib
[h_idx
+ 4] = PACKET2(0);
1038 ib
[h_idx
+ 5] = PACKET2(0);
1039 ib
[h_idx
+ 6] = PACKET2(0);
1040 ib
[h_idx
+ 7] = PACKET2(0);
1041 ib
[h_idx
+ 8] = PACKET2(0);
1044 case EVERGREEN_VLINE_START_END
:
1045 header
&= ~R600_CP_PACKET0_REG_MASK
;
1046 header
|= (EVERGREEN_VLINE_START_END
+ radeon_crtc
->crtc_offset
) >> 2;
1048 ib
[h_idx
+ 4] = (EVERGREEN_VLINE_STATUS
+ radeon_crtc
->crtc_offset
) >> 2;
1051 DRM_ERROR("unknown crtc reloc\n");
1058 static int evergreen_packet0_check(struct radeon_cs_parser
*p
,
1059 struct radeon_cs_packet
*pkt
,
1060 unsigned idx
, unsigned reg
)
1065 case EVERGREEN_VLINE_START_END
:
1066 r
= evergreen_cs_packet_parse_vline(p
);
1068 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1074 printk(KERN_ERR
"Forbidden register 0x%04X in cs at %d\n",
1081 static int evergreen_cs_parse_packet0(struct radeon_cs_parser
*p
,
1082 struct radeon_cs_packet
*pkt
)
1090 for (i
= 0; i
<= pkt
->count
; i
++, idx
++, reg
+= 4) {
1091 r
= evergreen_packet0_check(p
, pkt
, idx
, reg
);
1100 * evergreen_cs_check_reg() - check if register is authorized or not
1101 * @parser: parser structure holding parsing context
1102 * @reg: register we are testing
1103 * @idx: index into the cs buffer
1105 * This function will test against evergreen_reg_safe_bm and return 0
1106 * if register is safe. If register is not flag as safe this function
1107 * will test it against a list of register needind special handling.
1109 static int evergreen_cs_check_reg(struct radeon_cs_parser
*p
, u32 reg
, u32 idx
)
1111 struct evergreen_cs_track
*track
= (struct evergreen_cs_track
*)p
->track
;
1112 struct radeon_cs_reloc
*reloc
;
1117 if (p
->rdev
->family
>= CHIP_CAYMAN
)
1118 last_reg
= ARRAY_SIZE(cayman_reg_safe_bm
);
1120 last_reg
= ARRAY_SIZE(evergreen_reg_safe_bm
);
1123 if (i
>= last_reg
) {
1124 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1127 m
= 1 << ((reg
>> 2) & 31);
1128 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1129 if (!(cayman_reg_safe_bm
[i
] & m
))
1132 if (!(evergreen_reg_safe_bm
[i
] & m
))
1137 /* force following reg to 0 in an attempt to disable out buffer
1138 * which will need us to better understand how it works to perform
1139 * security check on it (Jerome)
1141 case SQ_ESGS_RING_SIZE
:
1142 case SQ_GSVS_RING_SIZE
:
1143 case SQ_ESTMP_RING_SIZE
:
1144 case SQ_GSTMP_RING_SIZE
:
1145 case SQ_HSTMP_RING_SIZE
:
1146 case SQ_LSTMP_RING_SIZE
:
1147 case SQ_PSTMP_RING_SIZE
:
1148 case SQ_VSTMP_RING_SIZE
:
1149 case SQ_ESGS_RING_ITEMSIZE
:
1150 case SQ_ESTMP_RING_ITEMSIZE
:
1151 case SQ_GSTMP_RING_ITEMSIZE
:
1152 case SQ_GSVS_RING_ITEMSIZE
:
1153 case SQ_GS_VERT_ITEMSIZE
:
1154 case SQ_GS_VERT_ITEMSIZE_1
:
1155 case SQ_GS_VERT_ITEMSIZE_2
:
1156 case SQ_GS_VERT_ITEMSIZE_3
:
1157 case SQ_GSVS_RING_OFFSET_1
:
1158 case SQ_GSVS_RING_OFFSET_2
:
1159 case SQ_GSVS_RING_OFFSET_3
:
1160 case SQ_HSTMP_RING_ITEMSIZE
:
1161 case SQ_LSTMP_RING_ITEMSIZE
:
1162 case SQ_PSTMP_RING_ITEMSIZE
:
1163 case SQ_VSTMP_RING_ITEMSIZE
:
1164 case VGT_TF_RING_SIZE
:
1165 /* get value to populate the IB don't remove */
1166 /*tmp =radeon_get_ib_value(p, idx);
1169 case SQ_ESGS_RING_BASE
:
1170 case SQ_GSVS_RING_BASE
:
1171 case SQ_ESTMP_RING_BASE
:
1172 case SQ_GSTMP_RING_BASE
:
1173 case SQ_HSTMP_RING_BASE
:
1174 case SQ_LSTMP_RING_BASE
:
1175 case SQ_PSTMP_RING_BASE
:
1176 case SQ_VSTMP_RING_BASE
:
1177 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1179 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1183 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1185 case DB_DEPTH_CONTROL
:
1186 track
->db_depth_control
= radeon_get_ib_value(p
, idx
);
1188 case CAYMAN_DB_EQAA
:
1189 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1190 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1195 case CAYMAN_DB_DEPTH_INFO
:
1196 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1197 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1203 track
->db_z_info
= radeon_get_ib_value(p
, idx
);
1204 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1205 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1207 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1211 ib
[idx
] &= ~Z_ARRAY_MODE(0xf);
1212 track
->db_z_info
&= ~Z_ARRAY_MODE(0xf);
1213 ib
[idx
] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1214 track
->db_z_info
|= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1215 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
1216 unsigned bankw
, bankh
, mtaspect
, tile_split
;
1218 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
1219 &bankw
, &bankh
, &mtaspect
,
1221 ib
[idx
] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
1222 ib
[idx
] |= DB_TILE_SPLIT(tile_split
) |
1223 DB_BANK_WIDTH(bankw
) |
1224 DB_BANK_HEIGHT(bankh
) |
1225 DB_MACRO_TILE_ASPECT(mtaspect
);
1229 case DB_STENCIL_INFO
:
1230 track
->db_s_info
= radeon_get_ib_value(p
, idx
);
1233 track
->db_depth_view
= radeon_get_ib_value(p
, idx
);
1236 track
->db_depth_size
= radeon_get_ib_value(p
, idx
);
1237 track
->db_depth_size_idx
= idx
;
1239 case R_02805C_DB_DEPTH_SLICE
:
1240 track
->db_depth_slice
= radeon_get_ib_value(p
, idx
);
1242 case DB_Z_READ_BASE
:
1243 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1245 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1249 track
->db_z_read_offset
= radeon_get_ib_value(p
, idx
);
1250 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1251 track
->db_z_read_bo
= reloc
->robj
;
1253 case DB_Z_WRITE_BASE
:
1254 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1256 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1260 track
->db_z_write_offset
= radeon_get_ib_value(p
, idx
);
1261 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1262 track
->db_z_write_bo
= reloc
->robj
;
1264 case DB_STENCIL_READ_BASE
:
1265 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1267 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1271 track
->db_s_read_offset
= radeon_get_ib_value(p
, idx
);
1272 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1273 track
->db_s_read_bo
= reloc
->robj
;
1275 case DB_STENCIL_WRITE_BASE
:
1276 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1278 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1282 track
->db_s_write_offset
= radeon_get_ib_value(p
, idx
);
1283 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1284 track
->db_s_write_bo
= reloc
->robj
;
1286 case VGT_STRMOUT_CONFIG
:
1287 track
->vgt_strmout_config
= radeon_get_ib_value(p
, idx
);
1289 case VGT_STRMOUT_BUFFER_CONFIG
:
1290 track
->vgt_strmout_buffer_config
= radeon_get_ib_value(p
, idx
);
1292 case VGT_STRMOUT_BUFFER_BASE_0
:
1293 case VGT_STRMOUT_BUFFER_BASE_1
:
1294 case VGT_STRMOUT_BUFFER_BASE_2
:
1295 case VGT_STRMOUT_BUFFER_BASE_3
:
1296 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1298 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1302 tmp
= (reg
- VGT_STRMOUT_BUFFER_BASE_0
) / 16;
1303 track
->vgt_strmout_bo_offset
[tmp
] = radeon_get_ib_value(p
, idx
) << 8;
1304 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1305 track
->vgt_strmout_bo
[tmp
] = reloc
->robj
;
1306 track
->vgt_strmout_bo_mc
[tmp
] = reloc
->lobj
.gpu_offset
;
1308 case VGT_STRMOUT_BUFFER_SIZE_0
:
1309 case VGT_STRMOUT_BUFFER_SIZE_1
:
1310 case VGT_STRMOUT_BUFFER_SIZE_2
:
1311 case VGT_STRMOUT_BUFFER_SIZE_3
:
1312 tmp
= (reg
- VGT_STRMOUT_BUFFER_SIZE_0
) / 16;
1313 /* size in register is DWs, convert to bytes */
1314 track
->vgt_strmout_size
[tmp
] = radeon_get_ib_value(p
, idx
) * 4;
1317 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1319 dev_warn(p
->dev
, "missing reloc for CP_COHER_BASE "
1323 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1324 case CB_TARGET_MASK
:
1325 track
->cb_target_mask
= radeon_get_ib_value(p
, idx
);
1327 case CB_SHADER_MASK
:
1328 track
->cb_shader_mask
= radeon_get_ib_value(p
, idx
);
1330 case PA_SC_AA_CONFIG
:
1331 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1332 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1336 tmp
= radeon_get_ib_value(p
, idx
) & MSAA_NUM_SAMPLES_MASK
;
1337 track
->nsamples
= 1 << tmp
;
1339 case CAYMAN_PA_SC_AA_CONFIG
:
1340 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1341 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1345 tmp
= radeon_get_ib_value(p
, idx
) & CAYMAN_MSAA_NUM_SAMPLES_MASK
;
1346 track
->nsamples
= 1 << tmp
;
1348 case CB_COLOR0_VIEW
:
1349 case CB_COLOR1_VIEW
:
1350 case CB_COLOR2_VIEW
:
1351 case CB_COLOR3_VIEW
:
1352 case CB_COLOR4_VIEW
:
1353 case CB_COLOR5_VIEW
:
1354 case CB_COLOR6_VIEW
:
1355 case CB_COLOR7_VIEW
:
1356 tmp
= (reg
- CB_COLOR0_VIEW
) / 0x3c;
1357 track
->cb_color_view
[tmp
] = radeon_get_ib_value(p
, idx
);
1359 case CB_COLOR8_VIEW
:
1360 case CB_COLOR9_VIEW
:
1361 case CB_COLOR10_VIEW
:
1362 case CB_COLOR11_VIEW
:
1363 tmp
= ((reg
- CB_COLOR8_VIEW
) / 0x1c) + 8;
1364 track
->cb_color_view
[tmp
] = radeon_get_ib_value(p
, idx
);
1366 case CB_COLOR0_INFO
:
1367 case CB_COLOR1_INFO
:
1368 case CB_COLOR2_INFO
:
1369 case CB_COLOR3_INFO
:
1370 case CB_COLOR4_INFO
:
1371 case CB_COLOR5_INFO
:
1372 case CB_COLOR6_INFO
:
1373 case CB_COLOR7_INFO
:
1374 tmp
= (reg
- CB_COLOR0_INFO
) / 0x3c;
1375 track
->cb_color_info
[tmp
] = radeon_get_ib_value(p
, idx
);
1376 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1377 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1379 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1383 ib
[idx
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1384 track
->cb_color_info
[tmp
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1387 case CB_COLOR8_INFO
:
1388 case CB_COLOR9_INFO
:
1389 case CB_COLOR10_INFO
:
1390 case CB_COLOR11_INFO
:
1391 tmp
= ((reg
- CB_COLOR8_INFO
) / 0x1c) + 8;
1392 track
->cb_color_info
[tmp
] = radeon_get_ib_value(p
, idx
);
1393 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1394 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1396 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1400 ib
[idx
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1401 track
->cb_color_info
[tmp
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1404 case CB_COLOR0_PITCH
:
1405 case CB_COLOR1_PITCH
:
1406 case CB_COLOR2_PITCH
:
1407 case CB_COLOR3_PITCH
:
1408 case CB_COLOR4_PITCH
:
1409 case CB_COLOR5_PITCH
:
1410 case CB_COLOR6_PITCH
:
1411 case CB_COLOR7_PITCH
:
1412 tmp
= (reg
- CB_COLOR0_PITCH
) / 0x3c;
1413 track
->cb_color_pitch
[tmp
] = radeon_get_ib_value(p
, idx
);
1414 track
->cb_color_pitch_idx
[tmp
] = idx
;
1416 case CB_COLOR8_PITCH
:
1417 case CB_COLOR9_PITCH
:
1418 case CB_COLOR10_PITCH
:
1419 case CB_COLOR11_PITCH
:
1420 tmp
= ((reg
- CB_COLOR8_PITCH
) / 0x1c) + 8;
1421 track
->cb_color_pitch
[tmp
] = radeon_get_ib_value(p
, idx
);
1422 track
->cb_color_pitch_idx
[tmp
] = idx
;
1424 case CB_COLOR0_SLICE
:
1425 case CB_COLOR1_SLICE
:
1426 case CB_COLOR2_SLICE
:
1427 case CB_COLOR3_SLICE
:
1428 case CB_COLOR4_SLICE
:
1429 case CB_COLOR5_SLICE
:
1430 case CB_COLOR6_SLICE
:
1431 case CB_COLOR7_SLICE
:
1432 tmp
= (reg
- CB_COLOR0_SLICE
) / 0x3c;
1433 track
->cb_color_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1434 track
->cb_color_slice_idx
[tmp
] = idx
;
1436 case CB_COLOR8_SLICE
:
1437 case CB_COLOR9_SLICE
:
1438 case CB_COLOR10_SLICE
:
1439 case CB_COLOR11_SLICE
:
1440 tmp
= ((reg
- CB_COLOR8_SLICE
) / 0x1c) + 8;
1441 track
->cb_color_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1442 track
->cb_color_slice_idx
[tmp
] = idx
;
1444 case CB_COLOR0_ATTRIB
:
1445 case CB_COLOR1_ATTRIB
:
1446 case CB_COLOR2_ATTRIB
:
1447 case CB_COLOR3_ATTRIB
:
1448 case CB_COLOR4_ATTRIB
:
1449 case CB_COLOR5_ATTRIB
:
1450 case CB_COLOR6_ATTRIB
:
1451 case CB_COLOR7_ATTRIB
:
1452 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1454 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1458 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1459 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
1460 unsigned bankw
, bankh
, mtaspect
, tile_split
;
1462 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
1463 &bankw
, &bankh
, &mtaspect
,
1465 ib
[idx
] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
1466 ib
[idx
] |= CB_TILE_SPLIT(tile_split
) |
1467 CB_BANK_WIDTH(bankw
) |
1468 CB_BANK_HEIGHT(bankh
) |
1469 CB_MACRO_TILE_ASPECT(mtaspect
);
1472 tmp
= ((reg
- CB_COLOR0_ATTRIB
) / 0x3c);
1473 track
->cb_color_attrib
[tmp
] = ib
[idx
];
1475 case CB_COLOR8_ATTRIB
:
1476 case CB_COLOR9_ATTRIB
:
1477 case CB_COLOR10_ATTRIB
:
1478 case CB_COLOR11_ATTRIB
:
1479 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1481 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1485 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1486 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
1487 unsigned bankw
, bankh
, mtaspect
, tile_split
;
1489 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
1490 &bankw
, &bankh
, &mtaspect
,
1492 ib
[idx
] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
1493 ib
[idx
] |= CB_TILE_SPLIT(tile_split
) |
1494 CB_BANK_WIDTH(bankw
) |
1495 CB_BANK_HEIGHT(bankh
) |
1496 CB_MACRO_TILE_ASPECT(mtaspect
);
1499 tmp
= ((reg
- CB_COLOR8_ATTRIB
) / 0x1c) + 8;
1500 track
->cb_color_attrib
[tmp
] = ib
[idx
];
1510 tmp
= (reg
- CB_COLOR0_DIM
) / 0x3c;
1511 track
->cb_color_dim
[tmp
] = radeon_get_ib_value(p
, idx
);
1512 track
->cb_color_dim_idx
[tmp
] = idx
;
1516 case CB_COLOR10_DIM
:
1517 case CB_COLOR11_DIM
:
1518 tmp
= ((reg
- CB_COLOR8_DIM
) / 0x1c) + 8;
1519 track
->cb_color_dim
[tmp
] = radeon_get_ib_value(p
, idx
);
1520 track
->cb_color_dim_idx
[tmp
] = idx
;
1522 case CB_COLOR0_FMASK
:
1523 case CB_COLOR1_FMASK
:
1524 case CB_COLOR2_FMASK
:
1525 case CB_COLOR3_FMASK
:
1526 case CB_COLOR4_FMASK
:
1527 case CB_COLOR5_FMASK
:
1528 case CB_COLOR6_FMASK
:
1529 case CB_COLOR7_FMASK
:
1530 tmp
= (reg
- CB_COLOR0_FMASK
) / 0x3c;
1531 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1533 dev_err(p
->dev
, "bad SET_CONTEXT_REG 0x%04X\n", reg
);
1536 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1537 track
->cb_color_fmask_bo
[tmp
] = reloc
->robj
;
1539 case CB_COLOR0_CMASK
:
1540 case CB_COLOR1_CMASK
:
1541 case CB_COLOR2_CMASK
:
1542 case CB_COLOR3_CMASK
:
1543 case CB_COLOR4_CMASK
:
1544 case CB_COLOR5_CMASK
:
1545 case CB_COLOR6_CMASK
:
1546 case CB_COLOR7_CMASK
:
1547 tmp
= (reg
- CB_COLOR0_CMASK
) / 0x3c;
1548 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1550 dev_err(p
->dev
, "bad SET_CONTEXT_REG 0x%04X\n", reg
);
1553 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1554 track
->cb_color_cmask_bo
[tmp
] = reloc
->robj
;
1556 case CB_COLOR0_FMASK_SLICE
:
1557 case CB_COLOR1_FMASK_SLICE
:
1558 case CB_COLOR2_FMASK_SLICE
:
1559 case CB_COLOR3_FMASK_SLICE
:
1560 case CB_COLOR4_FMASK_SLICE
:
1561 case CB_COLOR5_FMASK_SLICE
:
1562 case CB_COLOR6_FMASK_SLICE
:
1563 case CB_COLOR7_FMASK_SLICE
:
1564 tmp
= (reg
- CB_COLOR0_FMASK_SLICE
) / 0x3c;
1565 track
->cb_color_fmask_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1567 case CB_COLOR0_CMASK_SLICE
:
1568 case CB_COLOR1_CMASK_SLICE
:
1569 case CB_COLOR2_CMASK_SLICE
:
1570 case CB_COLOR3_CMASK_SLICE
:
1571 case CB_COLOR4_CMASK_SLICE
:
1572 case CB_COLOR5_CMASK_SLICE
:
1573 case CB_COLOR6_CMASK_SLICE
:
1574 case CB_COLOR7_CMASK_SLICE
:
1575 tmp
= (reg
- CB_COLOR0_CMASK_SLICE
) / 0x3c;
1576 track
->cb_color_cmask_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1578 case CB_COLOR0_BASE
:
1579 case CB_COLOR1_BASE
:
1580 case CB_COLOR2_BASE
:
1581 case CB_COLOR3_BASE
:
1582 case CB_COLOR4_BASE
:
1583 case CB_COLOR5_BASE
:
1584 case CB_COLOR6_BASE
:
1585 case CB_COLOR7_BASE
:
1586 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1588 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1592 tmp
= (reg
- CB_COLOR0_BASE
) / 0x3c;
1593 track
->cb_color_bo_offset
[tmp
] = radeon_get_ib_value(p
, idx
);
1594 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1595 track
->cb_color_base_last
[tmp
] = ib
[idx
];
1596 track
->cb_color_bo
[tmp
] = reloc
->robj
;
1598 case CB_COLOR8_BASE
:
1599 case CB_COLOR9_BASE
:
1600 case CB_COLOR10_BASE
:
1601 case CB_COLOR11_BASE
:
1602 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1604 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1608 tmp
= ((reg
- CB_COLOR8_BASE
) / 0x1c) + 8;
1609 track
->cb_color_bo_offset
[tmp
] = radeon_get_ib_value(p
, idx
);
1610 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1611 track
->cb_color_base_last
[tmp
] = ib
[idx
];
1612 track
->cb_color_bo
[tmp
] = reloc
->robj
;
1614 case CB_IMMED0_BASE
:
1615 case CB_IMMED1_BASE
:
1616 case CB_IMMED2_BASE
:
1617 case CB_IMMED3_BASE
:
1618 case CB_IMMED4_BASE
:
1619 case CB_IMMED5_BASE
:
1620 case CB_IMMED6_BASE
:
1621 case CB_IMMED7_BASE
:
1622 case CB_IMMED8_BASE
:
1623 case CB_IMMED9_BASE
:
1624 case CB_IMMED10_BASE
:
1625 case CB_IMMED11_BASE
:
1626 case DB_HTILE_DATA_BASE
:
1627 case SQ_PGM_START_FS
:
1628 case SQ_PGM_START_ES
:
1629 case SQ_PGM_START_VS
:
1630 case SQ_PGM_START_GS
:
1631 case SQ_PGM_START_PS
:
1632 case SQ_PGM_START_HS
:
1633 case SQ_PGM_START_LS
:
1634 case SQ_CONST_MEM_BASE
:
1635 case SQ_ALU_CONST_CACHE_GS_0
:
1636 case SQ_ALU_CONST_CACHE_GS_1
:
1637 case SQ_ALU_CONST_CACHE_GS_2
:
1638 case SQ_ALU_CONST_CACHE_GS_3
:
1639 case SQ_ALU_CONST_CACHE_GS_4
:
1640 case SQ_ALU_CONST_CACHE_GS_5
:
1641 case SQ_ALU_CONST_CACHE_GS_6
:
1642 case SQ_ALU_CONST_CACHE_GS_7
:
1643 case SQ_ALU_CONST_CACHE_GS_8
:
1644 case SQ_ALU_CONST_CACHE_GS_9
:
1645 case SQ_ALU_CONST_CACHE_GS_10
:
1646 case SQ_ALU_CONST_CACHE_GS_11
:
1647 case SQ_ALU_CONST_CACHE_GS_12
:
1648 case SQ_ALU_CONST_CACHE_GS_13
:
1649 case SQ_ALU_CONST_CACHE_GS_14
:
1650 case SQ_ALU_CONST_CACHE_GS_15
:
1651 case SQ_ALU_CONST_CACHE_PS_0
:
1652 case SQ_ALU_CONST_CACHE_PS_1
:
1653 case SQ_ALU_CONST_CACHE_PS_2
:
1654 case SQ_ALU_CONST_CACHE_PS_3
:
1655 case SQ_ALU_CONST_CACHE_PS_4
:
1656 case SQ_ALU_CONST_CACHE_PS_5
:
1657 case SQ_ALU_CONST_CACHE_PS_6
:
1658 case SQ_ALU_CONST_CACHE_PS_7
:
1659 case SQ_ALU_CONST_CACHE_PS_8
:
1660 case SQ_ALU_CONST_CACHE_PS_9
:
1661 case SQ_ALU_CONST_CACHE_PS_10
:
1662 case SQ_ALU_CONST_CACHE_PS_11
:
1663 case SQ_ALU_CONST_CACHE_PS_12
:
1664 case SQ_ALU_CONST_CACHE_PS_13
:
1665 case SQ_ALU_CONST_CACHE_PS_14
:
1666 case SQ_ALU_CONST_CACHE_PS_15
:
1667 case SQ_ALU_CONST_CACHE_VS_0
:
1668 case SQ_ALU_CONST_CACHE_VS_1
:
1669 case SQ_ALU_CONST_CACHE_VS_2
:
1670 case SQ_ALU_CONST_CACHE_VS_3
:
1671 case SQ_ALU_CONST_CACHE_VS_4
:
1672 case SQ_ALU_CONST_CACHE_VS_5
:
1673 case SQ_ALU_CONST_CACHE_VS_6
:
1674 case SQ_ALU_CONST_CACHE_VS_7
:
1675 case SQ_ALU_CONST_CACHE_VS_8
:
1676 case SQ_ALU_CONST_CACHE_VS_9
:
1677 case SQ_ALU_CONST_CACHE_VS_10
:
1678 case SQ_ALU_CONST_CACHE_VS_11
:
1679 case SQ_ALU_CONST_CACHE_VS_12
:
1680 case SQ_ALU_CONST_CACHE_VS_13
:
1681 case SQ_ALU_CONST_CACHE_VS_14
:
1682 case SQ_ALU_CONST_CACHE_VS_15
:
1683 case SQ_ALU_CONST_CACHE_HS_0
:
1684 case SQ_ALU_CONST_CACHE_HS_1
:
1685 case SQ_ALU_CONST_CACHE_HS_2
:
1686 case SQ_ALU_CONST_CACHE_HS_3
:
1687 case SQ_ALU_CONST_CACHE_HS_4
:
1688 case SQ_ALU_CONST_CACHE_HS_5
:
1689 case SQ_ALU_CONST_CACHE_HS_6
:
1690 case SQ_ALU_CONST_CACHE_HS_7
:
1691 case SQ_ALU_CONST_CACHE_HS_8
:
1692 case SQ_ALU_CONST_CACHE_HS_9
:
1693 case SQ_ALU_CONST_CACHE_HS_10
:
1694 case SQ_ALU_CONST_CACHE_HS_11
:
1695 case SQ_ALU_CONST_CACHE_HS_12
:
1696 case SQ_ALU_CONST_CACHE_HS_13
:
1697 case SQ_ALU_CONST_CACHE_HS_14
:
1698 case SQ_ALU_CONST_CACHE_HS_15
:
1699 case SQ_ALU_CONST_CACHE_LS_0
:
1700 case SQ_ALU_CONST_CACHE_LS_1
:
1701 case SQ_ALU_CONST_CACHE_LS_2
:
1702 case SQ_ALU_CONST_CACHE_LS_3
:
1703 case SQ_ALU_CONST_CACHE_LS_4
:
1704 case SQ_ALU_CONST_CACHE_LS_5
:
1705 case SQ_ALU_CONST_CACHE_LS_6
:
1706 case SQ_ALU_CONST_CACHE_LS_7
:
1707 case SQ_ALU_CONST_CACHE_LS_8
:
1708 case SQ_ALU_CONST_CACHE_LS_9
:
1709 case SQ_ALU_CONST_CACHE_LS_10
:
1710 case SQ_ALU_CONST_CACHE_LS_11
:
1711 case SQ_ALU_CONST_CACHE_LS_12
:
1712 case SQ_ALU_CONST_CACHE_LS_13
:
1713 case SQ_ALU_CONST_CACHE_LS_14
:
1714 case SQ_ALU_CONST_CACHE_LS_15
:
1715 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1717 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1721 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1723 case SX_MEMORY_EXPORT_BASE
:
1724 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1725 dev_warn(p
->dev
, "bad SET_CONFIG_REG "
1729 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1731 dev_warn(p
->dev
, "bad SET_CONFIG_REG "
1735 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1737 case CAYMAN_SX_SCATTER_EXPORT_BASE
:
1738 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1739 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1743 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1745 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1749 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1752 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1758 static bool evergreen_is_safe_reg(struct radeon_cs_parser
*p
, u32 reg
, u32 idx
)
1762 if (p
->rdev
->family
>= CHIP_CAYMAN
)
1763 last_reg
= ARRAY_SIZE(cayman_reg_safe_bm
);
1765 last_reg
= ARRAY_SIZE(evergreen_reg_safe_bm
);
1768 if (i
>= last_reg
) {
1769 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1772 m
= 1 << ((reg
>> 2) & 31);
1773 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1774 if (!(cayman_reg_safe_bm
[i
] & m
))
1777 if (!(evergreen_reg_safe_bm
[i
] & m
))
1780 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1784 static int evergreen_packet3_check(struct radeon_cs_parser
*p
,
1785 struct radeon_cs_packet
*pkt
)
1787 struct radeon_cs_reloc
*reloc
;
1788 struct evergreen_cs_track
*track
;
1792 unsigned start_reg
, end_reg
, reg
;
1796 track
= (struct evergreen_cs_track
*)p
->track
;
1799 idx_value
= radeon_get_ib_value(p
, idx
);
1801 switch (pkt
->opcode
) {
1802 case PACKET3_SET_PREDICATION
:
1806 if (pkt
->count
!= 1) {
1807 DRM_ERROR("bad SET PREDICATION\n");
1811 tmp
= radeon_get_ib_value(p
, idx
+ 1);
1812 pred_op
= (tmp
>> 16) & 0x7;
1814 /* for the clear predicate operation */
1819 DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op
);
1823 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1825 DRM_ERROR("bad SET PREDICATION\n");
1829 ib
[idx
+ 0] = idx_value
+ (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
1830 ib
[idx
+ 1] = tmp
+ (upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff);
1833 case PACKET3_CONTEXT_CONTROL
:
1834 if (pkt
->count
!= 1) {
1835 DRM_ERROR("bad CONTEXT_CONTROL\n");
1839 case PACKET3_INDEX_TYPE
:
1840 case PACKET3_NUM_INSTANCES
:
1841 case PACKET3_CLEAR_STATE
:
1843 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1847 case CAYMAN_PACKET3_DEALLOC_STATE
:
1848 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1849 DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1853 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1857 case PACKET3_INDEX_BASE
:
1858 if (pkt
->count
!= 1) {
1859 DRM_ERROR("bad INDEX_BASE\n");
1862 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1864 DRM_ERROR("bad INDEX_BASE\n");
1867 ib
[idx
+0] = idx_value
+ (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
1868 ib
[idx
+1] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
1869 r
= evergreen_cs_track_check(p
);
1871 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
1875 case PACKET3_DRAW_INDEX
:
1876 if (pkt
->count
!= 3) {
1877 DRM_ERROR("bad DRAW_INDEX\n");
1880 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1882 DRM_ERROR("bad DRAW_INDEX\n");
1885 ib
[idx
+0] = idx_value
+ (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
1886 ib
[idx
+1] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
1887 r
= evergreen_cs_track_check(p
);
1889 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
1893 case PACKET3_DRAW_INDEX_2
:
1894 if (pkt
->count
!= 4) {
1895 DRM_ERROR("bad DRAW_INDEX_2\n");
1898 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1900 DRM_ERROR("bad DRAW_INDEX_2\n");
1903 ib
[idx
+1] = idx_value
+ (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
1904 ib
[idx
+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
1905 r
= evergreen_cs_track_check(p
);
1907 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
1911 case PACKET3_DRAW_INDEX_AUTO
:
1912 if (pkt
->count
!= 1) {
1913 DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1916 r
= evergreen_cs_track_check(p
);
1918 dev_warn(p
->dev
, "%s:%d invalid cmd stream %d\n", __func__
, __LINE__
, idx
);
1922 case PACKET3_DRAW_INDEX_MULTI_AUTO
:
1923 if (pkt
->count
!= 2) {
1924 DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1927 r
= evergreen_cs_track_check(p
);
1929 dev_warn(p
->dev
, "%s:%d invalid cmd stream %d\n", __func__
, __LINE__
, idx
);
1933 case PACKET3_DRAW_INDEX_IMMD
:
1934 if (pkt
->count
< 2) {
1935 DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1938 r
= evergreen_cs_track_check(p
);
1940 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
1944 case PACKET3_DRAW_INDEX_OFFSET
:
1945 if (pkt
->count
!= 2) {
1946 DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1949 r
= evergreen_cs_track_check(p
);
1951 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
1955 case PACKET3_DRAW_INDEX_OFFSET_2
:
1956 if (pkt
->count
!= 3) {
1957 DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
1960 r
= evergreen_cs_track_check(p
);
1962 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
1966 case PACKET3_DISPATCH_DIRECT
:
1967 if (pkt
->count
!= 3) {
1968 DRM_ERROR("bad DISPATCH_DIRECT\n");
1971 r
= evergreen_cs_track_check(p
);
1973 dev_warn(p
->dev
, "%s:%d invalid cmd stream %d\n", __func__
, __LINE__
, idx
);
1977 case PACKET3_DISPATCH_INDIRECT
:
1978 if (pkt
->count
!= 1) {
1979 DRM_ERROR("bad DISPATCH_INDIRECT\n");
1982 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1984 DRM_ERROR("bad DISPATCH_INDIRECT\n");
1987 ib
[idx
+0] = idx_value
+ (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
1988 r
= evergreen_cs_track_check(p
);
1990 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
1994 case PACKET3_WAIT_REG_MEM
:
1995 if (pkt
->count
!= 5) {
1996 DRM_ERROR("bad WAIT_REG_MEM\n");
1999 /* bit 4 is reg (0) or mem (1) */
2000 if (idx_value
& 0x10) {
2001 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2003 DRM_ERROR("bad WAIT_REG_MEM\n");
2006 ib
[idx
+1] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2007 ib
[idx
+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2010 case PACKET3_SURFACE_SYNC
:
2011 if (pkt
->count
!= 3) {
2012 DRM_ERROR("bad SURFACE_SYNC\n");
2015 /* 0xffffffff/0x0 is flush all cache flag */
2016 if (radeon_get_ib_value(p
, idx
+ 1) != 0xffffffff ||
2017 radeon_get_ib_value(p
, idx
+ 2) != 0) {
2018 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2020 DRM_ERROR("bad SURFACE_SYNC\n");
2023 ib
[idx
+2] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
2026 case PACKET3_EVENT_WRITE
:
2027 if (pkt
->count
!= 2 && pkt
->count
!= 0) {
2028 DRM_ERROR("bad EVENT_WRITE\n");
2032 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2034 DRM_ERROR("bad EVENT_WRITE\n");
2037 ib
[idx
+1] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2038 ib
[idx
+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2041 case PACKET3_EVENT_WRITE_EOP
:
2042 if (pkt
->count
!= 4) {
2043 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2046 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2048 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2051 ib
[idx
+1] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2052 ib
[idx
+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2054 case PACKET3_EVENT_WRITE_EOS
:
2055 if (pkt
->count
!= 3) {
2056 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2059 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2061 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2064 ib
[idx
+1] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2065 ib
[idx
+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2067 case PACKET3_SET_CONFIG_REG
:
2068 start_reg
= (idx_value
<< 2) + PACKET3_SET_CONFIG_REG_START
;
2069 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2070 if ((start_reg
< PACKET3_SET_CONFIG_REG_START
) ||
2071 (start_reg
>= PACKET3_SET_CONFIG_REG_END
) ||
2072 (end_reg
>= PACKET3_SET_CONFIG_REG_END
)) {
2073 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2076 for (i
= 0; i
< pkt
->count
; i
++) {
2077 reg
= start_reg
+ (4 * i
);
2078 r
= evergreen_cs_check_reg(p
, reg
, idx
+1+i
);
2083 case PACKET3_SET_CONTEXT_REG
:
2084 start_reg
= (idx_value
<< 2) + PACKET3_SET_CONTEXT_REG_START
;
2085 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2086 if ((start_reg
< PACKET3_SET_CONTEXT_REG_START
) ||
2087 (start_reg
>= PACKET3_SET_CONTEXT_REG_END
) ||
2088 (end_reg
>= PACKET3_SET_CONTEXT_REG_END
)) {
2089 DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2092 for (i
= 0; i
< pkt
->count
; i
++) {
2093 reg
= start_reg
+ (4 * i
);
2094 r
= evergreen_cs_check_reg(p
, reg
, idx
+1+i
);
2099 case PACKET3_SET_RESOURCE
:
2100 if (pkt
->count
% 8) {
2101 DRM_ERROR("bad SET_RESOURCE\n");
2104 start_reg
= (idx_value
<< 2) + PACKET3_SET_RESOURCE_START
;
2105 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2106 if ((start_reg
< PACKET3_SET_RESOURCE_START
) ||
2107 (start_reg
>= PACKET3_SET_RESOURCE_END
) ||
2108 (end_reg
>= PACKET3_SET_RESOURCE_END
)) {
2109 DRM_ERROR("bad SET_RESOURCE\n");
2112 for (i
= 0; i
< (pkt
->count
/ 8); i
++) {
2113 struct radeon_bo
*texture
, *mipmap
;
2114 u32 toffset
, moffset
;
2117 switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p
, idx
+1+(i
*8)+7))) {
2118 case SQ_TEX_VTX_VALID_TEXTURE
:
2120 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2122 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2125 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
2126 ib
[idx
+1+(i
*8)+1] |=
2127 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
2128 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
2129 unsigned bankw
, bankh
, mtaspect
, tile_split
;
2131 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
2132 &bankw
, &bankh
, &mtaspect
,
2134 ib
[idx
+1+(i
*8)+6] |= TEX_TILE_SPLIT(tile_split
);
2135 ib
[idx
+1+(i
*8)+7] |=
2136 TEX_BANK_WIDTH(bankw
) |
2137 TEX_BANK_HEIGHT(bankh
) |
2138 MACRO_TILE_ASPECT(mtaspect
) |
2139 TEX_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
2142 texture
= reloc
->robj
;
2143 toffset
= (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
2145 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2147 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2150 moffset
= (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
2151 mipmap
= reloc
->robj
;
2152 r
= evergreen_cs_track_validate_texture(p
, texture
, mipmap
, idx
+1+(i
*8));
2155 ib
[idx
+1+(i
*8)+2] += toffset
;
2156 ib
[idx
+1+(i
*8)+3] += moffset
;
2158 case SQ_TEX_VTX_VALID_BUFFER
:
2160 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2162 DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2165 offset
= radeon_get_ib_value(p
, idx
+1+(i
*8)+0);
2166 size
= radeon_get_ib_value(p
, idx
+1+(i
*8)+1);
2167 if (p
->rdev
&& (size
+ offset
) > radeon_bo_size(reloc
->robj
)) {
2168 /* force size to size of the buffer */
2169 dev_warn(p
->dev
, "vbo resource seems too big for the bo\n");
2170 ib
[idx
+1+(i
*8)+1] = radeon_bo_size(reloc
->robj
);
2172 ib
[idx
+1+(i
*8)+0] += (u32
)((reloc
->lobj
.gpu_offset
) & 0xffffffff);
2173 ib
[idx
+1+(i
*8)+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2175 case SQ_TEX_VTX_INVALID_TEXTURE
:
2176 case SQ_TEX_VTX_INVALID_BUFFER
:
2178 DRM_ERROR("bad SET_RESOURCE\n");
2183 case PACKET3_SET_ALU_CONST
:
2184 /* XXX fix me ALU const buffers only */
2186 case PACKET3_SET_BOOL_CONST
:
2187 start_reg
= (idx_value
<< 2) + PACKET3_SET_BOOL_CONST_START
;
2188 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2189 if ((start_reg
< PACKET3_SET_BOOL_CONST_START
) ||
2190 (start_reg
>= PACKET3_SET_BOOL_CONST_END
) ||
2191 (end_reg
>= PACKET3_SET_BOOL_CONST_END
)) {
2192 DRM_ERROR("bad SET_BOOL_CONST\n");
2196 case PACKET3_SET_LOOP_CONST
:
2197 start_reg
= (idx_value
<< 2) + PACKET3_SET_LOOP_CONST_START
;
2198 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2199 if ((start_reg
< PACKET3_SET_LOOP_CONST_START
) ||
2200 (start_reg
>= PACKET3_SET_LOOP_CONST_END
) ||
2201 (end_reg
>= PACKET3_SET_LOOP_CONST_END
)) {
2202 DRM_ERROR("bad SET_LOOP_CONST\n");
2206 case PACKET3_SET_CTL_CONST
:
2207 start_reg
= (idx_value
<< 2) + PACKET3_SET_CTL_CONST_START
;
2208 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2209 if ((start_reg
< PACKET3_SET_CTL_CONST_START
) ||
2210 (start_reg
>= PACKET3_SET_CTL_CONST_END
) ||
2211 (end_reg
>= PACKET3_SET_CTL_CONST_END
)) {
2212 DRM_ERROR("bad SET_CTL_CONST\n");
2216 case PACKET3_SET_SAMPLER
:
2217 if (pkt
->count
% 3) {
2218 DRM_ERROR("bad SET_SAMPLER\n");
2221 start_reg
= (idx_value
<< 2) + PACKET3_SET_SAMPLER_START
;
2222 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2223 if ((start_reg
< PACKET3_SET_SAMPLER_START
) ||
2224 (start_reg
>= PACKET3_SET_SAMPLER_END
) ||
2225 (end_reg
>= PACKET3_SET_SAMPLER_END
)) {
2226 DRM_ERROR("bad SET_SAMPLER\n");
2230 case PACKET3_STRMOUT_BUFFER_UPDATE
:
2231 if (pkt
->count
!= 4) {
2232 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2235 /* Updating memory at DST_ADDRESS. */
2236 if (idx_value
& 0x1) {
2238 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2240 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2243 offset
= radeon_get_ib_value(p
, idx
+1);
2244 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff)) << 32;
2245 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2246 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2247 offset
+ 4, radeon_bo_size(reloc
->robj
));
2250 ib
[idx
+1] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2251 ib
[idx
+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2253 /* Reading data from SRC_ADDRESS. */
2254 if (((idx_value
>> 1) & 0x3) == 2) {
2256 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2258 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2261 offset
= radeon_get_ib_value(p
, idx
+3);
2262 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+4) & 0xff)) << 32;
2263 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2264 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2265 offset
+ 4, radeon_bo_size(reloc
->robj
));
2268 ib
[idx
+3] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2269 ib
[idx
+4] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2272 case PACKET3_COPY_DW
:
2273 if (pkt
->count
!= 4) {
2274 DRM_ERROR("bad COPY_DW (invalid count)\n");
2277 if (idx_value
& 0x1) {
2279 /* SRC is memory. */
2280 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2282 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2285 offset
= radeon_get_ib_value(p
, idx
+1);
2286 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff)) << 32;
2287 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2288 DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2289 offset
+ 4, radeon_bo_size(reloc
->robj
));
2292 ib
[idx
+1] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2293 ib
[idx
+2] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2296 reg
= radeon_get_ib_value(p
, idx
+1) << 2;
2297 if (!evergreen_is_safe_reg(p
, reg
, idx
+1))
2300 if (idx_value
& 0x2) {
2302 /* DST is memory. */
2303 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2305 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2308 offset
= radeon_get_ib_value(p
, idx
+3);
2309 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+4) & 0xff)) << 32;
2310 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2311 DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2312 offset
+ 4, radeon_bo_size(reloc
->robj
));
2315 ib
[idx
+3] += (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2316 ib
[idx
+4] += upper_32_bits(reloc
->lobj
.gpu_offset
) & 0xff;
2319 reg
= radeon_get_ib_value(p
, idx
+3) << 2;
2320 if (!evergreen_is_safe_reg(p
, reg
, idx
+3))
2327 DRM_ERROR("Packet3 opcode %x not supported\n", pkt
->opcode
);
2333 int evergreen_cs_parse(struct radeon_cs_parser
*p
)
2335 struct radeon_cs_packet pkt
;
2336 struct evergreen_cs_track
*track
;
2340 if (p
->track
== NULL
) {
2341 /* initialize tracker, we are in kms */
2342 track
= kzalloc(sizeof(*track
), GFP_KERNEL
);
2345 evergreen_cs_track_init(track
);
2346 if (p
->rdev
->family
>= CHIP_CAYMAN
)
2347 tmp
= p
->rdev
->config
.cayman
.tile_config
;
2349 tmp
= p
->rdev
->config
.evergreen
.tile_config
;
2351 switch (tmp
& 0xf) {
2367 switch ((tmp
& 0xf0) >> 4) {
2380 switch ((tmp
& 0xf00) >> 8) {
2382 track
->group_size
= 256;
2386 track
->group_size
= 512;
2390 switch ((tmp
& 0xf000) >> 12) {
2392 track
->row_size
= 1;
2396 track
->row_size
= 2;
2399 track
->row_size
= 4;
2406 r
= evergreen_cs_packet_parse(p
, &pkt
, p
->idx
);
2412 p
->idx
+= pkt
.count
+ 2;
2415 r
= evergreen_cs_parse_packet0(p
, &pkt
);
2420 r
= evergreen_packet3_check(p
, &pkt
);
2423 DRM_ERROR("Unknown packet type %d !\n", pkt
.type
);
2433 } while (p
->idx
< p
->chunks
[p
->chunk_ib_idx
].length_dw
);
2435 for (r
= 0; r
< p
->ib
->length_dw
; r
++) {
2436 printk(KERN_INFO
"%05d 0x%08X\n", r
, p
->ib
->ptr
[r
]);
2446 static bool evergreen_vm_reg_valid(u32 reg
)
2448 /* context regs are fine */
2452 /* check config regs */
2454 case GRBM_GFX_INDEX
:
2455 case VGT_VTX_VECT_EJECT_REG
:
2456 case VGT_CACHE_INVALIDATION
:
2457 case VGT_GS_VERTEX_REUSE
:
2458 case VGT_PRIMITIVE_TYPE
:
2459 case VGT_INDEX_TYPE
:
2460 case VGT_NUM_INDICES
:
2461 case VGT_NUM_INSTANCES
:
2462 case VGT_COMPUTE_DIM_X
:
2463 case VGT_COMPUTE_DIM_Y
:
2464 case VGT_COMPUTE_DIM_Z
:
2465 case VGT_COMPUTE_START_X
:
2466 case VGT_COMPUTE_START_Y
:
2467 case VGT_COMPUTE_START_Z
:
2468 case VGT_COMPUTE_INDEX
:
2469 case VGT_COMPUTE_THREAD_GROUP_SIZE
:
2470 case VGT_HS_OFFCHIP_PARAM
:
2472 case PA_SU_LINE_STIPPLE_VALUE
:
2473 case PA_SC_LINE_STIPPLE_STATE
:
2475 case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ
:
2476 case SQ_DYN_GPR_SIMD_LOCK_EN
:
2478 case SQ_GPR_RESOURCE_MGMT_1
:
2479 case SQ_GLOBAL_GPR_RESOURCE_MGMT_1
:
2480 case SQ_GLOBAL_GPR_RESOURCE_MGMT_2
:
2481 case SQ_CONST_MEM_BASE
:
2482 case SQ_STATIC_THREAD_MGMT_1
:
2483 case SQ_STATIC_THREAD_MGMT_2
:
2484 case SQ_STATIC_THREAD_MGMT_3
:
2485 case SPI_CONFIG_CNTL
:
2486 case SPI_CONFIG_CNTL_1
:
2493 case TD_PS_BORDER_COLOR_INDEX
:
2494 case TD_PS_BORDER_COLOR_RED
:
2495 case TD_PS_BORDER_COLOR_GREEN
:
2496 case TD_PS_BORDER_COLOR_BLUE
:
2497 case TD_PS_BORDER_COLOR_ALPHA
:
2498 case TD_VS_BORDER_COLOR_INDEX
:
2499 case TD_VS_BORDER_COLOR_RED
:
2500 case TD_VS_BORDER_COLOR_GREEN
:
2501 case TD_VS_BORDER_COLOR_BLUE
:
2502 case TD_VS_BORDER_COLOR_ALPHA
:
2503 case TD_GS_BORDER_COLOR_INDEX
:
2504 case TD_GS_BORDER_COLOR_RED
:
2505 case TD_GS_BORDER_COLOR_GREEN
:
2506 case TD_GS_BORDER_COLOR_BLUE
:
2507 case TD_GS_BORDER_COLOR_ALPHA
:
2508 case TD_HS_BORDER_COLOR_INDEX
:
2509 case TD_HS_BORDER_COLOR_RED
:
2510 case TD_HS_BORDER_COLOR_GREEN
:
2511 case TD_HS_BORDER_COLOR_BLUE
:
2512 case TD_HS_BORDER_COLOR_ALPHA
:
2513 case TD_LS_BORDER_COLOR_INDEX
:
2514 case TD_LS_BORDER_COLOR_RED
:
2515 case TD_LS_BORDER_COLOR_GREEN
:
2516 case TD_LS_BORDER_COLOR_BLUE
:
2517 case TD_LS_BORDER_COLOR_ALPHA
:
2518 case TD_CS_BORDER_COLOR_INDEX
:
2519 case TD_CS_BORDER_COLOR_RED
:
2520 case TD_CS_BORDER_COLOR_GREEN
:
2521 case TD_CS_BORDER_COLOR_BLUE
:
2522 case TD_CS_BORDER_COLOR_ALPHA
:
2523 case SQ_ESGS_RING_SIZE
:
2524 case SQ_GSVS_RING_SIZE
:
2525 case SQ_ESTMP_RING_SIZE
:
2526 case SQ_GSTMP_RING_SIZE
:
2527 case SQ_HSTMP_RING_SIZE
:
2528 case SQ_LSTMP_RING_SIZE
:
2529 case SQ_PSTMP_RING_SIZE
:
2530 case SQ_VSTMP_RING_SIZE
:
2531 case SQ_ESGS_RING_ITEMSIZE
:
2532 case SQ_ESTMP_RING_ITEMSIZE
:
2533 case SQ_GSTMP_RING_ITEMSIZE
:
2534 case SQ_GSVS_RING_ITEMSIZE
:
2535 case SQ_GS_VERT_ITEMSIZE
:
2536 case SQ_GS_VERT_ITEMSIZE_1
:
2537 case SQ_GS_VERT_ITEMSIZE_2
:
2538 case SQ_GS_VERT_ITEMSIZE_3
:
2539 case SQ_GSVS_RING_OFFSET_1
:
2540 case SQ_GSVS_RING_OFFSET_2
:
2541 case SQ_GSVS_RING_OFFSET_3
:
2542 case SQ_HSTMP_RING_ITEMSIZE
:
2543 case SQ_LSTMP_RING_ITEMSIZE
:
2544 case SQ_PSTMP_RING_ITEMSIZE
:
2545 case SQ_VSTMP_RING_ITEMSIZE
:
2546 case VGT_TF_RING_SIZE
:
2547 case SQ_ESGS_RING_BASE
:
2548 case SQ_GSVS_RING_BASE
:
2549 case SQ_ESTMP_RING_BASE
:
2550 case SQ_GSTMP_RING_BASE
:
2551 case SQ_HSTMP_RING_BASE
:
2552 case SQ_LSTMP_RING_BASE
:
2553 case SQ_PSTMP_RING_BASE
:
2554 case SQ_VSTMP_RING_BASE
:
2555 case CAYMAN_VGT_OFFCHIP_LDS_BASE
:
2556 case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS
:
2563 static int evergreen_vm_packet3_check(struct radeon_device
*rdev
,
2564 u32
*ib
, struct radeon_cs_packet
*pkt
)
2566 u32 idx
= pkt
->idx
+ 1;
2567 u32 idx_value
= ib
[idx
];
2568 u32 start_reg
, end_reg
, reg
, i
;
2570 switch (pkt
->opcode
) {
2572 case PACKET3_SET_BASE
:
2573 case PACKET3_CLEAR_STATE
:
2574 case PACKET3_INDEX_BUFFER_SIZE
:
2575 case PACKET3_DISPATCH_DIRECT
:
2576 case PACKET3_DISPATCH_INDIRECT
:
2577 case PACKET3_MODE_CONTROL
:
2578 case PACKET3_SET_PREDICATION
:
2579 case PACKET3_COND_EXEC
:
2580 case PACKET3_PRED_EXEC
:
2581 case PACKET3_DRAW_INDIRECT
:
2582 case PACKET3_DRAW_INDEX_INDIRECT
:
2583 case PACKET3_INDEX_BASE
:
2584 case PACKET3_DRAW_INDEX_2
:
2585 case PACKET3_CONTEXT_CONTROL
:
2586 case PACKET3_DRAW_INDEX_OFFSET
:
2587 case PACKET3_INDEX_TYPE
:
2588 case PACKET3_DRAW_INDEX
:
2589 case PACKET3_DRAW_INDEX_AUTO
:
2590 case PACKET3_DRAW_INDEX_IMMD
:
2591 case PACKET3_NUM_INSTANCES
:
2592 case PACKET3_DRAW_INDEX_MULTI_AUTO
:
2593 case PACKET3_STRMOUT_BUFFER_UPDATE
:
2594 case PACKET3_DRAW_INDEX_OFFSET_2
:
2595 case PACKET3_DRAW_INDEX_MULTI_ELEMENT
:
2596 case PACKET3_MPEG_INDEX
:
2597 case PACKET3_WAIT_REG_MEM
:
2598 case PACKET3_MEM_WRITE
:
2599 case PACKET3_SURFACE_SYNC
:
2600 case PACKET3_EVENT_WRITE
:
2601 case PACKET3_EVENT_WRITE_EOP
:
2602 case PACKET3_EVENT_WRITE_EOS
:
2603 case PACKET3_SET_CONTEXT_REG
:
2604 case PACKET3_SET_BOOL_CONST
:
2605 case PACKET3_SET_LOOP_CONST
:
2606 case PACKET3_SET_RESOURCE
:
2607 case PACKET3_SET_SAMPLER
:
2608 case PACKET3_SET_CTL_CONST
:
2609 case PACKET3_SET_RESOURCE_OFFSET
:
2610 case PACKET3_SET_CONTEXT_REG_INDIRECT
:
2611 case PACKET3_SET_RESOURCE_INDIRECT
:
2612 case CAYMAN_PACKET3_DEALLOC_STATE
:
2614 case PACKET3_COND_WRITE
:
2615 if (idx_value
& 0x100) {
2616 reg
= ib
[idx
+ 5] * 4;
2617 if (!evergreen_vm_reg_valid(reg
))
2621 case PACKET3_COPY_DW
:
2622 if (idx_value
& 0x2) {
2623 reg
= ib
[idx
+ 3] * 4;
2624 if (!evergreen_vm_reg_valid(reg
))
2628 case PACKET3_SET_CONFIG_REG
:
2629 start_reg
= (idx_value
<< 2) + PACKET3_SET_CONFIG_REG_START
;
2630 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2631 if ((start_reg
< PACKET3_SET_CONFIG_REG_START
) ||
2632 (start_reg
>= PACKET3_SET_CONFIG_REG_END
) ||
2633 (end_reg
>= PACKET3_SET_CONFIG_REG_END
)) {
2634 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2637 for (i
= 0; i
< pkt
->count
; i
++) {
2638 reg
= start_reg
+ (4 * i
);
2639 if (!evergreen_vm_reg_valid(reg
))
2649 int evergreen_ib_parse(struct radeon_device
*rdev
, struct radeon_ib
*ib
)
2653 struct radeon_cs_packet pkt
;
2657 pkt
.type
= CP_PACKET_GET_TYPE(ib
->ptr
[idx
]);
2658 pkt
.count
= CP_PACKET_GET_COUNT(ib
->ptr
[idx
]);
2662 dev_err(rdev
->dev
, "Packet0 not allowed!\n");
2669 pkt
.opcode
= CP_PACKET3_GET_OPCODE(ib
->ptr
[idx
]);
2670 ret
= evergreen_vm_packet3_check(rdev
, ib
->ptr
, &pkt
);
2671 idx
+= pkt
.count
+ 2;
2674 dev_err(rdev
->dev
, "Unknown packet type %d !\n", pkt
.type
);
2680 } while (idx
< ib
->length_dw
);