kernel - AHCI - enable AHCI device initiated power management
[dragonfly.git] / sys / dev / drm / radeon_state.c
blobe7c5c5e43a9602c7204ce95bab512d26a4aefe0a
1 /*-
2 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
3 * All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
24 * Authors:
25 * Gareth Hughes <gareth@valinux.com>
26 * Kevin E. Martin <martin@valinux.com>
29 #include "dev/drm/drmP.h"
30 #include "dev/drm/drm.h"
31 #include "dev/drm/drm_sarea.h"
32 #include "dev/drm/radeon_drm.h"
33 #include "dev/drm/radeon_drv.h"
35 /* ================================================================
36 * Helper functions for client state checking and fixup
39 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
40 dev_priv,
41 struct drm_file *file_priv,
42 u32 *offset)
44 u64 off = *offset;
45 u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
46 struct drm_radeon_driver_file_fields *radeon_priv;
48 /* Hrm ... the story of the offset ... So this function converts
49 * the various ideas of what userland clients might have for an
50 * offset in the card address space into an offset into the card
51 * address space :) So with a sane client, it should just keep
52 * the value intact and just do some boundary checking. However,
53 * not all clients are sane. Some older clients pass us 0 based
54 * offsets relative to the start of the framebuffer and some may
55 * assume the AGP aperture it appended to the framebuffer, so we
56 * try to detect those cases and fix them up.
58 * Note: It might be a good idea here to make sure the offset lands
59 * in some "allowed" area to protect things like the PCIE GART...
62 /* First, the best case, the offset already lands in either the
63 * framebuffer or the GART mapped space
65 if (radeon_check_offset(dev_priv, off))
66 return 0;
68 /* Ok, that didn't happen... now check if we have a zero based
69 * offset that fits in the framebuffer + gart space, apply the
70 * magic offset we get from SETPARAM or calculated from fb_location
72 if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
73 radeon_priv = file_priv->driver_priv;
74 off += radeon_priv->radeon_fb_delta;
77 /* Finally, assume we aimed at a GART offset if beyond the fb */
78 if (off > fb_end)
79 off = off - fb_end - 1 + dev_priv->gart_vm_start;
81 /* Now recheck and fail if out of bounds */
82 if (radeon_check_offset(dev_priv, off)) {
83 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
84 *offset = off;
85 return 0;
87 return -EINVAL;
90 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
91 dev_priv,
92 struct drm_file *file_priv,
93 int id, u32 *data)
95 switch (id) {
97 case RADEON_EMIT_PP_MISC:
98 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
99 &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
100 DRM_ERROR("Invalid depth buffer offset\n");
101 return -EINVAL;
103 break;
105 case RADEON_EMIT_PP_CNTL:
106 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
107 &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
108 DRM_ERROR("Invalid colour buffer offset\n");
109 return -EINVAL;
111 break;
113 case R200_EMIT_PP_TXOFFSET_0:
114 case R200_EMIT_PP_TXOFFSET_1:
115 case R200_EMIT_PP_TXOFFSET_2:
116 case R200_EMIT_PP_TXOFFSET_3:
117 case R200_EMIT_PP_TXOFFSET_4:
118 case R200_EMIT_PP_TXOFFSET_5:
119 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
120 &data[0])) {
121 DRM_ERROR("Invalid R200 texture offset\n");
122 return -EINVAL;
124 break;
126 case RADEON_EMIT_PP_TXFILTER_0:
127 case RADEON_EMIT_PP_TXFILTER_1:
128 case RADEON_EMIT_PP_TXFILTER_2:
129 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
130 &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
131 DRM_ERROR("Invalid R100 texture offset\n");
132 return -EINVAL;
134 break;
136 case R200_EMIT_PP_CUBIC_OFFSETS_0:
137 case R200_EMIT_PP_CUBIC_OFFSETS_1:
138 case R200_EMIT_PP_CUBIC_OFFSETS_2:
139 case R200_EMIT_PP_CUBIC_OFFSETS_3:
140 case R200_EMIT_PP_CUBIC_OFFSETS_4:
141 case R200_EMIT_PP_CUBIC_OFFSETS_5:{
142 int i;
143 for (i = 0; i < 5; i++) {
144 if (radeon_check_and_fixup_offset(dev_priv,
145 file_priv,
146 &data[i])) {
147 DRM_ERROR
148 ("Invalid R200 cubic texture offset\n");
149 return -EINVAL;
152 break;
155 case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
156 case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
157 case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
158 int i;
159 for (i = 0; i < 5; i++) {
160 if (radeon_check_and_fixup_offset(dev_priv,
161 file_priv,
162 &data[i])) {
163 DRM_ERROR
164 ("Invalid R100 cubic texture offset\n");
165 return -EINVAL;
169 break;
171 case R200_EMIT_VAP_CTL: {
172 RING_LOCALS;
173 BEGIN_RING(2);
174 OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
175 ADVANCE_RING();
177 break;
179 case RADEON_EMIT_RB3D_COLORPITCH:
180 case RADEON_EMIT_RE_LINE_PATTERN:
181 case RADEON_EMIT_SE_LINE_WIDTH:
182 case RADEON_EMIT_PP_LUM_MATRIX:
183 case RADEON_EMIT_PP_ROT_MATRIX_0:
184 case RADEON_EMIT_RB3D_STENCILREFMASK:
185 case RADEON_EMIT_SE_VPORT_XSCALE:
186 case RADEON_EMIT_SE_CNTL:
187 case RADEON_EMIT_SE_CNTL_STATUS:
188 case RADEON_EMIT_RE_MISC:
189 case RADEON_EMIT_PP_BORDER_COLOR_0:
190 case RADEON_EMIT_PP_BORDER_COLOR_1:
191 case RADEON_EMIT_PP_BORDER_COLOR_2:
192 case RADEON_EMIT_SE_ZBIAS_FACTOR:
193 case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
194 case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
195 case R200_EMIT_PP_TXCBLEND_0:
196 case R200_EMIT_PP_TXCBLEND_1:
197 case R200_EMIT_PP_TXCBLEND_2:
198 case R200_EMIT_PP_TXCBLEND_3:
199 case R200_EMIT_PP_TXCBLEND_4:
200 case R200_EMIT_PP_TXCBLEND_5:
201 case R200_EMIT_PP_TXCBLEND_6:
202 case R200_EMIT_PP_TXCBLEND_7:
203 case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
204 case R200_EMIT_TFACTOR_0:
205 case R200_EMIT_VTX_FMT_0:
206 case R200_EMIT_MATRIX_SELECT_0:
207 case R200_EMIT_TEX_PROC_CTL_2:
208 case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
209 case R200_EMIT_PP_TXFILTER_0:
210 case R200_EMIT_PP_TXFILTER_1:
211 case R200_EMIT_PP_TXFILTER_2:
212 case R200_EMIT_PP_TXFILTER_3:
213 case R200_EMIT_PP_TXFILTER_4:
214 case R200_EMIT_PP_TXFILTER_5:
215 case R200_EMIT_VTE_CNTL:
216 case R200_EMIT_OUTPUT_VTX_COMP_SEL:
217 case R200_EMIT_PP_TAM_DEBUG3:
218 case R200_EMIT_PP_CNTL_X:
219 case R200_EMIT_RB3D_DEPTHXY_OFFSET:
220 case R200_EMIT_RE_AUX_SCISSOR_CNTL:
221 case R200_EMIT_RE_SCISSOR_TL_0:
222 case R200_EMIT_RE_SCISSOR_TL_1:
223 case R200_EMIT_RE_SCISSOR_TL_2:
224 case R200_EMIT_SE_VAP_CNTL_STATUS:
225 case R200_EMIT_SE_VTX_STATE_CNTL:
226 case R200_EMIT_RE_POINTSIZE:
227 case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
228 case R200_EMIT_PP_CUBIC_FACES_0:
229 case R200_EMIT_PP_CUBIC_FACES_1:
230 case R200_EMIT_PP_CUBIC_FACES_2:
231 case R200_EMIT_PP_CUBIC_FACES_3:
232 case R200_EMIT_PP_CUBIC_FACES_4:
233 case R200_EMIT_PP_CUBIC_FACES_5:
234 case RADEON_EMIT_PP_TEX_SIZE_0:
235 case RADEON_EMIT_PP_TEX_SIZE_1:
236 case RADEON_EMIT_PP_TEX_SIZE_2:
237 case R200_EMIT_RB3D_BLENDCOLOR:
238 case R200_EMIT_TCL_POINT_SPRITE_CNTL:
239 case RADEON_EMIT_PP_CUBIC_FACES_0:
240 case RADEON_EMIT_PP_CUBIC_FACES_1:
241 case RADEON_EMIT_PP_CUBIC_FACES_2:
242 case R200_EMIT_PP_TRI_PERF_CNTL:
243 case R200_EMIT_PP_AFS_0:
244 case R200_EMIT_PP_AFS_1:
245 case R200_EMIT_ATF_TFACTOR:
246 case R200_EMIT_PP_TXCTLALL_0:
247 case R200_EMIT_PP_TXCTLALL_1:
248 case R200_EMIT_PP_TXCTLALL_2:
249 case R200_EMIT_PP_TXCTLALL_3:
250 case R200_EMIT_PP_TXCTLALL_4:
251 case R200_EMIT_PP_TXCTLALL_5:
252 case R200_EMIT_VAP_PVS_CNTL:
253 /* These packets don't contain memory offsets */
254 break;
256 default:
257 DRM_ERROR("Unknown state packet ID %d\n", id);
258 return -EINVAL;
261 return 0;
264 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
265 dev_priv,
266 struct drm_file *file_priv,
267 drm_radeon_kcmd_buffer_t *
268 cmdbuf,
269 unsigned int *cmdsz)
271 u32 *cmd = (u32 *) cmdbuf->buf;
272 u32 offset, narrays;
273 int count, i, k;
275 *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
277 if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
278 DRM_ERROR("Not a type 3 packet\n");
279 return -EINVAL;
282 if (4 * *cmdsz > cmdbuf->bufsz) {
283 DRM_ERROR("Packet size larger than size of data provided\n");
284 return -EINVAL;
287 switch(cmd[0] & 0xff00) {
288 /* XXX Are there old drivers needing other packets? */
290 case RADEON_3D_DRAW_IMMD:
291 case RADEON_3D_DRAW_VBUF:
292 case RADEON_3D_DRAW_INDX:
293 case RADEON_WAIT_FOR_IDLE:
294 case RADEON_CP_NOP:
295 case RADEON_3D_CLEAR_ZMASK:
296 /* case RADEON_CP_NEXT_CHAR:
297 case RADEON_CP_PLY_NEXTSCAN:
298 case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
299 /* these packets are safe */
300 break;
302 case RADEON_CP_3D_DRAW_IMMD_2:
303 case RADEON_CP_3D_DRAW_VBUF_2:
304 case RADEON_CP_3D_DRAW_INDX_2:
305 case RADEON_3D_CLEAR_HIZ:
306 /* safe but r200 only */
307 if (dev_priv->microcode_version != UCODE_R200) {
308 DRM_ERROR("Invalid 3d packet for r100-class chip\n");
309 return -EINVAL;
311 break;
313 case RADEON_3D_LOAD_VBPNTR:
314 count = (cmd[0] >> 16) & 0x3fff;
316 if (count > 18) { /* 12 arrays max */
317 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
318 count);
319 return -EINVAL;
322 /* carefully check packet contents */
323 narrays = cmd[1] & ~0xc000;
324 k = 0;
325 i = 2;
326 while ((k < narrays) && (i < (count + 2))) {
327 i++; /* skip attribute field */
328 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
329 &cmd[i])) {
330 DRM_ERROR
331 ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
332 k, i);
333 return -EINVAL;
335 k++;
336 i++;
337 if (k == narrays)
338 break;
339 /* have one more to process, they come in pairs */
340 if (radeon_check_and_fixup_offset(dev_priv,
341 file_priv, &cmd[i]))
343 DRM_ERROR
344 ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
345 k, i);
346 return -EINVAL;
348 k++;
349 i++;
351 /* do the counts match what we expect ? */
352 if ((k != narrays) || (i != (count + 2))) {
353 DRM_ERROR
354 ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
355 k, i, narrays, count + 1);
356 return -EINVAL;
358 break;
360 case RADEON_3D_RNDR_GEN_INDX_PRIM:
361 if (dev_priv->microcode_version != UCODE_R100) {
362 DRM_ERROR("Invalid 3d packet for r200-class chip\n");
363 return -EINVAL;
365 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
366 DRM_ERROR("Invalid rndr_gen_indx offset\n");
367 return -EINVAL;
369 break;
371 case RADEON_CP_INDX_BUFFER:
372 if (dev_priv->microcode_version != UCODE_R200) {
373 DRM_ERROR("Invalid 3d packet for r100-class chip\n");
374 return -EINVAL;
376 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
377 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
378 return -EINVAL;
380 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
381 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
382 return -EINVAL;
384 break;
386 case RADEON_CNTL_HOSTDATA_BLT:
387 case RADEON_CNTL_PAINT_MULTI:
388 case RADEON_CNTL_BITBLT_MULTI:
389 /* MSB of opcode: next DWORD GUI_CNTL */
390 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
391 | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
392 offset = cmd[2] << 10;
393 if (radeon_check_and_fixup_offset
394 (dev_priv, file_priv, &offset)) {
395 DRM_ERROR("Invalid first packet offset\n");
396 return -EINVAL;
398 cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
401 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
402 (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
403 offset = cmd[3] << 10;
404 if (radeon_check_and_fixup_offset
405 (dev_priv, file_priv, &offset)) {
406 DRM_ERROR("Invalid second packet offset\n");
407 return -EINVAL;
409 cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
411 break;
413 default:
414 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
415 return -EINVAL;
418 return 0;
421 /* ================================================================
422 * CP hardware state programming functions
425 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
426 struct drm_clip_rect * box)
428 RING_LOCALS;
430 DRM_DEBUG(" box: x1=%d y1=%d x2=%d y2=%d\n",
431 box->x1, box->y1, box->x2, box->y2);
433 BEGIN_RING(4);
434 OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
435 OUT_RING((box->y1 << 16) | box->x1);
436 OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
437 OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
438 ADVANCE_RING();
441 /* Emit 1.1 state
443 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
444 struct drm_file *file_priv,
445 drm_radeon_context_regs_t * ctx,
446 drm_radeon_texture_regs_t * tex,
447 unsigned int dirty)
449 RING_LOCALS;
450 DRM_DEBUG("dirty=0x%08x\n", dirty);
452 if (dirty & RADEON_UPLOAD_CONTEXT) {
453 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
454 &ctx->rb3d_depthoffset)) {
455 DRM_ERROR("Invalid depth buffer offset\n");
456 return -EINVAL;
459 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
460 &ctx->rb3d_coloroffset)) {
461 DRM_ERROR("Invalid depth buffer offset\n");
462 return -EINVAL;
465 BEGIN_RING(14);
466 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
467 OUT_RING(ctx->pp_misc);
468 OUT_RING(ctx->pp_fog_color);
469 OUT_RING(ctx->re_solid_color);
470 OUT_RING(ctx->rb3d_blendcntl);
471 OUT_RING(ctx->rb3d_depthoffset);
472 OUT_RING(ctx->rb3d_depthpitch);
473 OUT_RING(ctx->rb3d_zstencilcntl);
474 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
475 OUT_RING(ctx->pp_cntl);
476 OUT_RING(ctx->rb3d_cntl);
477 OUT_RING(ctx->rb3d_coloroffset);
478 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
479 OUT_RING(ctx->rb3d_colorpitch);
480 ADVANCE_RING();
483 if (dirty & RADEON_UPLOAD_VERTFMT) {
484 BEGIN_RING(2);
485 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
486 OUT_RING(ctx->se_coord_fmt);
487 ADVANCE_RING();
490 if (dirty & RADEON_UPLOAD_LINE) {
491 BEGIN_RING(5);
492 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
493 OUT_RING(ctx->re_line_pattern);
494 OUT_RING(ctx->re_line_state);
495 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
496 OUT_RING(ctx->se_line_width);
497 ADVANCE_RING();
500 if (dirty & RADEON_UPLOAD_BUMPMAP) {
501 BEGIN_RING(5);
502 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
503 OUT_RING(ctx->pp_lum_matrix);
504 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
505 OUT_RING(ctx->pp_rot_matrix_0);
506 OUT_RING(ctx->pp_rot_matrix_1);
507 ADVANCE_RING();
510 if (dirty & RADEON_UPLOAD_MASKS) {
511 BEGIN_RING(4);
512 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
513 OUT_RING(ctx->rb3d_stencilrefmask);
514 OUT_RING(ctx->rb3d_ropcntl);
515 OUT_RING(ctx->rb3d_planemask);
516 ADVANCE_RING();
519 if (dirty & RADEON_UPLOAD_VIEWPORT) {
520 BEGIN_RING(7);
521 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
522 OUT_RING(ctx->se_vport_xscale);
523 OUT_RING(ctx->se_vport_xoffset);
524 OUT_RING(ctx->se_vport_yscale);
525 OUT_RING(ctx->se_vport_yoffset);
526 OUT_RING(ctx->se_vport_zscale);
527 OUT_RING(ctx->se_vport_zoffset);
528 ADVANCE_RING();
531 if (dirty & RADEON_UPLOAD_SETUP) {
532 BEGIN_RING(4);
533 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
534 OUT_RING(ctx->se_cntl);
535 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
536 OUT_RING(ctx->se_cntl_status);
537 ADVANCE_RING();
540 if (dirty & RADEON_UPLOAD_MISC) {
541 BEGIN_RING(2);
542 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
543 OUT_RING(ctx->re_misc);
544 ADVANCE_RING();
547 if (dirty & RADEON_UPLOAD_TEX0) {
548 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
549 &tex[0].pp_txoffset)) {
550 DRM_ERROR("Invalid texture offset for unit 0\n");
551 return -EINVAL;
554 BEGIN_RING(9);
555 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
556 OUT_RING(tex[0].pp_txfilter);
557 OUT_RING(tex[0].pp_txformat);
558 OUT_RING(tex[0].pp_txoffset);
559 OUT_RING(tex[0].pp_txcblend);
560 OUT_RING(tex[0].pp_txablend);
561 OUT_RING(tex[0].pp_tfactor);
562 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
563 OUT_RING(tex[0].pp_border_color);
564 ADVANCE_RING();
567 if (dirty & RADEON_UPLOAD_TEX1) {
568 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
569 &tex[1].pp_txoffset)) {
570 DRM_ERROR("Invalid texture offset for unit 1\n");
571 return -EINVAL;
574 BEGIN_RING(9);
575 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
576 OUT_RING(tex[1].pp_txfilter);
577 OUT_RING(tex[1].pp_txformat);
578 OUT_RING(tex[1].pp_txoffset);
579 OUT_RING(tex[1].pp_txcblend);
580 OUT_RING(tex[1].pp_txablend);
581 OUT_RING(tex[1].pp_tfactor);
582 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
583 OUT_RING(tex[1].pp_border_color);
584 ADVANCE_RING();
587 if (dirty & RADEON_UPLOAD_TEX2) {
588 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
589 &tex[2].pp_txoffset)) {
590 DRM_ERROR("Invalid texture offset for unit 2\n");
591 return -EINVAL;
594 BEGIN_RING(9);
595 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
596 OUT_RING(tex[2].pp_txfilter);
597 OUT_RING(tex[2].pp_txformat);
598 OUT_RING(tex[2].pp_txoffset);
599 OUT_RING(tex[2].pp_txcblend);
600 OUT_RING(tex[2].pp_txablend);
601 OUT_RING(tex[2].pp_tfactor);
602 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
603 OUT_RING(tex[2].pp_border_color);
604 ADVANCE_RING();
607 return 0;
610 /* Emit 1.2 state
612 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
613 struct drm_file *file_priv,
614 drm_radeon_state_t * state)
616 RING_LOCALS;
618 if (state->dirty & RADEON_UPLOAD_ZBIAS) {
619 BEGIN_RING(3);
620 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
621 OUT_RING(state->context2.se_zbias_factor);
622 OUT_RING(state->context2.se_zbias_constant);
623 ADVANCE_RING();
626 return radeon_emit_state(dev_priv, file_priv, &state->context,
627 state->tex, state->dirty);
630 /* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
631 * 1.3 cmdbuffers allow all previous state to be updated as well as
632 * the tcl scalar and vector areas.
634 static struct {
635 int start;
636 int len;
637 const char *name;
638 } packet[RADEON_MAX_STATE_PACKETS] = {
639 {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
640 {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
641 {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
642 {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
643 {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
644 {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
645 {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
646 {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
647 {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
648 {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
649 {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
650 {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
651 {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
652 {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
653 {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
654 {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
655 {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
656 {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
657 {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
658 {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
659 {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
660 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
661 {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
662 {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
663 {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
664 {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
665 {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
666 {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
667 {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
668 {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
669 {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
670 {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
671 {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
672 {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
673 {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
674 {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
675 {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
676 {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
677 {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
678 {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
679 {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
680 {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
681 {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
682 {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
683 {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
684 {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
685 {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
686 {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
687 {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
688 {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
689 {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
690 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
691 {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
692 {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
693 {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
694 {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
695 {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
696 {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
697 {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
698 {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
699 {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
700 {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
701 {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
702 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
703 {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
704 {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
705 {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
706 {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
707 {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
708 {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
709 {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
710 {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
711 {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
712 {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
713 {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
714 {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
715 {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
716 {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
717 {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
718 {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
719 {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
720 {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
721 {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
722 {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
723 {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
724 {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
725 {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
726 {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
727 {R200_PP_AFS_0, 32, "R200_PP_AFS_0"}, /* 85 */
728 {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
729 {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
730 {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
731 {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
732 {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
733 {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
734 {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
735 {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
736 {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
739 /* ================================================================
740 * Performance monitoring functions
743 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
744 int x, int y, int w, int h, int r, int g, int b)
746 u32 color;
747 RING_LOCALS;
749 x += dev_priv->sarea_priv->boxes[0].x1;
750 y += dev_priv->sarea_priv->boxes[0].y1;
752 switch (dev_priv->color_fmt) {
753 case RADEON_COLOR_FORMAT_RGB565:
754 color = (((r & 0xf8) << 8) |
755 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
756 break;
757 case RADEON_COLOR_FORMAT_ARGB8888:
758 default:
759 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
760 break;
763 BEGIN_RING(4);
764 RADEON_WAIT_UNTIL_3D_IDLE();
765 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
766 OUT_RING(0xffffffff);
767 ADVANCE_RING();
769 BEGIN_RING(6);
771 OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
772 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
773 RADEON_GMC_BRUSH_SOLID_COLOR |
774 (dev_priv->color_fmt << 8) |
775 RADEON_GMC_SRC_DATATYPE_COLOR |
776 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
778 if (dev_priv->sarea_priv->pfCurrentPage == 1) {
779 OUT_RING(dev_priv->front_pitch_offset);
780 } else {
781 OUT_RING(dev_priv->back_pitch_offset);
784 OUT_RING(color);
786 OUT_RING((x << 16) | y);
787 OUT_RING((w << 16) | h);
789 ADVANCE_RING();
792 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv)
794 /* Collapse various things into a wait flag -- trying to
795 * guess if userspase slept -- better just to have them tell us.
797 if (dev_priv->stats.last_frame_reads > 1 ||
798 dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
799 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
802 if (dev_priv->stats.freelist_loops) {
803 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
806 /* Purple box for page flipping
808 if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
809 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
811 /* Red box if we have to wait for idle at any point
813 if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
814 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
816 /* Blue box: lost context?
819 /* Yellow box for texture swaps
821 if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
822 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
824 /* Green box if hardware never idles (as far as we can tell)
826 if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
827 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
829 /* Draw bars indicating number of buffers allocated
830 * (not a great measure, easily confused)
832 if (dev_priv->stats.requested_bufs) {
833 if (dev_priv->stats.requested_bufs > 100)
834 dev_priv->stats.requested_bufs = 100;
836 radeon_clear_box(dev_priv, 4, 16,
837 dev_priv->stats.requested_bufs, 4,
838 196, 128, 128);
841 memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
845 /* ================================================================
846 * CP command dispatch functions
849 static void radeon_cp_dispatch_clear(struct drm_device * dev,
850 drm_radeon_clear_t * clear,
851 drm_radeon_clear_rect_t * depth_boxes)
853 drm_radeon_private_t *dev_priv = dev->dev_private;
854 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
855 drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
856 int nbox = sarea_priv->nbox;
857 struct drm_clip_rect *pbox = sarea_priv->boxes;
858 unsigned int flags = clear->flags;
859 u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
860 int i;
861 RING_LOCALS;
862 DRM_DEBUG("flags = 0x%x\n", flags);
864 dev_priv->stats.clears++;
866 if (sarea_priv->pfCurrentPage == 1) {
867 unsigned int tmp = flags;
869 flags &= ~(RADEON_FRONT | RADEON_BACK);
870 if (tmp & RADEON_FRONT)
871 flags |= RADEON_BACK;
872 if (tmp & RADEON_BACK)
873 flags |= RADEON_FRONT;
876 if (flags & (RADEON_FRONT | RADEON_BACK)) {
878 BEGIN_RING(4);
880 /* Ensure the 3D stream is idle before doing a
881 * 2D fill to clear the front or back buffer.
883 RADEON_WAIT_UNTIL_3D_IDLE();
885 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
886 OUT_RING(clear->color_mask);
888 ADVANCE_RING();
890 /* Make sure we restore the 3D state next time.
892 sarea_priv->ctx_owner = 0;
894 for (i = 0; i < nbox; i++) {
895 int x = pbox[i].x1;
896 int y = pbox[i].y1;
897 int w = pbox[i].x2 - x;
898 int h = pbox[i].y2 - y;
900 DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
901 x, y, w, h, flags);
903 if (flags & RADEON_FRONT) {
904 BEGIN_RING(6);
906 OUT_RING(CP_PACKET3
907 (RADEON_CNTL_PAINT_MULTI, 4));
908 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
909 RADEON_GMC_BRUSH_SOLID_COLOR |
910 (dev_priv->
911 color_fmt << 8) |
912 RADEON_GMC_SRC_DATATYPE_COLOR |
913 RADEON_ROP3_P |
914 RADEON_GMC_CLR_CMP_CNTL_DIS);
916 OUT_RING(dev_priv->front_pitch_offset);
917 OUT_RING(clear->clear_color);
919 OUT_RING((x << 16) | y);
920 OUT_RING((w << 16) | h);
922 ADVANCE_RING();
925 if (flags & RADEON_BACK) {
926 BEGIN_RING(6);
928 OUT_RING(CP_PACKET3
929 (RADEON_CNTL_PAINT_MULTI, 4));
930 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
931 RADEON_GMC_BRUSH_SOLID_COLOR |
932 (dev_priv->
933 color_fmt << 8) |
934 RADEON_GMC_SRC_DATATYPE_COLOR |
935 RADEON_ROP3_P |
936 RADEON_GMC_CLR_CMP_CNTL_DIS);
938 OUT_RING(dev_priv->back_pitch_offset);
939 OUT_RING(clear->clear_color);
941 OUT_RING((x << 16) | y);
942 OUT_RING((w << 16) | h);
944 ADVANCE_RING();
949 /* hyper z clear */
950 /* no docs available, based on reverse engineering by Stephane Marchesin */
951 if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
952 && (flags & RADEON_CLEAR_FASTZ)) {
954 int i;
955 int depthpixperline =
956 dev_priv->depth_fmt ==
957 RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
958 2) : (dev_priv->
959 depth_pitch / 4);
961 u32 clearmask;
963 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
964 ((clear->depth_mask & 0xff) << 24);
966 /* Make sure we restore the 3D state next time.
967 * we haven't touched any "normal" state - still need this?
969 sarea_priv->ctx_owner = 0;
971 if ((dev_priv->flags & RADEON_HAS_HIERZ)
972 && (flags & RADEON_USE_HIERZ)) {
973 /* FIXME : reverse engineer that for Rx00 cards */
974 /* FIXME : the mask supposedly contains low-res z values. So can't set
975 just to the max (0xff? or actually 0x3fff?), need to take z clear
976 value into account? */
977 /* pattern seems to work for r100, though get slight
978 rendering errors with glxgears. If hierz is not enabled for r100,
979 only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
980 other ones are ignored, and the same clear mask can be used. That's
981 very different behaviour than R200 which needs different clear mask
982 and different number of tiles to clear if hierz is enabled or not !?!
984 clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
985 } else {
986 /* clear mask : chooses the clearing pattern.
987 rv250: could be used to clear only parts of macrotiles
988 (but that would get really complicated...)?
989 bit 0 and 1 (either or both of them ?!?!) are used to
990 not clear tile (or maybe one of the bits indicates if the tile is
991 compressed or not), bit 2 and 3 to not clear tile 1,...,.
992 Pattern is as follows:
993 | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
994 bits -------------------------------------------------
995 | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
996 rv100: clearmask covers 2x8 4x1 tiles, but one clear still
997 covers 256 pixels ?!?
999 clearmask = 0x0;
1002 BEGIN_RING(8);
1003 RADEON_WAIT_UNTIL_2D_IDLE();
1004 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1005 tempRB3D_DEPTHCLEARVALUE);
1006 /* what offset is this exactly ? */
1007 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1008 /* need ctlstat, otherwise get some strange black flickering */
1009 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1010 RADEON_RB3D_ZC_FLUSH_ALL);
1011 ADVANCE_RING();
1013 for (i = 0; i < nbox; i++) {
1014 int tileoffset, nrtilesx, nrtilesy, j;
1015 /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1016 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1017 && !(dev_priv->microcode_version == UCODE_R200)) {
1018 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1019 maybe r200 actually doesn't need to put the low-res z value into
1020 the tile cache like r100, but just needs to clear the hi-level z-buffer?
1021 Works for R100, both with hierz and without.
1022 R100 seems to operate on 2x1 8x8 tiles, but...
1023 odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1024 problematic with resolutions which are not 64 pix aligned? */
1025 tileoffset =
1026 ((pbox[i].y1 >> 3) * depthpixperline +
1027 pbox[i].x1) >> 6;
1028 nrtilesx =
1029 ((pbox[i].x2 & ~63) -
1030 (pbox[i].x1 & ~63)) >> 4;
1031 nrtilesy =
1032 (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1033 for (j = 0; j <= nrtilesy; j++) {
1034 BEGIN_RING(4);
1035 OUT_RING(CP_PACKET3
1036 (RADEON_3D_CLEAR_ZMASK, 2));
1037 /* first tile */
1038 OUT_RING(tileoffset * 8);
1039 /* the number of tiles to clear */
1040 OUT_RING(nrtilesx + 4);
1041 /* clear mask : chooses the clearing pattern. */
1042 OUT_RING(clearmask);
1043 ADVANCE_RING();
1044 tileoffset += depthpixperline >> 6;
1046 } else if (dev_priv->microcode_version == UCODE_R200) {
1047 /* works for rv250. */
1048 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1049 tileoffset =
1050 ((pbox[i].y1 >> 3) * depthpixperline +
1051 pbox[i].x1) >> 5;
1052 nrtilesx =
1053 (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1054 nrtilesy =
1055 (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1056 for (j = 0; j <= nrtilesy; j++) {
1057 BEGIN_RING(4);
1058 OUT_RING(CP_PACKET3
1059 (RADEON_3D_CLEAR_ZMASK, 2));
1060 /* first tile */
1061 /* judging by the first tile offset needed, could possibly
1062 directly address/clear 4x4 tiles instead of 8x2 * 4x4
1063 macro tiles, though would still need clear mask for
1064 right/bottom if truely 4x4 granularity is desired ? */
1065 OUT_RING(tileoffset * 16);
1066 /* the number of tiles to clear */
1067 OUT_RING(nrtilesx + 1);
1068 /* clear mask : chooses the clearing pattern. */
1069 OUT_RING(clearmask);
1070 ADVANCE_RING();
1071 tileoffset += depthpixperline >> 5;
1073 } else { /* rv 100 */
1074 /* rv100 might not need 64 pix alignment, who knows */
1075 /* offsets are, hmm, weird */
1076 tileoffset =
1077 ((pbox[i].y1 >> 4) * depthpixperline +
1078 pbox[i].x1) >> 6;
1079 nrtilesx =
1080 ((pbox[i].x2 & ~63) -
1081 (pbox[i].x1 & ~63)) >> 4;
1082 nrtilesy =
1083 (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1084 for (j = 0; j <= nrtilesy; j++) {
1085 BEGIN_RING(4);
1086 OUT_RING(CP_PACKET3
1087 (RADEON_3D_CLEAR_ZMASK, 2));
1088 OUT_RING(tileoffset * 128);
1089 /* the number of tiles to clear */
1090 OUT_RING(nrtilesx + 4);
1091 /* clear mask : chooses the clearing pattern. */
1092 OUT_RING(clearmask);
1093 ADVANCE_RING();
1094 tileoffset += depthpixperline >> 6;
1099 /* TODO don't always clear all hi-level z tiles */
1100 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1101 && (dev_priv->microcode_version == UCODE_R200)
1102 && (flags & RADEON_USE_HIERZ))
1103 /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1104 /* FIXME : the mask supposedly contains low-res z values. So can't set
1105 just to the max (0xff? or actually 0x3fff?), need to take z clear
1106 value into account? */
1108 BEGIN_RING(4);
1109 OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1110 OUT_RING(0x0); /* First tile */
1111 OUT_RING(0x3cc0);
1112 OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1113 ADVANCE_RING();
1117 /* We have to clear the depth and/or stencil buffers by
1118 * rendering a quad into just those buffers. Thus, we have to
1119 * make sure the 3D engine is configured correctly.
1121 else if ((dev_priv->microcode_version == UCODE_R200) &&
1122 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1124 int tempPP_CNTL;
1125 int tempRE_CNTL;
1126 int tempRB3D_CNTL;
1127 int tempRB3D_ZSTENCILCNTL;
1128 int tempRB3D_STENCILREFMASK;
1129 int tempRB3D_PLANEMASK;
1130 int tempSE_CNTL;
1131 int tempSE_VTE_CNTL;
1132 int tempSE_VTX_FMT_0;
1133 int tempSE_VTX_FMT_1;
1134 int tempSE_VAP_CNTL;
1135 int tempRE_AUX_SCISSOR_CNTL;
1137 tempPP_CNTL = 0;
1138 tempRE_CNTL = 0;
1140 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1142 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1143 tempRB3D_STENCILREFMASK = 0x0;
1145 tempSE_CNTL = depth_clear->se_cntl;
1147 /* Disable TCL */
1149 tempSE_VAP_CNTL = ( /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
1150 (0x9 <<
1151 SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1153 tempRB3D_PLANEMASK = 0x0;
1155 tempRE_AUX_SCISSOR_CNTL = 0x0;
1157 tempSE_VTE_CNTL =
1158 SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1160 /* Vertex format (X, Y, Z, W) */
1161 tempSE_VTX_FMT_0 =
1162 SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1163 SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1164 tempSE_VTX_FMT_1 = 0x0;
1167 * Depth buffer specific enables
1169 if (flags & RADEON_DEPTH) {
1170 /* Enable depth buffer */
1171 tempRB3D_CNTL |= RADEON_Z_ENABLE;
1172 } else {
1173 /* Disable depth buffer */
1174 tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1178 * Stencil buffer specific enables
1180 if (flags & RADEON_STENCIL) {
1181 tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1182 tempRB3D_STENCILREFMASK = clear->depth_mask;
1183 } else {
1184 tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1185 tempRB3D_STENCILREFMASK = 0x00000000;
1188 if (flags & RADEON_USE_COMP_ZBUF) {
1189 tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1190 RADEON_Z_DECOMPRESSION_ENABLE;
1192 if (flags & RADEON_USE_HIERZ) {
1193 tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1196 BEGIN_RING(26);
1197 RADEON_WAIT_UNTIL_2D_IDLE();
1199 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1200 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1201 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1202 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1203 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1204 tempRB3D_STENCILREFMASK);
1205 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1206 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1207 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1208 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1209 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1210 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1211 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1212 ADVANCE_RING();
1214 /* Make sure we restore the 3D state next time.
1216 sarea_priv->ctx_owner = 0;
1218 for (i = 0; i < nbox; i++) {
1220 /* Funny that this should be required --
1221 * sets top-left?
1223 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1225 BEGIN_RING(14);
1226 OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1227 OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1228 RADEON_PRIM_WALK_RING |
1229 (3 << RADEON_NUM_VERTICES_SHIFT)));
1230 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1231 OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1232 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1233 OUT_RING(0x3f800000);
1234 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1235 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1236 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1237 OUT_RING(0x3f800000);
1238 OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1239 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1240 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1241 OUT_RING(0x3f800000);
1242 ADVANCE_RING();
1244 } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1246 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1248 rb3d_cntl = depth_clear->rb3d_cntl;
1250 if (flags & RADEON_DEPTH) {
1251 rb3d_cntl |= RADEON_Z_ENABLE;
1252 } else {
1253 rb3d_cntl &= ~RADEON_Z_ENABLE;
1256 if (flags & RADEON_STENCIL) {
1257 rb3d_cntl |= RADEON_STENCIL_ENABLE;
1258 rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
1259 } else {
1260 rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1261 rb3d_stencilrefmask = 0x00000000;
1264 if (flags & RADEON_USE_COMP_ZBUF) {
1265 tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1266 RADEON_Z_DECOMPRESSION_ENABLE;
1268 if (flags & RADEON_USE_HIERZ) {
1269 tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1272 BEGIN_RING(13);
1273 RADEON_WAIT_UNTIL_2D_IDLE();
1275 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1276 OUT_RING(0x00000000);
1277 OUT_RING(rb3d_cntl);
1279 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1280 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1281 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1282 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1283 ADVANCE_RING();
1285 /* Make sure we restore the 3D state next time.
1287 sarea_priv->ctx_owner = 0;
1289 for (i = 0; i < nbox; i++) {
1291 /* Funny that this should be required --
1292 * sets top-left?
1294 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1296 BEGIN_RING(15);
1298 OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1299 OUT_RING(RADEON_VTX_Z_PRESENT |
1300 RADEON_VTX_PKCOLOR_PRESENT);
1301 OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1302 RADEON_PRIM_WALK_RING |
1303 RADEON_MAOS_ENABLE |
1304 RADEON_VTX_FMT_RADEON_MODE |
1305 (3 << RADEON_NUM_VERTICES_SHIFT)));
1307 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1308 OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1309 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1310 OUT_RING(0x0);
1312 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1313 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1314 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1315 OUT_RING(0x0);
1317 OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1318 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1319 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1320 OUT_RING(0x0);
1322 ADVANCE_RING();
1326 /* Increment the clear counter. The client-side 3D driver must
1327 * wait on this value before performing the clear ioctl. We
1328 * need this because the card's so damned fast...
1330 sarea_priv->last_clear++;
1332 BEGIN_RING(4);
1334 RADEON_CLEAR_AGE(sarea_priv->last_clear);
1335 RADEON_WAIT_UNTIL_IDLE();
1337 ADVANCE_RING();
1340 static void radeon_cp_dispatch_swap(struct drm_device *dev)
1342 drm_radeon_private_t *dev_priv = dev->dev_private;
1343 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1344 int nbox = sarea_priv->nbox;
1345 struct drm_clip_rect *pbox = sarea_priv->boxes;
1346 int i;
1347 RING_LOCALS;
1348 DRM_DEBUG("\n");
1350 /* Do some trivial performance monitoring...
1352 if (dev_priv->do_boxes)
1353 radeon_cp_performance_boxes(dev_priv);
1355 /* Wait for the 3D stream to idle before dispatching the bitblt.
1356 * This will prevent data corruption between the two streams.
1358 BEGIN_RING(2);
1360 RADEON_WAIT_UNTIL_3D_IDLE();
1362 ADVANCE_RING();
1364 for (i = 0; i < nbox; i++) {
1365 int x = pbox[i].x1;
1366 int y = pbox[i].y1;
1367 int w = pbox[i].x2 - x;
1368 int h = pbox[i].y2 - y;
1370 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1372 BEGIN_RING(9);
1374 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1375 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1376 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1377 RADEON_GMC_BRUSH_NONE |
1378 (dev_priv->color_fmt << 8) |
1379 RADEON_GMC_SRC_DATATYPE_COLOR |
1380 RADEON_ROP3_S |
1381 RADEON_DP_SRC_SOURCE_MEMORY |
1382 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1384 /* Make this work even if front & back are flipped:
1386 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1387 if (sarea_priv->pfCurrentPage == 0) {
1388 OUT_RING(dev_priv->back_pitch_offset);
1389 OUT_RING(dev_priv->front_pitch_offset);
1390 } else {
1391 OUT_RING(dev_priv->front_pitch_offset);
1392 OUT_RING(dev_priv->back_pitch_offset);
1395 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1396 OUT_RING((x << 16) | y);
1397 OUT_RING((x << 16) | y);
1398 OUT_RING((w << 16) | h);
1400 ADVANCE_RING();
1403 /* Increment the frame counter. The client-side 3D driver must
1404 * throttle the framerate by waiting for this value before
1405 * performing the swapbuffer ioctl.
1407 sarea_priv->last_frame++;
1409 BEGIN_RING(4);
1411 RADEON_FRAME_AGE(sarea_priv->last_frame);
1412 RADEON_WAIT_UNTIL_2D_IDLE();
1414 ADVANCE_RING();
1417 static void radeon_cp_dispatch_flip(struct drm_device *dev)
1419 drm_radeon_private_t *dev_priv = dev->dev_private;
1420 struct drm_sarea *sarea = (struct drm_sarea *)dev_priv->sarea->handle;
1421 int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1422 ? dev_priv->front_offset : dev_priv->back_offset;
1423 RING_LOCALS;
1424 DRM_DEBUG("pfCurrentPage=%d\n",
1425 dev_priv->sarea_priv->pfCurrentPage);
1427 /* Do some trivial performance monitoring...
1429 if (dev_priv->do_boxes) {
1430 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1431 radeon_cp_performance_boxes(dev_priv);
1434 /* Update the frame offsets for both CRTCs
1436 BEGIN_RING(6);
1438 RADEON_WAIT_UNTIL_3D_IDLE();
1439 OUT_RING_REG(RADEON_CRTC_OFFSET,
1440 ((sarea->frame.y * dev_priv->front_pitch +
1441 sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1442 + offset);
1443 OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1444 + offset);
1446 ADVANCE_RING();
1448 /* Increment the frame counter. The client-side 3D driver must
1449 * throttle the framerate by waiting for this value before
1450 * performing the swapbuffer ioctl.
1452 dev_priv->sarea_priv->last_frame++;
1453 dev_priv->sarea_priv->pfCurrentPage =
1454 1 - dev_priv->sarea_priv->pfCurrentPage;
1456 BEGIN_RING(2);
1458 RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1460 ADVANCE_RING();
1463 static int bad_prim_vertex_nr(int primitive, int nr)
1465 switch (primitive & RADEON_PRIM_TYPE_MASK) {
1466 case RADEON_PRIM_TYPE_NONE:
1467 case RADEON_PRIM_TYPE_POINT:
1468 return nr < 1;
1469 case RADEON_PRIM_TYPE_LINE:
1470 return (nr & 1) || nr == 0;
1471 case RADEON_PRIM_TYPE_LINE_STRIP:
1472 return nr < 2;
1473 case RADEON_PRIM_TYPE_TRI_LIST:
1474 case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1475 case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1476 case RADEON_PRIM_TYPE_RECT_LIST:
1477 return nr % 3 || nr == 0;
1478 case RADEON_PRIM_TYPE_TRI_FAN:
1479 case RADEON_PRIM_TYPE_TRI_STRIP:
1480 return nr < 3;
1481 default:
1482 return 1;
/*
 * Description of one primitive handed to the vertex/index dispatch
 * helpers in this file.
 */
typedef struct {
	unsigned int start;	/* byte offset of the data inside the buffer */
	unsigned int finish;	/* byte offset where the data ends */
	unsigned int prim;	/* primitive word (type bits checked by bad_prim_vertex_nr) */
	unsigned int numverts;	/* vertex count emitted with the primitive */
	unsigned int offset;	/* added to the GART buffers offset for indexed draws */
	unsigned int vc_format;	/* vertex format word, emitted unchanged */
} drm_radeon_tcl_prim_t;
1495 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1496 struct drm_buf * buf,
1497 drm_radeon_tcl_prim_t * prim)
1499 drm_radeon_private_t *dev_priv = dev->dev_private;
1500 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1501 int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1502 int numverts = (int)prim->numverts;
1503 int nbox = sarea_priv->nbox;
1504 int i = 0;
1505 RING_LOCALS;
1507 DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1508 prim->prim,
1509 prim->vc_format, prim->start, prim->finish, prim->numverts);
1511 if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1512 DRM_ERROR("bad prim %x numverts %d\n",
1513 prim->prim, prim->numverts);
1514 return;
1517 do {
1518 /* Emit the next cliprect */
1519 if (i < nbox) {
1520 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1523 /* Emit the vertex buffer rendering commands */
1524 BEGIN_RING(5);
1526 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1527 OUT_RING(offset);
1528 OUT_RING(numverts);
1529 OUT_RING(prim->vc_format);
1530 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1531 RADEON_COLOR_ORDER_RGBA |
1532 RADEON_VTX_FMT_RADEON_MODE |
1533 (numverts << RADEON_NUM_VERTICES_SHIFT));
1535 ADVANCE_RING();
1537 i++;
1538 } while (i < nbox);
1541 static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_buf *buf)
1543 drm_radeon_private_t *dev_priv = dev->dev_private;
1544 drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1545 RING_LOCALS;
1547 buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1549 /* Emit the vertex buffer age */
1550 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1551 BEGIN_RING(3);
1552 R600_DISPATCH_AGE(buf_priv->age);
1553 ADVANCE_RING();
1554 } else {
1555 BEGIN_RING(2);
1556 RADEON_DISPATCH_AGE(buf_priv->age);
1557 ADVANCE_RING();
1560 buf->pending = 1;
1561 buf->used = 0;
1564 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1565 struct drm_buf * buf, int start, int end)
1567 drm_radeon_private_t *dev_priv = dev->dev_private;
1568 RING_LOCALS;
1569 DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1571 if (start != end) {
1572 int offset = (dev_priv->gart_buffers_offset
1573 + buf->offset + start);
1574 int dwords = (end - start + 3) / sizeof(u32);
1576 /* Indirect buffer data must be an even number of
1577 * dwords, so if we've been given an odd number we must
1578 * pad the data with a Type-2 CP packet.
1580 if (dwords & 1) {
1581 u32 *data = (u32 *)
1582 ((char *)dev->agp_buffer_map->handle
1583 + buf->offset + start);
1584 data[dwords++] = RADEON_CP_PACKET2;
1587 /* Fire off the indirect buffer */
1588 BEGIN_RING(3);
1590 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1591 OUT_RING(offset);
1592 OUT_RING(dwords);
1594 ADVANCE_RING();
1598 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1599 struct drm_buf * elt_buf,
1600 drm_radeon_tcl_prim_t * prim)
1602 drm_radeon_private_t *dev_priv = dev->dev_private;
1603 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1604 int offset = dev_priv->gart_buffers_offset + prim->offset;
1605 u32 *data;
1606 int dwords;
1607 int i = 0;
1608 int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1609 int count = (prim->finish - start) / sizeof(u16);
1610 int nbox = sarea_priv->nbox;
1612 DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1613 prim->prim,
1614 prim->vc_format,
1615 prim->start, prim->finish, prim->offset, prim->numverts);
1617 if (bad_prim_vertex_nr(prim->prim, count)) {
1618 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1619 return;
1622 if (start >= prim->finish || (prim->start & 0x7)) {
1623 DRM_ERROR("buffer prim %d\n", prim->prim);
1624 return;
1627 dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1629 data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1630 elt_buf->offset + prim->start);
1632 data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1633 data[1] = offset;
1634 data[2] = prim->numverts;
1635 data[3] = prim->vc_format;
1636 data[4] = (prim->prim |
1637 RADEON_PRIM_WALK_IND |
1638 RADEON_COLOR_ORDER_RGBA |
1639 RADEON_VTX_FMT_RADEON_MODE |
1640 (count << RADEON_NUM_VERTICES_SHIFT));
1642 do {
1643 if (i < nbox)
1644 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1646 radeon_cp_dispatch_indirect(dev, elt_buf,
1647 prim->start, prim->finish);
1649 i++;
1650 } while (i < nbox);
1654 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1656 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1657 struct drm_file *file_priv,
1658 drm_radeon_texture_t * tex,
1659 drm_radeon_tex_image_t * image)
1661 drm_radeon_private_t *dev_priv = dev->dev_private;
1662 struct drm_buf *buf;
1663 u32 format;
1664 u32 *buffer;
1665 const u8 __user *data;
1666 int size, dwords, tex_width, blit_width, spitch;
1667 u32 height;
1668 int i;
1669 u32 texpitch, microtile;
1670 u32 offset, byte_offset;
1671 RING_LOCALS;
1673 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1674 DRM_ERROR("Invalid destination offset\n");
1675 return -EINVAL;
1678 dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1680 /* Flush the pixel cache. This ensures no pixel data gets mixed
1681 * up with the texture data from the host data blit, otherwise
1682 * part of the texture image may be corrupted.
1684 BEGIN_RING(4);
1685 RADEON_FLUSH_CACHE();
1686 RADEON_WAIT_UNTIL_IDLE();
1687 ADVANCE_RING();
1689 /* The compiler won't optimize away a division by a variable,
1690 * even if the only legal values are powers of two. Thus, we'll
1691 * use a shift instead.
1693 switch (tex->format) {
1694 case RADEON_TXFORMAT_ARGB8888:
1695 case RADEON_TXFORMAT_RGBA8888:
1696 format = RADEON_COLOR_FORMAT_ARGB8888;
1697 tex_width = tex->width * 4;
1698 blit_width = image->width * 4;
1699 break;
1700 case RADEON_TXFORMAT_AI88:
1701 case RADEON_TXFORMAT_ARGB1555:
1702 case RADEON_TXFORMAT_RGB565:
1703 case RADEON_TXFORMAT_ARGB4444:
1704 case RADEON_TXFORMAT_VYUY422:
1705 case RADEON_TXFORMAT_YVYU422:
1706 format = RADEON_COLOR_FORMAT_RGB565;
1707 tex_width = tex->width * 2;
1708 blit_width = image->width * 2;
1709 break;
1710 case RADEON_TXFORMAT_I8:
1711 case RADEON_TXFORMAT_RGB332:
1712 format = RADEON_COLOR_FORMAT_CI8;
1713 tex_width = tex->width * 1;
1714 blit_width = image->width * 1;
1715 break;
1716 default:
1717 DRM_ERROR("invalid texture format %d\n", tex->format);
1718 return -EINVAL;
1720 spitch = blit_width >> 6;
1721 if (spitch == 0 && image->height > 1)
1722 return -EINVAL;
1724 texpitch = tex->pitch;
1725 if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1726 microtile = 1;
1727 if (tex_width < 64) {
1728 texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1729 /* we got tiled coordinates, untile them */
1730 image->x *= 2;
1732 } else
1733 microtile = 0;
1735 /* this might fail for zero-sized uploads - are those illegal? */
1736 if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1737 blit_width - 1)) {
1738 DRM_ERROR("Invalid final destination offset\n");
1739 return -EINVAL;
1742 DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1744 do {
1745 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1746 tex->offset >> 10, tex->pitch, tex->format,
1747 image->x, image->y, image->width, image->height);
1749 /* Make a copy of some parameters in case we have to
1750 * update them for a multi-pass texture blit.
1752 height = image->height;
1753 data = (const u8 __user *)image->data;
1755 size = height * blit_width;
1757 if (size > RADEON_MAX_TEXTURE_SIZE) {
1758 height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1759 size = height * blit_width;
1760 } else if (size < 4 && size > 0) {
1761 size = 4;
1762 } else if (size == 0) {
1763 return 0;
1766 buf = radeon_freelist_get(dev);
1767 if (0 && !buf) {
1768 radeon_do_cp_idle(dev_priv);
1769 buf = radeon_freelist_get(dev);
1771 if (!buf) {
1772 DRM_DEBUG("EAGAIN\n");
1773 if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1774 return -EFAULT;
1775 return -EAGAIN;
1778 /* Dispatch the indirect buffer.
1780 buffer =
1781 (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1782 dwords = size / 4;
1784 #define RADEON_COPY_MT(_buf, _data, _width) \
1785 do { \
1786 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1787 DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1788 return -EFAULT; \
1790 } while(0)
1792 if (microtile) {
1793 /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1794 however, we cannot use blitter directly for texture width < 64 bytes,
1795 since minimum tex pitch is 64 bytes and we need this to match
1796 the texture width, otherwise the blitter will tile it wrong.
1797 Thus, tiling manually in this case. Additionally, need to special
1798 case tex height = 1, since our actual image will have height 2
1799 and we need to ensure we don't read beyond the texture size
1800 from user space. */
1801 if (tex->height == 1) {
1802 if (tex_width >= 64 || tex_width <= 16) {
1803 RADEON_COPY_MT(buffer, data,
1804 (int)(tex_width * sizeof(u32)));
1805 } else if (tex_width == 32) {
1806 RADEON_COPY_MT(buffer, data, 16);
1807 RADEON_COPY_MT(buffer + 8,
1808 data + 16, 16);
1810 } else if (tex_width >= 64 || tex_width == 16) {
1811 RADEON_COPY_MT(buffer, data,
1812 (int)(dwords * sizeof(u32)));
1813 } else if (tex_width < 16) {
1814 for (i = 0; i < tex->height; i++) {
1815 RADEON_COPY_MT(buffer, data, tex_width);
1816 buffer += 4;
1817 data += tex_width;
1819 } else if (tex_width == 32) {
1820 /* TODO: make sure this works when not fitting in one buffer
1821 (i.e. 32bytes x 2048...) */
1822 for (i = 0; i < tex->height; i += 2) {
1823 RADEON_COPY_MT(buffer, data, 16);
1824 data += 16;
1825 RADEON_COPY_MT(buffer + 8, data, 16);
1826 data += 16;
1827 RADEON_COPY_MT(buffer + 4, data, 16);
1828 data += 16;
1829 RADEON_COPY_MT(buffer + 12, data, 16);
1830 data += 16;
1831 buffer += 16;
1834 } else {
1835 if (tex_width >= 32) {
1836 /* Texture image width is larger than the minimum, so we
1837 * can upload it directly.
1839 RADEON_COPY_MT(buffer, data,
1840 (int)(dwords * sizeof(u32)));
1841 } else {
1842 /* Texture image width is less than the minimum, so we
1843 * need to pad out each image scanline to the minimum
1844 * width.
1846 for (i = 0; i < tex->height; i++) {
1847 RADEON_COPY_MT(buffer, data, tex_width);
1848 buffer += 8;
1849 data += tex_width;
1854 #undef RADEON_COPY_MT
1855 byte_offset = (image->y & ~2047) * blit_width;
1856 buf->file_priv = file_priv;
1857 buf->used = size;
1858 offset = dev_priv->gart_buffers_offset + buf->offset;
1859 BEGIN_RING(9);
1860 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1861 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1862 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1863 RADEON_GMC_BRUSH_NONE |
1864 (format << 8) |
1865 RADEON_GMC_SRC_DATATYPE_COLOR |
1866 RADEON_ROP3_S |
1867 RADEON_DP_SRC_SOURCE_MEMORY |
1868 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1869 OUT_RING((spitch << 22) | (offset >> 10));
1870 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1871 OUT_RING(0);
1872 OUT_RING((image->x << 16) | (image->y % 2048));
1873 OUT_RING((image->width << 16) | height);
1874 RADEON_WAIT_UNTIL_2D_IDLE();
1875 ADVANCE_RING();
1876 COMMIT_RING();
1878 radeon_cp_discard_buffer(dev, buf);
1880 /* Update the input parameters for next time */
1881 image->y += height;
1882 image->height -= height;
1883 image->data = (const u8 __user *)image->data + size;
1884 } while (image->height > 0);
1886 /* Flush the pixel cache after the blit completes. This ensures
1887 * the texture data is written out to memory before rendering
1888 * continues.
1890 BEGIN_RING(4);
1891 RADEON_FLUSH_CACHE();
1892 RADEON_WAIT_UNTIL_2D_IDLE();
1893 ADVANCE_RING();
1894 COMMIT_RING();
1896 return 0;
1899 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1901 drm_radeon_private_t *dev_priv = dev->dev_private;
1902 int i;
1903 RING_LOCALS;
1904 DRM_DEBUG("\n");
1906 BEGIN_RING(35);
1908 OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1909 OUT_RING(0x00000000);
1911 OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1912 for (i = 0; i < 32; i++) {
1913 OUT_RING(stipple[i]);
1916 ADVANCE_RING();
1919 static void radeon_apply_surface_regs(int surf_index,
1920 drm_radeon_private_t *dev_priv)
1922 if (!dev_priv->mmio)
1923 return;
1925 radeon_do_cp_idle(dev_priv);
1927 RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1928 dev_priv->surfaces[surf_index].flags);
1929 RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1930 dev_priv->surfaces[surf_index].lower);
1931 RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1932 dev_priv->surfaces[surf_index].upper);
1935 /* Allocates a virtual surface
1936 * doesn't always allocate a real surface, will stretch an existing
1937 * surface when possible.
1939 * Note that refcount can be at most 2, since during a free refcount=3
1940 * might mean we have to allocate a new surface which might not always
1941 * be available.
1942 * For example : we allocate three contigous surfaces ABC. If B is
1943 * freed, we suddenly need two surfaces to store A and C, which might
1944 * not always be available.
1946 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1947 drm_radeon_private_t *dev_priv,
1948 struct drm_file *file_priv)
1950 struct radeon_virt_surface *s;
1951 int i;
1952 int virt_surface_index;
1953 uint32_t new_upper, new_lower;
1955 new_lower = new->address;
1956 new_upper = new_lower + new->size - 1;
1958 /* sanity check */
1959 if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1960 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1961 RADEON_SURF_ADDRESS_FIXED_MASK)
1962 || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1963 return -1;
1965 /* make sure there is no overlap with existing surfaces */
1966 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1967 if ((dev_priv->surfaces[i].refcount != 0) &&
1968 (((new_lower >= dev_priv->surfaces[i].lower) &&
1969 (new_lower < dev_priv->surfaces[i].upper)) ||
1970 ((new_lower < dev_priv->surfaces[i].lower) &&
1971 (new_upper > dev_priv->surfaces[i].lower)))) {
1972 return -1;
1976 /* find a virtual surface */
1977 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1978 if (dev_priv->virt_surfaces[i].file_priv == 0)
1979 break;
1980 if (i == 2 * RADEON_MAX_SURFACES) {
1981 return -1;
1983 virt_surface_index = i;
1985 /* try to reuse an existing surface */
1986 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1987 /* extend before */
1988 if ((dev_priv->surfaces[i].refcount == 1) &&
1989 (new->flags == dev_priv->surfaces[i].flags) &&
1990 (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1991 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1992 s->surface_index = i;
1993 s->lower = new_lower;
1994 s->upper = new_upper;
1995 s->flags = new->flags;
1996 s->file_priv = file_priv;
1997 dev_priv->surfaces[i].refcount++;
1998 dev_priv->surfaces[i].lower = s->lower;
1999 radeon_apply_surface_regs(s->surface_index, dev_priv);
2000 return virt_surface_index;
2003 /* extend after */
2004 if ((dev_priv->surfaces[i].refcount == 1) &&
2005 (new->flags == dev_priv->surfaces[i].flags) &&
2006 (new_lower == dev_priv->surfaces[i].upper + 1)) {
2007 s = &(dev_priv->virt_surfaces[virt_surface_index]);
2008 s->surface_index = i;
2009 s->lower = new_lower;
2010 s->upper = new_upper;
2011 s->flags = new->flags;
2012 s->file_priv = file_priv;
2013 dev_priv->surfaces[i].refcount++;
2014 dev_priv->surfaces[i].upper = s->upper;
2015 radeon_apply_surface_regs(s->surface_index, dev_priv);
2016 return virt_surface_index;
2020 /* okay, we need a new one */
2021 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2022 if (dev_priv->surfaces[i].refcount == 0) {
2023 s = &(dev_priv->virt_surfaces[virt_surface_index]);
2024 s->surface_index = i;
2025 s->lower = new_lower;
2026 s->upper = new_upper;
2027 s->flags = new->flags;
2028 s->file_priv = file_priv;
2029 dev_priv->surfaces[i].refcount = 1;
2030 dev_priv->surfaces[i].lower = s->lower;
2031 dev_priv->surfaces[i].upper = s->upper;
2032 dev_priv->surfaces[i].flags = s->flags;
2033 radeon_apply_surface_regs(s->surface_index, dev_priv);
2034 return virt_surface_index;
2038 /* we didn't find anything */
2039 return -1;
2042 static int free_surface(struct drm_file *file_priv,
2043 drm_radeon_private_t * dev_priv,
2044 int lower)
2046 struct radeon_virt_surface *s;
2047 int i;
2048 /* find the virtual surface */
2049 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2050 s = &(dev_priv->virt_surfaces[i]);
2051 if (s->file_priv) {
2052 if ((lower == s->lower) && (file_priv == s->file_priv))
2054 if (dev_priv->surfaces[s->surface_index].
2055 lower == s->lower)
2056 dev_priv->surfaces[s->surface_index].
2057 lower = s->upper;
2059 if (dev_priv->surfaces[s->surface_index].
2060 upper == s->upper)
2061 dev_priv->surfaces[s->surface_index].
2062 upper = s->lower;
2064 dev_priv->surfaces[s->surface_index].refcount--;
2065 if (dev_priv->surfaces[s->surface_index].
2066 refcount == 0)
2067 dev_priv->surfaces[s->surface_index].
2068 flags = 0;
2069 s->file_priv = NULL;
2070 radeon_apply_surface_regs(s->surface_index,
2071 dev_priv);
2072 return 0;
2076 return 1;
2079 static void radeon_surfaces_release(struct drm_file *file_priv,
2080 drm_radeon_private_t * dev_priv)
2082 int i;
2083 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2084 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2085 free_surface(file_priv, dev_priv,
2086 dev_priv->virt_surfaces[i].lower);
/* ================================================================
 * IOCTL functions
 */
2093 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2095 drm_radeon_private_t *dev_priv = dev->dev_private;
2096 drm_radeon_surface_alloc_t *alloc = data;
2098 if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2099 return -EINVAL;
2100 else
2101 return 0;
2104 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2106 drm_radeon_private_t *dev_priv = dev->dev_private;
2107 drm_radeon_surface_free_t *memfree = data;
2109 if (free_surface(file_priv, dev_priv, memfree->address))
2110 return -EINVAL;
2111 else
2112 return 0;
2115 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2117 drm_radeon_private_t *dev_priv = dev->dev_private;
2118 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2119 drm_radeon_clear_t *clear = data;
2120 drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2121 DRM_DEBUG("\n");
2123 LOCK_TEST_WITH_RETURN(dev, file_priv);
2125 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2127 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2128 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2130 if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2131 sarea_priv->nbox * sizeof(depth_boxes[0])))
2132 return -EFAULT;
2134 radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2136 COMMIT_RING();
2137 return 0;
2140 /* Not sure why this isn't set all the time:
2142 static int radeon_do_init_pageflip(struct drm_device *dev)
2144 drm_radeon_private_t *dev_priv = dev->dev_private;
2145 RING_LOCALS;
2147 DRM_DEBUG("\n");
2149 BEGIN_RING(6);
2150 RADEON_WAIT_UNTIL_3D_IDLE();
2151 OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2152 OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2153 RADEON_CRTC_OFFSET_FLIP_CNTL);
2154 OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2155 OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2156 RADEON_CRTC_OFFSET_FLIP_CNTL);
2157 ADVANCE_RING();
2159 dev_priv->page_flipping = 1;
2161 if (dev_priv->sarea_priv->pfCurrentPage != 1)
2162 dev_priv->sarea_priv->pfCurrentPage = 0;
2164 return 0;
2167 /* Swapping and flipping are different operations, need different ioctls.
2168 * They can & should be intermixed to support multiple 3d windows.
2170 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2172 drm_radeon_private_t *dev_priv = dev->dev_private;
2173 DRM_DEBUG("\n");
2175 LOCK_TEST_WITH_RETURN(dev, file_priv);
2177 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2179 if (!dev_priv->page_flipping)
2180 radeon_do_init_pageflip(dev);
2182 radeon_cp_dispatch_flip(dev);
2184 COMMIT_RING();
2185 return 0;
2188 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2190 drm_radeon_private_t *dev_priv = dev->dev_private;
2191 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2193 DRM_DEBUG("\n");
2195 LOCK_TEST_WITH_RETURN(dev, file_priv);
2197 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2199 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2200 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2202 radeon_cp_dispatch_swap(dev);
2203 sarea_priv->ctx_owner = 0;
2205 COMMIT_RING();
2206 return 0;
2209 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2211 drm_radeon_private_t *dev_priv = dev->dev_private;
2212 drm_radeon_sarea_t *sarea_priv;
2213 struct drm_device_dma *dma = dev->dma;
2214 struct drm_buf *buf;
2215 drm_radeon_vertex_t *vertex = data;
2216 drm_radeon_tcl_prim_t prim;
2218 LOCK_TEST_WITH_RETURN(dev, file_priv);
2220 sarea_priv = dev_priv->sarea_priv;
2222 DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2223 DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2225 if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2226 DRM_ERROR("buffer index %d (of %d max)\n",
2227 vertex->idx, dma->buf_count - 1);
2228 return -EINVAL;
2230 if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2231 DRM_ERROR("buffer prim %d\n", vertex->prim);
2232 return -EINVAL;
2235 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2236 VB_AGE_TEST_WITH_RETURN(dev_priv);
2238 buf = dma->buflist[vertex->idx];
2240 if (buf->file_priv != file_priv) {
2241 DRM_ERROR("process %d using buffer owned by %p\n",
2242 DRM_CURRENTPID, buf->file_priv);
2243 return -EINVAL;
2245 if (buf->pending) {
2246 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2247 return -EINVAL;
2250 /* Build up a prim_t record:
2252 if (vertex->count) {
2253 buf->used = vertex->count; /* not used? */
2255 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2256 if (radeon_emit_state(dev_priv, file_priv,
2257 &sarea_priv->context_state,
2258 sarea_priv->tex_state,
2259 sarea_priv->dirty)) {
2260 DRM_ERROR("radeon_emit_state failed\n");
2261 return -EINVAL;
2264 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2265 RADEON_UPLOAD_TEX1IMAGES |
2266 RADEON_UPLOAD_TEX2IMAGES |
2267 RADEON_REQUIRE_QUIESCENCE);
2270 prim.start = 0;
2271 prim.finish = vertex->count; /* unused */
2272 prim.prim = vertex->prim;
2273 prim.numverts = vertex->count;
2274 prim.vc_format = sarea_priv->vc_format;
2276 radeon_cp_dispatch_vertex(dev, buf, &prim);
2279 if (vertex->discard) {
2280 radeon_cp_discard_buffer(dev, buf);
2283 COMMIT_RING();
2284 return 0;
2287 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2289 drm_radeon_private_t *dev_priv = dev->dev_private;
2290 drm_radeon_sarea_t *sarea_priv;
2291 struct drm_device_dma *dma = dev->dma;
2292 struct drm_buf *buf;
2293 drm_radeon_indices_t *elts = data;
2294 drm_radeon_tcl_prim_t prim;
2295 int count;
2297 LOCK_TEST_WITH_RETURN(dev, file_priv);
2299 sarea_priv = dev_priv->sarea_priv;
2301 DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2302 DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2303 elts->discard);
2305 if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2306 DRM_ERROR("buffer index %d (of %d max)\n",
2307 elts->idx, dma->buf_count - 1);
2308 return -EINVAL;
2310 if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2311 DRM_ERROR("buffer prim %d\n", elts->prim);
2312 return -EINVAL;
2315 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2316 VB_AGE_TEST_WITH_RETURN(dev_priv);
2318 buf = dma->buflist[elts->idx];
2320 if (buf->file_priv != file_priv) {
2321 DRM_ERROR("process %d using buffer owned by %p\n",
2322 DRM_CURRENTPID, buf->file_priv);
2323 return -EINVAL;
2325 if (buf->pending) {
2326 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2327 return -EINVAL;
2330 count = (elts->end - elts->start) / sizeof(u16);
2331 elts->start -= RADEON_INDEX_PRIM_OFFSET;
2333 if (elts->start & 0x7) {
2334 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2335 return -EINVAL;
2337 if (elts->start < buf->used) {
2338 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2339 return -EINVAL;
2342 buf->used = elts->end;
2344 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2345 if (radeon_emit_state(dev_priv, file_priv,
2346 &sarea_priv->context_state,
2347 sarea_priv->tex_state,
2348 sarea_priv->dirty)) {
2349 DRM_ERROR("radeon_emit_state failed\n");
2350 return -EINVAL;
2353 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2354 RADEON_UPLOAD_TEX1IMAGES |
2355 RADEON_UPLOAD_TEX2IMAGES |
2356 RADEON_REQUIRE_QUIESCENCE);
2359 /* Build up a prim_t record:
2361 prim.start = elts->start;
2362 prim.finish = elts->end;
2363 prim.prim = elts->prim;
2364 prim.offset = 0; /* offset from start of dma buffers */
2365 prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2366 prim.vc_format = sarea_priv->vc_format;
2368 radeon_cp_dispatch_indices(dev, buf, &prim);
2369 if (elts->discard) {
2370 radeon_cp_discard_buffer(dev, buf);
2373 COMMIT_RING();
2374 return 0;
2377 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2379 drm_radeon_private_t *dev_priv = dev->dev_private;
2380 drm_radeon_texture_t *tex = data;
2381 drm_radeon_tex_image_t image;
2382 int ret;
2384 LOCK_TEST_WITH_RETURN(dev, file_priv);
2386 if (tex->image == NULL) {
2387 DRM_ERROR("null texture image!\n");
2388 return -EINVAL;
2391 if (DRM_COPY_FROM_USER(&image,
2392 (drm_radeon_tex_image_t __user *) tex->image,
2393 sizeof(image)))
2394 return -EFAULT;
2396 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2397 VB_AGE_TEST_WITH_RETURN(dev_priv);
2399 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2401 return ret;
2404 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2406 drm_radeon_private_t *dev_priv = dev->dev_private;
2407 drm_radeon_stipple_t *stipple = data;
2408 u32 mask[32];
2410 LOCK_TEST_WITH_RETURN(dev, file_priv);
2412 if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2413 return -EFAULT;
2415 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2417 radeon_cp_dispatch_stipple(dev, mask);
2419 COMMIT_RING();
2420 return 0;
2423 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2425 drm_radeon_private_t *dev_priv = dev->dev_private;
2426 struct drm_device_dma *dma = dev->dma;
2427 struct drm_buf *buf;
2428 drm_radeon_indirect_t *indirect = data;
2429 RING_LOCALS;
2431 LOCK_TEST_WITH_RETURN(dev, file_priv);
2433 if (!dev_priv) {
2434 DRM_ERROR("called with no initialization\n");
2435 return -EINVAL;
2438 DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2439 indirect->idx, indirect->start, indirect->end,
2440 indirect->discard);
2442 if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2443 DRM_ERROR("buffer index %d (of %d max)\n",
2444 indirect->idx, dma->buf_count - 1);
2445 return -EINVAL;
2448 buf = dma->buflist[indirect->idx];
2450 if (buf->file_priv != file_priv) {
2451 DRM_ERROR("process %d using buffer owned by %p\n",
2452 DRM_CURRENTPID, buf->file_priv);
2453 return -EINVAL;
2455 if (buf->pending) {
2456 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2457 return -EINVAL;
2460 if (indirect->start < buf->used) {
2461 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2462 indirect->start, buf->used);
2463 return -EINVAL;
2466 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2467 VB_AGE_TEST_WITH_RETURN(dev_priv);
2469 buf->used = indirect->end;
2471 /* Dispatch the indirect buffer full of commands from the
2472 * X server. This is insecure and is thus only available to
2473 * privileged clients.
2475 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2476 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2477 else {
2478 /* Wait for the 3D stream to idle before the indirect buffer
2479 * containing 2D acceleration commands is processed.
2481 BEGIN_RING(2);
2482 RADEON_WAIT_UNTIL_3D_IDLE();
2483 ADVANCE_RING();
2484 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2487 if (indirect->discard)
2488 radeon_cp_discard_buffer(dev, buf);
2490 COMMIT_RING();
2491 return 0;
2494 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2496 drm_radeon_private_t *dev_priv = dev->dev_private;
2497 drm_radeon_sarea_t *sarea_priv;
2498 struct drm_device_dma *dma = dev->dma;
2499 struct drm_buf *buf;
2500 drm_radeon_vertex2_t *vertex = data;
2501 int i;
2502 unsigned char laststate;
2504 LOCK_TEST_WITH_RETURN(dev, file_priv);
2506 sarea_priv = dev_priv->sarea_priv;
2508 DRM_DEBUG("pid=%d index=%d discard=%d\n",
2509 DRM_CURRENTPID, vertex->idx, vertex->discard);
2511 if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2512 DRM_ERROR("buffer index %d (of %d max)\n",
2513 vertex->idx, dma->buf_count - 1);
2514 return -EINVAL;
2517 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2518 VB_AGE_TEST_WITH_RETURN(dev_priv);
2520 buf = dma->buflist[vertex->idx];
2522 if (buf->file_priv != file_priv) {
2523 DRM_ERROR("process %d using buffer owned by %p\n",
2524 DRM_CURRENTPID, buf->file_priv);
2525 return -EINVAL;
2528 if (buf->pending) {
2529 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2530 return -EINVAL;
2533 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2534 return -EINVAL;
2536 for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2537 drm_radeon_prim_t prim;
2538 drm_radeon_tcl_prim_t tclprim;
2540 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2541 return -EFAULT;
2543 if (prim.stateidx != laststate) {
2544 drm_radeon_state_t state;
2546 if (DRM_COPY_FROM_USER(&state,
2547 &vertex->state[prim.stateidx],
2548 sizeof(state)))
2549 return -EFAULT;
2551 if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2552 DRM_ERROR("radeon_emit_state2 failed\n");
2553 return -EINVAL;
2556 laststate = prim.stateidx;
2559 tclprim.start = prim.start;
2560 tclprim.finish = prim.finish;
2561 tclprim.prim = prim.prim;
2562 tclprim.vc_format = prim.vc_format;
2564 if (prim.prim & RADEON_PRIM_WALK_IND) {
2565 tclprim.offset = prim.numverts * 64;
2566 tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2568 radeon_cp_dispatch_indices(dev, buf, &tclprim);
2569 } else {
2570 tclprim.numverts = prim.numverts;
2571 tclprim.offset = 0; /* not used */
2573 radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2576 if (sarea_priv->nbox == 1)
2577 sarea_priv->nbox = 0;
2580 if (vertex->discard) {
2581 radeon_cp_discard_buffer(dev, buf);
2584 COMMIT_RING();
2585 return 0;
2588 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2589 struct drm_file *file_priv,
2590 drm_radeon_cmd_header_t header,
2591 drm_radeon_kcmd_buffer_t *cmdbuf)
2593 int id = (int)header.packet.packet_id;
2594 int sz, reg;
2595 int *data = (int *)cmdbuf->buf;
2596 RING_LOCALS;
2598 if (id >= RADEON_MAX_STATE_PACKETS)
2599 return -EINVAL;
2601 sz = packet[id].len;
2602 reg = packet[id].start;
2604 if (sz * sizeof(int) > cmdbuf->bufsz) {
2605 DRM_ERROR("Packet size provided larger than data provided\n");
2606 return -EINVAL;
2609 if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2610 DRM_ERROR("Packet verification failed\n");
2611 return -EINVAL;
2614 BEGIN_RING(sz + 1);
2615 OUT_RING(CP_PACKET0(reg, (sz - 1)));
2616 OUT_RING_TABLE(data, sz);
2617 ADVANCE_RING();
2619 cmdbuf->buf += sz * sizeof(int);
2620 cmdbuf->bufsz -= sz * sizeof(int);
2621 return 0;
2624 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2625 drm_radeon_cmd_header_t header,
2626 drm_radeon_kcmd_buffer_t *cmdbuf)
2628 int sz = header.scalars.count;
2629 int start = header.scalars.offset;
2630 int stride = header.scalars.stride;
2631 RING_LOCALS;
2633 BEGIN_RING(3 + sz);
2634 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2635 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2636 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2637 OUT_RING_TABLE(cmdbuf->buf, sz);
2638 ADVANCE_RING();
2639 cmdbuf->buf += sz * sizeof(int);
2640 cmdbuf->bufsz -= sz * sizeof(int);
2641 return 0;
2644 /* God this is ugly
2646 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2647 drm_radeon_cmd_header_t header,
2648 drm_radeon_kcmd_buffer_t *cmdbuf)
2650 int sz = header.scalars.count;
2651 int start = ((unsigned int)header.scalars.offset) + 0x100;
2652 int stride = header.scalars.stride;
2653 RING_LOCALS;
2655 BEGIN_RING(3 + sz);
2656 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2657 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2658 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2659 OUT_RING_TABLE(cmdbuf->buf, sz);
2660 ADVANCE_RING();
2661 cmdbuf->buf += sz * sizeof(int);
2662 cmdbuf->bufsz -= sz * sizeof(int);
2663 return 0;
2666 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2667 drm_radeon_cmd_header_t header,
2668 drm_radeon_kcmd_buffer_t *cmdbuf)
2670 int sz = header.vectors.count;
2671 int start = header.vectors.offset;
2672 int stride = header.vectors.stride;
2673 RING_LOCALS;
2675 BEGIN_RING(5 + sz);
2676 OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2677 OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2678 OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2679 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2680 OUT_RING_TABLE(cmdbuf->buf, sz);
2681 ADVANCE_RING();
2683 cmdbuf->buf += sz * sizeof(int);
2684 cmdbuf->bufsz -= sz * sizeof(int);
2685 return 0;
2688 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2689 drm_radeon_cmd_header_t header,
2690 drm_radeon_kcmd_buffer_t *cmdbuf)
2692 int sz = header.veclinear.count * 4;
2693 int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2694 RING_LOCALS;
2696 if (!sz)
2697 return 0;
2698 if (sz * 4 > cmdbuf->bufsz)
2699 return -EINVAL;
2701 BEGIN_RING(5 + sz);
2702 OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2703 OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2704 OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2705 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2706 OUT_RING_TABLE(cmdbuf->buf, sz);
2707 ADVANCE_RING();
2709 cmdbuf->buf += sz * sizeof(int);
2710 cmdbuf->bufsz -= sz * sizeof(int);
2711 return 0;
2714 static int radeon_emit_packet3(struct drm_device * dev,
2715 struct drm_file *file_priv,
2716 drm_radeon_kcmd_buffer_t *cmdbuf)
2718 drm_radeon_private_t *dev_priv = dev->dev_private;
2719 unsigned int cmdsz;
2720 int ret;
2721 RING_LOCALS;
2723 DRM_DEBUG("\n");
2725 if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2726 cmdbuf, &cmdsz))) {
2727 DRM_ERROR("Packet verification failed\n");
2728 return ret;
2731 BEGIN_RING(cmdsz);
2732 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2733 ADVANCE_RING();
2735 cmdbuf->buf += cmdsz * 4;
2736 cmdbuf->bufsz -= cmdsz * 4;
2737 return 0;
2740 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2741 struct drm_file *file_priv,
2742 drm_radeon_kcmd_buffer_t *cmdbuf,
2743 int orig_nbox)
2745 drm_radeon_private_t *dev_priv = dev->dev_private;
2746 struct drm_clip_rect box;
2747 unsigned int cmdsz;
2748 int ret;
2749 struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2750 int i = 0;
2751 RING_LOCALS;
2753 DRM_DEBUG("\n");
2755 if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2756 cmdbuf, &cmdsz))) {
2757 DRM_ERROR("Packet verification failed\n");
2758 return ret;
2761 if (!orig_nbox)
2762 goto out;
2764 do {
2765 if (i < cmdbuf->nbox) {
2766 if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2767 return -EFAULT;
2768 /* FIXME The second and subsequent times round
2769 * this loop, send a WAIT_UNTIL_3D_IDLE before
2770 * calling emit_clip_rect(). This fixes a
2771 * lockup on fast machines when sending
2772 * several cliprects with a cmdbuf, as when
2773 * waving a 2D window over a 3D
2774 * window. Something in the commands from user
2775 * space seems to hang the card when they're
2776 * sent several times in a row. That would be
2777 * the correct place to fix it but this works
2778 * around it until I can figure that out - Tim
2779 * Smith */
2780 if (i) {
2781 BEGIN_RING(2);
2782 RADEON_WAIT_UNTIL_3D_IDLE();
2783 ADVANCE_RING();
2785 radeon_emit_clip_rect(dev_priv, &box);
2788 BEGIN_RING(cmdsz);
2789 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2790 ADVANCE_RING();
2792 } while (++i < cmdbuf->nbox);
2793 if (cmdbuf->nbox == 1)
2794 cmdbuf->nbox = 0;
2796 out:
2797 cmdbuf->buf += cmdsz * 4;
2798 cmdbuf->bufsz -= cmdsz * 4;
2799 return 0;
2802 static int radeon_emit_wait(struct drm_device * dev, int flags)
2804 drm_radeon_private_t *dev_priv = dev->dev_private;
2805 RING_LOCALS;
2807 DRM_DEBUG("%x\n", flags);
2808 switch (flags) {
2809 case RADEON_WAIT_2D:
2810 BEGIN_RING(2);
2811 RADEON_WAIT_UNTIL_2D_IDLE();
2812 ADVANCE_RING();
2813 break;
2814 case RADEON_WAIT_3D:
2815 BEGIN_RING(2);
2816 RADEON_WAIT_UNTIL_3D_IDLE();
2817 ADVANCE_RING();
2818 break;
2819 case RADEON_WAIT_2D | RADEON_WAIT_3D:
2820 BEGIN_RING(2);
2821 RADEON_WAIT_UNTIL_IDLE();
2822 ADVANCE_RING();
2823 break;
2824 default:
2825 return -EINVAL;
2828 return 0;
2831 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2833 drm_radeon_private_t *dev_priv = dev->dev_private;
2834 struct drm_device_dma *dma = dev->dma;
2835 struct drm_buf *buf = NULL;
2836 int idx;
2837 drm_radeon_kcmd_buffer_t *cmdbuf = data;
2838 drm_radeon_cmd_header_t header;
2839 int orig_nbox, orig_bufsz;
2840 char *kbuf = NULL;
2842 LOCK_TEST_WITH_RETURN(dev, file_priv);
2844 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2845 VB_AGE_TEST_WITH_RETURN(dev_priv);
2847 if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2848 return -EINVAL;
2851 /* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid
2852 * races between checking values and using those values in other code,
2853 * and simply to avoid a lot of function calls to copy in data.
2855 orig_bufsz = cmdbuf->bufsz;
2856 if (orig_bufsz != 0) {
2857 kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2858 if (kbuf == NULL)
2859 return -ENOMEM;
2860 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2861 cmdbuf->bufsz)) {
2862 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2863 return -EFAULT;
2865 cmdbuf->buf = kbuf;
2868 orig_nbox = cmdbuf->nbox;
2870 if (dev_priv->microcode_version == UCODE_R300) {
2871 int temp;
2872 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2874 if (orig_bufsz != 0)
2875 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2877 return temp;
2880 /* microcode_version != r300 */
2881 while (cmdbuf->bufsz >= sizeof(header)) {
2883 header.i = *(int *)cmdbuf->buf;
2884 cmdbuf->buf += sizeof(header);
2885 cmdbuf->bufsz -= sizeof(header);
2887 switch (header.header.cmd_type) {
2888 case RADEON_CMD_PACKET:
2889 DRM_DEBUG("RADEON_CMD_PACKET\n");
2890 if (radeon_emit_packets
2891 (dev_priv, file_priv, header, cmdbuf)) {
2892 DRM_ERROR("radeon_emit_packets failed\n");
2893 goto err;
2895 break;
2897 case RADEON_CMD_SCALARS:
2898 DRM_DEBUG("RADEON_CMD_SCALARS\n");
2899 if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2900 DRM_ERROR("radeon_emit_scalars failed\n");
2901 goto err;
2903 break;
2905 case RADEON_CMD_VECTORS:
2906 DRM_DEBUG("RADEON_CMD_VECTORS\n");
2907 if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2908 DRM_ERROR("radeon_emit_vectors failed\n");
2909 goto err;
2911 break;
2913 case RADEON_CMD_DMA_DISCARD:
2914 DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2915 idx = header.dma.buf_idx;
2916 if (idx < 0 || idx >= dma->buf_count) {
2917 DRM_ERROR("buffer index %d (of %d max)\n",
2918 idx, dma->buf_count - 1);
2919 goto err;
2922 buf = dma->buflist[idx];
2923 if (buf->file_priv != file_priv || buf->pending) {
2924 DRM_ERROR("bad buffer %p %p %d\n",
2925 buf->file_priv, file_priv,
2926 buf->pending);
2927 goto err;
2930 radeon_cp_discard_buffer(dev, buf);
2931 break;
2933 case RADEON_CMD_PACKET3:
2934 DRM_DEBUG("RADEON_CMD_PACKET3\n");
2935 if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2936 DRM_ERROR("radeon_emit_packet3 failed\n");
2937 goto err;
2939 break;
2941 case RADEON_CMD_PACKET3_CLIP:
2942 DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2943 if (radeon_emit_packet3_cliprect
2944 (dev, file_priv, cmdbuf, orig_nbox)) {
2945 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2946 goto err;
2948 break;
2950 case RADEON_CMD_SCALARS2:
2951 DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2952 if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2953 DRM_ERROR("radeon_emit_scalars2 failed\n");
2954 goto err;
2956 break;
2958 case RADEON_CMD_WAIT:
2959 DRM_DEBUG("RADEON_CMD_WAIT\n");
2960 if (radeon_emit_wait(dev, header.wait.flags)) {
2961 DRM_ERROR("radeon_emit_wait failed\n");
2962 goto err;
2964 break;
2965 case RADEON_CMD_VECLINEAR:
2966 DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2967 if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
2968 DRM_ERROR("radeon_emit_veclinear failed\n");
2969 goto err;
2971 break;
2973 default:
2974 DRM_ERROR("bad cmd_type %d at %p\n",
2975 header.header.cmd_type,
2976 cmdbuf->buf - sizeof(header));
2977 goto err;
2981 if (orig_bufsz != 0)
2982 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2984 DRM_DEBUG("DONE\n");
2985 COMMIT_RING();
2986 return 0;
2988 err:
2989 if (orig_bufsz != 0)
2990 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2991 return -EINVAL;
2994 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
2996 drm_radeon_private_t *dev_priv = dev->dev_private;
2997 drm_radeon_getparam_t *param = data;
2998 int value;
3000 DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3002 switch (param->param) {
3003 case RADEON_PARAM_GART_BUFFER_OFFSET:
3004 value = dev_priv->gart_buffers_offset;
3005 break;
3006 case RADEON_PARAM_LAST_FRAME:
3007 dev_priv->stats.last_frame_reads++;
3008 value = GET_SCRATCH(dev_priv, 0);
3009 break;
3010 case RADEON_PARAM_LAST_DISPATCH:
3011 value = GET_SCRATCH(dev_priv, 1);
3012 break;
3013 case RADEON_PARAM_LAST_CLEAR:
3014 dev_priv->stats.last_clear_reads++;
3015 value = GET_SCRATCH(dev_priv, 2);
3016 break;
3017 case RADEON_PARAM_IRQ_NR:
3018 value = dev->irq;
3019 break;
3020 case RADEON_PARAM_GART_BASE:
3021 value = dev_priv->gart_vm_start;
3022 break;
3023 case RADEON_PARAM_REGISTER_HANDLE:
3024 value = dev_priv->mmio->offset;
3025 break;
3026 case RADEON_PARAM_STATUS_HANDLE:
3027 value = dev_priv->ring_rptr_offset;
3028 break;
3029 #ifndef __LP64__
3031 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3032 * pointer which can't fit into an int-sized variable. According to
3033 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3034 * not supporting it shouldn't be a problem. If the same functionality
3035 * is needed on 64-bit platforms, a new ioctl() would have to be added,
3036 * so backwards-compatibility for the embedded platforms can be
3037 * maintained. --davidm 4-Feb-2004.
3039 case RADEON_PARAM_SAREA_HANDLE:
3040 /* The lock is the first dword in the sarea. */
3041 value = (long)dev->lock.hw_lock;
3042 break;
3043 #endif
3044 case RADEON_PARAM_GART_TEX_HANDLE:
3045 value = dev_priv->gart_textures_offset;
3046 break;
3047 case RADEON_PARAM_SCRATCH_OFFSET:
3048 if (!dev_priv->writeback_works)
3049 return -EINVAL;
3050 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3051 value = R600_SCRATCH_REG_OFFSET;
3052 else
3053 value = RADEON_SCRATCH_REG_OFFSET;
3054 break;
3055 case RADEON_PARAM_CARD_TYPE:
3056 if (dev_priv->flags & RADEON_IS_PCIE)
3057 value = RADEON_CARD_PCIE;
3058 else if (dev_priv->flags & RADEON_IS_AGP)
3059 value = RADEON_CARD_AGP;
3060 else
3061 value = RADEON_CARD_PCI;
3062 break;
3063 case RADEON_PARAM_VBLANK_CRTC:
3064 value = radeon_vblank_crtc_get(dev);
3065 break;
3066 case RADEON_PARAM_FB_LOCATION:
3067 value = radeon_read_fb_location(dev_priv);
3068 break;
3069 case RADEON_PARAM_NUM_GB_PIPES:
3070 value = dev_priv->num_gb_pipes;
3071 break;
3072 default:
3073 DRM_DEBUG("Invalid parameter %d\n", param->param);
3074 return -EINVAL;
3077 if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3078 DRM_ERROR("copy_to_user\n");
3079 return -EFAULT;
3082 return 0;
3085 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3087 drm_radeon_private_t *dev_priv = dev->dev_private;
3088 drm_radeon_setparam_t *sp = data;
3089 struct drm_radeon_driver_file_fields *radeon_priv;
3091 switch (sp->param) {
3092 case RADEON_SETPARAM_FB_LOCATION:
3093 radeon_priv = file_priv->driver_priv;
3094 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3095 sp->value;
3096 break;
3097 case RADEON_SETPARAM_SWITCH_TILING:
3098 if (sp->value == 0) {
3099 DRM_DEBUG("color tiling disabled\n");
3100 dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3101 dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3102 if (dev_priv->sarea_priv)
3103 dev_priv->sarea_priv->tiling_enabled = 0;
3104 } else if (sp->value == 1) {
3105 DRM_DEBUG("color tiling enabled\n");
3106 dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3107 dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3108 if (dev_priv->sarea_priv)
3109 dev_priv->sarea_priv->tiling_enabled = 1;
3111 break;
3112 case RADEON_SETPARAM_PCIGART_LOCATION:
3113 dev_priv->pcigart_offset = sp->value;
3114 dev_priv->pcigart_offset_set = 1;
3115 break;
3116 case RADEON_SETPARAM_NEW_MEMMAP:
3117 dev_priv->new_memmap = sp->value;
3118 break;
3119 case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3120 dev_priv->gart_info.table_size = sp->value;
3121 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3122 dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3123 break;
3124 case RADEON_SETPARAM_VBLANK_CRTC:
3125 return radeon_vblank_crtc_set(dev, sp->value);
3126 break;
3127 default:
3128 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3129 return -EINVAL;
3132 return 0;
3135 /* When a client dies:
3136 * - Check for and clean up flipped page state
3137 * - Free any alloced GART memory.
3138 * - Free any alloced radeon surfaces.
3140 * DRM infrastructure takes care of reclaiming dma buffers.
3142 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3144 if (dev->dev_private) {
3145 drm_radeon_private_t *dev_priv = dev->dev_private;
3146 dev_priv->page_flipping = 0;
3147 radeon_mem_release(file_priv, dev_priv->gart_heap);
3148 radeon_mem_release(file_priv, dev_priv->fb_heap);
3149 radeon_surfaces_release(file_priv, dev_priv);
3153 void radeon_driver_lastclose(struct drm_device *dev)
3155 radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3156 radeon_do_release(dev);
3159 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3161 drm_radeon_private_t *dev_priv = dev->dev_private;
3162 struct drm_radeon_driver_file_fields *radeon_priv;
3164 DRM_DEBUG("\n");
3165 radeon_priv =
3166 (struct drm_radeon_driver_file_fields *)
3167 drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3169 if (!radeon_priv)
3170 return -ENOMEM;
3172 file_priv->driver_priv = radeon_priv;
3174 if (dev_priv)
3175 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3176 else
3177 radeon_priv->radeon_fb_delta = 0;
3178 return 0;
3181 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3183 struct drm_radeon_driver_file_fields *radeon_priv =
3184 file_priv->driver_priv;
3186 drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3189 struct drm_ioctl_desc radeon_ioctls[] = {
3190 DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3191 DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3192 DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3193 DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3194 DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3195 DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3196 DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3197 DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3198 DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3199 DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3200 DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3201 DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3202 DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3203 DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3204 DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_ROOT_ONLY),
3205 DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3206 DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3207 DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3208 DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3209 DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3210 DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3211 DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3212 DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3213 DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3214 DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3215 DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3216 DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
3219 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);