From 4cd92098975238a3e2cfccf057598cf2a5e54b55 Mon Sep 17 00:00:00 2001 From: zrj Date: Fri, 7 Aug 2015 08:31:18 +0300 Subject: [PATCH] drm/radeon: Partial update to Linux 3.12 This brings new radeon features (mostly code refactoring): DMA functions seperated from asics code; UVD block separation; Improvements to DPMS (by default disabled); Better audio handling. Notes: drm/ttm layer is put into transitional state pending memory functions rewrite. Currently VRAM handling capabilities are limited, still enough for light gaming and watching videos. X acceleration on radeon SI cards requires updated xorg+mesa(glamour support). --- sys/dev/drm/drm_bufs.c | 45 +- sys/dev/drm/include/drm/drmP.h | 1 - sys/dev/drm/include/drm/ttm/ttm_bo_api.h | 4 +- sys/dev/drm/include/drm/ttm/ttm_bo_driver.h | 36 +- sys/dev/drm/include/drm/ttm/ttm_execbuf_util.h | 13 +- sys/dev/drm/include/linux/reservation.h | 50 + sys/dev/drm/include/uapi_drm/radeon_drm.h | 2 + sys/dev/drm/radeon/Makefile | 26 +- sys/dev/drm/radeon/atombios.h | 614 +-- sys/dev/drm/radeon/atombios_crtc.c | 6 + sys/dev/drm/radeon/atombios_dp.c | 8 +- sys/dev/drm/radeon/atombios_encoders.c | 67 +- sys/dev/drm/radeon/atombios_i2c.c | 19 +- sys/dev/drm/radeon/btc_dpm.c | 69 +- sys/dev/drm/radeon/btc_dpm.h | 2 + sys/dev/drm/radeon/cayman_blit_shaders.c | 54 - sys/dev/drm/radeon/ci_dpm.c | 5250 ++++++++++++++++++++ sys/dev/drm/radeon/ci_dpm.h | 332 ++ sys/dev/drm/radeon/ci_smc.c | 263 + sys/dev/drm/radeon/cik.c | 4473 +++++++++++------ sys/dev/drm/radeon/cik_reg.h | 3 + sys/dev/drm/radeon/cik_sdma.c | 782 +++ sys/dev/drm/radeon/cikd.h | 594 ++- sys/dev/drm/radeon/clearstate_cayman.h | 2 +- .../{clearstate_cayman.h => clearstate_ci.h} | 755 ++- sys/dev/drm/radeon/clearstate_evergreen.h | 2 +- sys/dev/drm/radeon/cypress_dpm.c | 9 +- sys/dev/drm/radeon/dce6_afmt.c | 282 ++ sys/dev/drm/radeon/evergreen.c | 541 +- sys/dev/drm/radeon/evergreen_blit_kms.c | 730 --- sys/dev/drm/radeon/evergreen_blit_shaders.c | 54 - sys/dev/drm/radeon/evergreen_cs.c | 163 +- sys/dev/drm/radeon/evergreen_dma.c | 186 + sys/dev/drm/radeon/evergreen_hdmi.c | 103 +- sys/dev/drm/radeon/evergreend.h | 15 +- sys/dev/drm/radeon/kv_dpm.c | 2733 ++++++++++ sys/dev/drm/radeon/kv_dpm.h | 201 + sys/dev/drm/radeon/kv_smc.c | 214 + sys/dev/drm/radeon/ni.c | 371 +- sys/dev/drm/radeon/ni_dma.c | 335 ++ sys/dev/drm/radeon/ni_dpm.c | 44 +- sys/dev/drm/radeon/ppsmc.h | 59 + sys/dev/drm/radeon/pptable.h | 682 +++ sys/dev/drm/radeon/r100.c | 12 +- sys/dev/drm/radeon/r300.c | 2 +- sys/dev/drm/radeon/r600.c | 824 +-- sys/dev/drm/radeon/r600_audio.c | 60 +- sys/dev/drm/radeon/r600_blit.c | 31 + sys/dev/drm/radeon/r600_blit_kms.c | 786 --- sys/dev/drm/radeon/r600_blit_shaders.h | 1 - sys/dev/drm/radeon/r600_cp.c | 6 +- sys/dev/drm/radeon/r600_cs.c | 48 +- sys/dev/drm/radeon/r600_dma.c | 496 ++ sys/dev/drm/radeon/r600_dpm.c | 314 +- sys/dev/drm/radeon/r600_dpm.h | 6 + sys/dev/drm/radeon/r600_hdmi.c | 157 +- sys/dev/drm/radeon/r600d.h | 40 +- sys/dev/drm/radeon/radeon.h | 284 +- sys/dev/drm/radeon/radeon_agp.c | 7 +- sys/dev/drm/radeon/radeon_asic.c | 1269 ++--- sys/dev/drm/radeon/radeon_asic.h | 170 +- sys/dev/drm/radeon/radeon_atombios.c | 254 +- sys/dev/drm/radeon/radeon_blit_common.h | 44 - sys/dev/drm/radeon/radeon_combios.c | 4 +- sys/dev/drm/radeon/radeon_connectors.c | 78 +- sys/dev/drm/radeon/radeon_cp.c | 6 +- sys/dev/drm/radeon/radeon_cs.c | 44 +- sys/dev/drm/radeon/radeon_device.c | 45 +- sys/dev/drm/radeon/radeon_display.c | 78 +- sys/dev/drm/radeon/radeon_drv.c | 23 +- sys/dev/drm/radeon/radeon_fence.c | 2 +- sys/dev/drm/radeon/radeon_gem.c | 7 - sys/dev/drm/radeon/radeon_irq_kms.c | 15 +- sys/dev/drm/radeon/radeon_kms.c | 51 +- sys/dev/drm/radeon/radeon_kms.h | 3 - sys/dev/drm/radeon/radeon_mode.h | 14 +- sys/dev/drm/radeon/radeon_object.c | 7 +- sys/dev/drm/radeon/radeon_object.h | 3 +- sys/dev/drm/radeon/radeon_pm.c | 118 +- sys/dev/drm/radeon/radeon_prime.c | 1 - sys/dev/drm/radeon/radeon_ring.c | 21 +- sys/dev/drm/radeon/radeon_test.c | 4 +- sys/dev/drm/radeon/radeon_trace.h | 27 +- sys/dev/drm/radeon/radeon_ucode.h | 17 + sys/dev/drm/radeon/radeon_uvd.c | 86 +- sys/dev/drm/radeon/rs400.c | 12 +- sys/dev/drm/radeon/rs780_dpm.c | 112 +- sys/dev/drm/radeon/rv6xx_dpm.c | 7 +- sys/dev/drm/radeon/rv770.c | 210 +- sys/dev/drm/radeon/rv770_dma.c | 101 + sys/dev/drm/radeon/rv770_dpm.c | 33 +- sys/dev/drm/radeon/rv770d.h | 18 +- sys/dev/drm/radeon/si.c | 872 ++-- sys/dev/drm/radeon/si_dma.c | 232 + sys/dev/drm/radeon/si_dpm.c | 111 +- sys/dev/drm/radeon/sid.h | 75 +- sys/dev/drm/radeon/smu7.h | 169 + sys/dev/drm/radeon/smu7_discrete.h | 485 ++ sys/dev/drm/radeon/smu7_fusion.h | 299 ++ sys/dev/drm/radeon/sumo_dpm.c | 24 +- sys/dev/drm/radeon/sumo_dpm.h | 3 + sys/dev/drm/radeon/trinity_dpm.c | 24 +- sys/dev/drm/radeon/trinity_dpm.h | 2 + sys/dev/drm/radeon/trinity_smc.c | 8 + sys/dev/drm/radeon/uvd_v1_0.c | 436 ++ sys/dev/drm/radeon/uvd_v2_2.c | 165 + sys/dev/drm/radeon/uvd_v3_1.c | 55 + sys/dev/drm/radeon/uvd_v4_2.c | 68 + sys/dev/drm/ttm/ttm_bo.c | 50 +- sys/dev/drm/ttm/ttm_bo_manager.c | 41 +- sys/dev/drm/ttm/ttm_bo_util.c | 13 +- sys/dev/drm/ttm/ttm_bo_vm.c | 159 - sys/dev/drm/ttm/ttm_execbuf_util.c | 60 +- 113 files changed, 21346 insertions(+), 8157 deletions(-) create mode 100644 sys/dev/drm/include/linux/reservation.h create mode 100644 sys/dev/drm/radeon/ci_dpm.c create mode 100644 sys/dev/drm/radeon/ci_dpm.h create mode 100644 sys/dev/drm/radeon/ci_smc.c create mode 100644 sys/dev/drm/radeon/cik_sdma.c copy sys/dev/drm/radeon/{clearstate_cayman.h => clearstate_ci.h} (57%) create mode 100644 sys/dev/drm/radeon/dce6_afmt.c delete mode 100644 sys/dev/drm/radeon/evergreen_blit_kms.c create mode 100644 sys/dev/drm/radeon/evergreen_dma.c create mode 100644 sys/dev/drm/radeon/kv_dpm.c create mode 100644 sys/dev/drm/radeon/kv_dpm.h create mode 100644 sys/dev/drm/radeon/kv_smc.c create mode 100644 sys/dev/drm/radeon/ni_dma.c create mode 100644 sys/dev/drm/radeon/pptable.h delete mode 100644 sys/dev/drm/radeon/r600_blit_kms.c create mode 100644 sys/dev/drm/radeon/r600_dma.c delete mode 100644 sys/dev/drm/radeon/radeon_blit_common.h create mode 100644 sys/dev/drm/radeon/rv770_dma.c create mode 100644 sys/dev/drm/radeon/si_dma.c create mode 100644 sys/dev/drm/radeon/smu7.h create mode 100644 sys/dev/drm/radeon/smu7_discrete.h create mode 100644 sys/dev/drm/radeon/smu7_fusion.h create mode 100644 sys/dev/drm/radeon/uvd_v1_0.c create mode 100644 sys/dev/drm/radeon/uvd_v2_2.c create mode 100644 sys/dev/drm/radeon/uvd_v3_1.c create mode 100644 sys/dev/drm/radeon/uvd_v4_2.c diff --git a/sys/dev/drm/drm_bufs.c b/sys/dev/drm/drm_bufs.c index d3ef94ba59..882194c3af 100644 --- a/sys/dev/drm/drm_bufs.c +++ b/sys/dev/drm/drm_bufs.c @@ -172,7 +172,7 @@ int drm_addmap(struct drm_device * dev, resource_size_t offset, case _DRM_SHM: map->handle = kmalloc(map->size, M_DRM, M_WAITOK | M_NULLOK); DRM_DEBUG("%lu %d %p\n", - map->size, drm_order(map->size), map->handle); + map->size, order_base_2(map->size), map->handle); if (!map->handle) { drm_free(map, M_DRM); return ENOMEM; @@ -460,7 +460,7 @@ static int drm_do_addbufs_agp(struct drm_device *dev, struct drm_buf_desc *reque drm_buf_t **temp_buflist; count = request->count; - order = drm_order(request->size); + order = order_base_2(request->size); size = 1 << order; alignment = (request->flags & _DRM_PAGE_ALIGN) @@ -591,7 +591,7 @@ static int drm_do_addbufs_pci(struct drm_device *dev, struct drm_buf_desc *reque drm_buf_t **temp_buflist; count = request->count; - order = drm_order(request->size); + order = order_base_2(request->size); size = 1 << order; DRM_DEBUG("count=%d, size=%d (%d), order=%d\n", @@ -743,7 +743,7 @@ static int drm_do_addbufs_sg(struct drm_device *dev, struct drm_buf_desc *reques drm_buf_t **temp_buflist; count = request->count; - order = drm_order(request->size); + order = order_base_2(request->size); size = 1 << order; alignment = (request->flags & _DRM_PAGE_ALIGN) @@ -854,7 +854,7 @@ int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request) if (request->count < 0 || request->count > 4096) return EINVAL; - order = drm_order(request->size); + order = order_base_2(request->size); if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) return EINVAL; @@ -888,7 +888,7 @@ static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request if (request->count < 0 || request->count > 4096) return EINVAL; - order = drm_order(request->size); + order = order_base_2(request->size); if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) return EINVAL; @@ -922,7 +922,7 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request) if (request->count < 0 || request->count > 4096) return EINVAL; - order = drm_order(request->size); + order = order_base_2(request->size); if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) return EINVAL; @@ -1066,9 +1066,7 @@ int drm_markbufs(struct drm_device *dev, void *data, DRM_DEBUG("%d, %d, %d\n", request->size, request->low_mark, request->high_mark); - - - order = drm_order(request->size); + order = order_base_2(request->size); if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER || request->low_mark < 0 || request->high_mark < 0) { return EINVAL; @@ -1223,34 +1221,9 @@ int drm_mapbufs(struct drm_device *dev, void *data, goto done; } } - - done: + done: request->count = dma->buf_count; - DRM_DEBUG("%d buffers, retcode = %d\n", request->count, retcode); return retcode; } - -/** - * Compute size order. Returns the exponent of the smaller power of two which - * is greater or equal to given number. - * - * \param size size. - * \return order. - * - * \todo Can be made faster. - */ -int drm_order(unsigned long size) -{ - int order; - unsigned long tmp; - - for (order = 0, tmp = size >> 1; tmp; tmp >>= 1, order++) ; - - if (size & (size - 1)) - ++order; - - return order; -} -EXPORT_SYMBOL(drm_order); diff --git a/sys/dev/drm/include/drm/drmP.h b/sys/dev/drm/include/drm/drmP.h index f9d85d7afd..0b8f118ab7 100644 --- a/sys/dev/drm/include/drm/drmP.h +++ b/sys/dev/drm/include/drm/drmP.h @@ -1386,7 +1386,6 @@ unsigned long drm_get_resource_start(struct drm_device *dev, unsigned long drm_get_resource_len(struct drm_device *dev, unsigned int resource); void drm_rmmap(struct drm_device *dev, drm_local_map_t *map); -int drm_order(unsigned long size); int drm_addbufs_agp(struct drm_device *dev, struct drm_buf_desc *request); int drm_addbufs_pci(struct drm_device *dev, struct drm_buf_desc *request); extern int drm_addmap(struct drm_device *dev, resource_size_t offset, diff --git a/sys/dev/drm/include/drm/ttm/ttm_bo_api.h b/sys/dev/drm/include/drm/ttm/ttm_bo_api.h index 0629356e8c..f5c48f7a36 100644 --- a/sys/dev/drm/include/drm/ttm/ttm_bo_api.h +++ b/sys/dev/drm/include/drm/ttm/ttm_bo_api.h @@ -37,6 +37,8 @@ #include #include #include +#include +#include struct ttm_bo_device; @@ -233,7 +235,7 @@ struct ttm_buffer_object { struct list_head ddestroy; struct list_head swap; struct list_head io_reserve_lru; - uint32_t val_seq; + unsigned long val_seq; bool seq_valid; /** diff --git a/sys/dev/drm/include/drm/ttm/ttm_bo_driver.h b/sys/dev/drm/include/drm/ttm/ttm_bo_driver.h index 37d2bc71c6..5731262fab 100644 --- a/sys/dev/drm/include/drm/ttm/ttm_bo_driver.h +++ b/sys/dev/drm/include/drm/ttm/ttm_bo_driver.h @@ -38,6 +38,7 @@ #include #include #include +#include /* XXX nasty hack, but does the job */ #undef RB_ROOT @@ -784,7 +785,7 @@ extern void ttm_mem_io_unlock(struct ttm_mem_type_manager *man); * @bo: A pointer to a struct ttm_buffer_object. * @interruptible: Sleep interruptible if waiting. * @no_wait: Don't sleep while trying to reserve, rather return -EBUSY. - * @use_sequence: If @bo is already reserved, Only sleep waiting for + * @use_ticket: If @bo is already reserved, Only sleep waiting for * it to become unreserved if @sequence < (@bo)->sequence. * * Locks a buffer object for validation. (Or prevents other processes from @@ -825,7 +826,8 @@ extern void ttm_mem_io_unlock(struct ttm_mem_type_manager *man); */ extern int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, - bool no_wait, bool use_sequence, uint32_t sequence); + bool no_wait, bool use_ticket, + struct ww_acquire_ctx *ticket); /** * ttm_bo_reserve_slowpath_nolru: @@ -842,7 +844,7 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo, */ extern int ttm_bo_reserve_slowpath_nolru(struct ttm_buffer_object *bo, bool interruptible, - uint32_t sequence); + struct ww_acquire_ctx *ticket); /** @@ -856,7 +858,8 @@ extern int ttm_bo_reserve_slowpath_nolru(struct ttm_buffer_object *bo, * held by us, this function cannot deadlock any more. */ extern int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, - bool interruptible, uint32_t sequence); + bool interruptible, + struct ww_acquire_ctx *ticket); /** * ttm_bo_reserve_nolru: @@ -882,8 +885,8 @@ extern int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, */ extern int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo, bool interruptible, - bool no_wait, bool use_sequence, - uint32_t sequence); + bool no_wait, bool use_ticket, + struct ww_acquire_ctx *ticket); /** * ttm_bo_unreserve @@ -895,19 +898,34 @@ extern int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo, extern void ttm_bo_unreserve(struct ttm_buffer_object *bo); /** - * ttm_bo_unreserve_locked + * ttm_bo_unreserve_ticket + * @bo: A pointer to a struct ttm_buffer_object. + * @ticket: ww_acquire_ctx used for reserving * + * Unreserve a previous reservation of @bo made with @ticket. + */ +extern void ttm_bo_unreserve_ticket(struct ttm_buffer_object *bo, + struct ww_acquire_ctx *ticket); + +/** + * ttm_bo_unreserve_locked * @bo: A pointer to a struct ttm_buffer_object. + * @ticket: ww_acquire_ctx used for reserving, or NULL * - * Unreserve a previous reservation of @bo. + * Unreserve a previous reservation of @bo made with @ticket. * Needs to be called with struct ttm_bo_global::lru_lock held. */ -extern void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo); +extern void ttm_bo_unreserve_ticket_locked(struct ttm_buffer_object *bo, + struct ww_acquire_ctx *ticket); /* * ttm_bo_util.c */ +int ttm_mem_io_reserve(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem); +void ttm_mem_io_free(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem); /** * ttm_bo_move_ttm * diff --git a/sys/dev/drm/include/drm/ttm/ttm_execbuf_util.h b/sys/dev/drm/include/drm/ttm/ttm_execbuf_util.h index 8b6dbf5d0a..d84087af89 100644 --- a/sys/dev/drm/include/drm/ttm/ttm_execbuf_util.h +++ b/sys/dev/drm/include/drm/ttm/ttm_execbuf_util.h @@ -33,6 +33,7 @@ #define _TTM_EXECBUF_UTIL_H_ #include +#include /** * struct ttm_validate_buffer @@ -57,17 +58,20 @@ struct ttm_validate_buffer { /** * function ttm_eu_backoff_reservation * + * @ticket: ww_acquire_ctx from reserve call * @list: thread private list of ttm_validate_buffer structs. * * Undoes all buffer validation reservations for bos pointed to by * the list entries. */ -extern void ttm_eu_backoff_reservation(struct list_head *list); +extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, + struct list_head *list); /** * function ttm_eu_reserve_buffers * + * @ticket: [out] ww_acquire_ctx returned by call. * @list: thread private list of ttm_validate_buffer structs. * * Tries to reserve bos pointed to by the list entries for validation. @@ -90,11 +94,13 @@ extern void ttm_eu_backoff_reservation(struct list_head *list); * has failed. */ -extern int ttm_eu_reserve_buffers(struct list_head *list); +extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, + struct list_head *list); /** * function ttm_eu_fence_buffer_objects. * + * @ticket: ww_acquire_ctx from reserve call * @list: thread private list of ttm_validate_buffer structs. * @sync_obj: The new sync object for the buffers. * @@ -104,6 +110,7 @@ extern int ttm_eu_reserve_buffers(struct list_head *list); * */ -extern void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj); +extern void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, + struct list_head *list, void *sync_obj); #endif diff --git a/sys/dev/drm/include/linux/reservation.h b/sys/dev/drm/include/linux/reservation.h new file mode 100644 index 0000000000..b5a1a5517a --- /dev/null +++ b/sys/dev/drm/include/linux/reservation.h @@ -0,0 +1,50 @@ +/* + * Header file for reservations for dma-buf and ttm + * + * Copyright(C) 2011 Linaro Limited. All rights reserved. + * Copyright (C) 2012-2013 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + * Thomas Hellstrom + * + * Based on bo.c which bears the following copyright notice, + * but is dual licensed: + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _LINUX_RESERVATION_H +#define _LINUX_RESERVATION_H + +#include + +extern struct ww_class reservation_ww_class; + +struct reservation_object { + struct ww_mutex lock; +}; + +#endif /* _LINUX_RESERVATION_H */ diff --git a/sys/dev/drm/include/uapi_drm/radeon_drm.h b/sys/dev/drm/include/uapi_drm/radeon_drm.h index ca82852c5f..7120fb19f4 100644 --- a/sys/dev/drm/include/uapi_drm/radeon_drm.h +++ b/sys/dev/drm/include/uapi_drm/radeon_drm.h @@ -979,6 +979,8 @@ struct drm_radeon_cs { #define RADEON_INFO_RING_WORKING 0x15 /* SI tile mode array */ #define RADEON_INFO_SI_TILE_MODE_ARRAY 0x16 +/* query if CP DMA is supported on the compute ring */ +#define RADEON_INFO_SI_CP_DMA_COMPUTE 0x17 struct drm_radeon_info { diff --git a/sys/dev/drm/radeon/Makefile b/sys/dev/drm/radeon/Makefile index 77acde2fa7..96a7e5c29c 100644 --- a/sys/dev/drm/radeon/Makefile +++ b/sys/dev/drm/radeon/Makefile @@ -61,13 +61,11 @@ SRCS += \ r520.c \ r600.c \ r600_audio.c \ - r600_blit_kms.c \ r600_blit_shaders.c \ r600_cs.c \ r600_hdmi.c \ rv770.c \ evergreen.c \ - evergreen_blit_kms.c \ evergreen_blit_shaders.c \ evergreen_cs.c \ evergreen_hdmi.c \ @@ -75,7 +73,6 @@ SRCS += \ ni.c \ si.c \ si_blit_shaders.c \ - radeon_uvd.c \ cik.c \ cik_blit_shaders.c \ r600_dpm.c \ @@ -93,8 +90,29 @@ SRCS += \ trinity_smc.c \ ni_dpm.c \ si_smc.c \ - si_dpm.c + si_dpm.c \ + kv_smc.c \ + kv_dpm.c \ + ci_smc.c \ + ci_dpm.c \ + dce6_afmt.c + +# async DMA block +SRCS += \ + r600_dma.c \ + rv770_dma.c \ + evergreen_dma.c \ + ni_dma.c \ + si_dma.c \ + cik_sdma.c +# UVD block +SRCS += \ + radeon_uvd.c \ + uvd_v1_0.c \ + uvd_v2_2.c \ + uvd_v3_1.c \ + uvd_v4_2.c # UMS driver #SRCS += \ diff --git a/sys/dev/drm/radeon/atombios.h b/sys/dev/drm/radeon/atombios.h index 9105d2cc07..a91fa5dc4e 100644 --- a/sys/dev/drm/radeon/atombios.h +++ b/sys/dev/drm/radeon/atombios.h @@ -7661,618 +7661,6 @@ typedef struct _ATOM_POWERPLAY_INFO_V3 ATOM_POWERMODE_INFO_V3 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK]; }ATOM_POWERPLAY_INFO_V3; -/* New PPlib */ -/**************************************************************************/ -typedef struct _ATOM_PPLIB_THERMALCONTROLLER - -{ - UCHAR ucType; // one of ATOM_PP_THERMALCONTROLLER_* - UCHAR ucI2cLine; // as interpreted by DAL I2C - UCHAR ucI2cAddress; - UCHAR ucFanParameters; // Fan Control Parameters. - UCHAR ucFanMinRPM; // Fan Minimum RPM (hundreds) -- for display purposes only. - UCHAR ucFanMaxRPM; // Fan Maximum RPM (hundreds) -- for display purposes only. - UCHAR ucReserved; // ---- - UCHAR ucFlags; // to be defined -} ATOM_PPLIB_THERMALCONTROLLER; - -#define ATOM_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK 0x0f -#define ATOM_PP_FANPARAMETERS_NOFAN 0x80 // No fan is connected to this controller. - -#define ATOM_PP_THERMALCONTROLLER_NONE 0 -#define ATOM_PP_THERMALCONTROLLER_LM63 1 // Not used by PPLib -#define ATOM_PP_THERMALCONTROLLER_ADM1032 2 // Not used by PPLib -#define ATOM_PP_THERMALCONTROLLER_ADM1030 3 // Not used by PPLib -#define ATOM_PP_THERMALCONTROLLER_MUA6649 4 // Not used by PPLib -#define ATOM_PP_THERMALCONTROLLER_LM64 5 -#define ATOM_PP_THERMALCONTROLLER_F75375 6 // Not used by PPLib -#define ATOM_PP_THERMALCONTROLLER_RV6xx 7 -#define ATOM_PP_THERMALCONTROLLER_RV770 8 -#define ATOM_PP_THERMALCONTROLLER_ADT7473 9 -#define ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO 11 -#define ATOM_PP_THERMALCONTROLLER_EVERGREEN 12 -#define ATOM_PP_THERMALCONTROLLER_EMC2103 13 /* 0x0D */ // Only fan control will be implemented, do NOT show this in PPGen. -#define ATOM_PP_THERMALCONTROLLER_SUMO 14 /* 0x0E */ // Sumo type, used internally -#define ATOM_PP_THERMALCONTROLLER_NISLANDS 15 -#define ATOM_PP_THERMALCONTROLLER_SISLANDS 16 -#define ATOM_PP_THERMALCONTROLLER_LM96163 17 -#define ATOM_PP_THERMALCONTROLLER_CISLANDS 18 - -// Thermal controller 'combo type' to use an external controller for Fan control and an internal controller for thermal. -// We probably should reserve the bit 0x80 for this use. -// To keep the number of these types low we should also use the same code for all ASICs (i.e. do not distinguish RV6xx and RV7xx Internal here). -// The driver can pick the correct internal controller based on the ASIC. - -#define ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL 0x89 // ADT7473 Fan Control + Internal Thermal Controller -#define ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL 0x8D // EMC2103 Fan Control + Internal Thermal Controller - -typedef struct _ATOM_PPLIB_STATE -{ - UCHAR ucNonClockStateIndex; - UCHAR ucClockStateIndices[1]; // variable-sized -} ATOM_PPLIB_STATE; - - -typedef struct _ATOM_PPLIB_FANTABLE -{ - UCHAR ucFanTableFormat; // Change this if the table format changes or version changes so that the other fields are not the same. - UCHAR ucTHyst; // Temperature hysteresis. Integer. - USHORT usTMin; // The temperature, in 0.01 centigrades, below which we just run at a minimal PWM. - USHORT usTMed; // The middle temperature where we change slopes. - USHORT usTHigh; // The high point above TMed for adjusting the second slope. - USHORT usPWMMin; // The minimum PWM value in percent (0.01% increments). - USHORT usPWMMed; // The PWM value (in percent) at TMed. - USHORT usPWMHigh; // The PWM value at THigh. -} ATOM_PPLIB_FANTABLE; - -typedef struct _ATOM_PPLIB_FANTABLE2 -{ - ATOM_PPLIB_FANTABLE basicTable; - USHORT usTMax; // The max temperature -} ATOM_PPLIB_FANTABLE2; - -typedef struct _ATOM_PPLIB_EXTENDEDHEADER -{ - USHORT usSize; - ULONG ulMaxEngineClock; // For Overdrive. - ULONG ulMaxMemoryClock; // For Overdrive. - // Add extra system parameters here, always adjust size to include all fields. - USHORT usVCETableOffset; //points to ATOM_PPLIB_VCE_Table - USHORT usUVDTableOffset; //points to ATOM_PPLIB_UVD_Table - USHORT usSAMUTableOffset; //points to ATOM_PPLIB_SAMU_Table - USHORT usPPMTableOffset; //points to ATOM_PPLIB_PPM_Table -} ATOM_PPLIB_EXTENDEDHEADER; - -//// ATOM_PPLIB_POWERPLAYTABLE::ulPlatformCaps -#define ATOM_PP_PLATFORM_CAP_BACKBIAS 1 -#define ATOM_PP_PLATFORM_CAP_POWERPLAY 2 -#define ATOM_PP_PLATFORM_CAP_SBIOSPOWERSOURCE 4 -#define ATOM_PP_PLATFORM_CAP_ASPM_L0s 8 -#define ATOM_PP_PLATFORM_CAP_ASPM_L1 16 -#define ATOM_PP_PLATFORM_CAP_HARDWAREDC 32 -#define ATOM_PP_PLATFORM_CAP_GEMINIPRIMARY 64 -#define ATOM_PP_PLATFORM_CAP_STEPVDDC 128 -#define ATOM_PP_PLATFORM_CAP_VOLTAGECONTROL 256 -#define ATOM_PP_PLATFORM_CAP_SIDEPORTCONTROL 512 -#define ATOM_PP_PLATFORM_CAP_TURNOFFPLL_ASPML1 1024 -#define ATOM_PP_PLATFORM_CAP_HTLINKCONTROL 2048 -#define ATOM_PP_PLATFORM_CAP_MVDDCONTROL 4096 -#define ATOM_PP_PLATFORM_CAP_GOTO_BOOT_ON_ALERT 0x2000 // Go to boot state on alerts, e.g. on an AC->DC transition. -#define ATOM_PP_PLATFORM_CAP_DONT_WAIT_FOR_VBLANK_ON_ALERT 0x4000 // Do NOT wait for VBLANK during an alert (e.g. AC->DC transition). -#define ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL 0x8000 // Does the driver control VDDCI independently from VDDC. -#define ATOM_PP_PLATFORM_CAP_REGULATOR_HOT 0x00010000 // Enable the 'regulator hot' feature. -#define ATOM_PP_PLATFORM_CAP_BACO 0x00020000 // Does the driver supports BACO state. -#define ATOM_PP_PLATFORM_CAP_NEW_CAC_VOLTAGE 0x00040000 // Does the driver supports new CAC voltage table. -#define ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY 0x00080000 // Does the driver supports revert GPIO5 polarity. -#define ATOM_PP_PLATFORM_CAP_OUTPUT_THERMAL2GPIO17 0x00100000 // Does the driver supports thermal2GPIO17. -#define ATOM_PP_PLATFORM_CAP_VRHOT_GPIO_CONFIGURABLE 0x00200000 // Does the driver supports VR HOT GPIO Configurable. - -typedef struct _ATOM_PPLIB_POWERPLAYTABLE -{ - ATOM_COMMON_TABLE_HEADER sHeader; - - UCHAR ucDataRevision; - - UCHAR ucNumStates; - UCHAR ucStateEntrySize; - UCHAR ucClockInfoSize; - UCHAR ucNonClockSize; - - // offset from start of this table to array of ucNumStates ATOM_PPLIB_STATE structures - USHORT usStateArrayOffset; - - // offset from start of this table to array of ASIC-specific structures, - // currently ATOM_PPLIB_CLOCK_INFO. - USHORT usClockInfoArrayOffset; - - // offset from start of this table to array of ATOM_PPLIB_NONCLOCK_INFO - USHORT usNonClockInfoArrayOffset; - - USHORT usBackbiasTime; // in microseconds - USHORT usVoltageTime; // in microseconds - USHORT usTableSize; //the size of this structure, or the extended structure - - ULONG ulPlatformCaps; // See ATOM_PPLIB_CAPS_* - - ATOM_PPLIB_THERMALCONTROLLER sThermalController; - - USHORT usBootClockInfoOffset; - USHORT usBootNonClockInfoOffset; - -} ATOM_PPLIB_POWERPLAYTABLE; - -typedef struct _ATOM_PPLIB_POWERPLAYTABLE2 -{ - ATOM_PPLIB_POWERPLAYTABLE basicTable; - UCHAR ucNumCustomThermalPolicy; - USHORT usCustomThermalPolicyArrayOffset; -}ATOM_PPLIB_POWERPLAYTABLE2, *LPATOM_PPLIB_POWERPLAYTABLE2; - -typedef struct _ATOM_PPLIB_POWERPLAYTABLE3 -{ - ATOM_PPLIB_POWERPLAYTABLE2 basicTable2; - USHORT usFormatID; // To be used ONLY by PPGen. - USHORT usFanTableOffset; - USHORT usExtendendedHeaderOffset; -} ATOM_PPLIB_POWERPLAYTABLE3, *LPATOM_PPLIB_POWERPLAYTABLE3; - -typedef struct _ATOM_PPLIB_POWERPLAYTABLE4 -{ - ATOM_PPLIB_POWERPLAYTABLE3 basicTable3; - ULONG ulGoldenPPID; // PPGen use only - ULONG ulGoldenRevision; // PPGen use only - USHORT usVddcDependencyOnSCLKOffset; - USHORT usVddciDependencyOnMCLKOffset; - USHORT usVddcDependencyOnMCLKOffset; - USHORT usMaxClockVoltageOnDCOffset; - USHORT usVddcPhaseShedLimitsTableOffset; // Points to ATOM_PPLIB_PhaseSheddingLimits_Table - USHORT usMvddDependencyOnMCLKOffset; -} ATOM_PPLIB_POWERPLAYTABLE4, *LPATOM_PPLIB_POWERPLAYTABLE4; - -typedef struct _ATOM_PPLIB_POWERPLAYTABLE5 -{ - ATOM_PPLIB_POWERPLAYTABLE4 basicTable4; - ULONG ulTDPLimit; - ULONG ulNearTDPLimit; - ULONG ulSQRampingThreshold; - USHORT usCACLeakageTableOffset; // Points to ATOM_PPLIB_CAC_Leakage_Table - ULONG ulCACLeakage; // The iLeakage for driver calculated CAC leakage table - USHORT usTDPODLimit; - USHORT usLoadLineSlope; // in milliOhms * 100 -} ATOM_PPLIB_POWERPLAYTABLE5, *LPATOM_PPLIB_POWERPLAYTABLE5; - -//// ATOM_PPLIB_NONCLOCK_INFO::usClassification -#define ATOM_PPLIB_CLASSIFICATION_UI_MASK 0x0007 -#define ATOM_PPLIB_CLASSIFICATION_UI_SHIFT 0 -#define ATOM_PPLIB_CLASSIFICATION_UI_NONE 0 -#define ATOM_PPLIB_CLASSIFICATION_UI_BATTERY 1 -#define ATOM_PPLIB_CLASSIFICATION_UI_BALANCED 3 -#define ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE 5 -// 2, 4, 6, 7 are reserved - -#define ATOM_PPLIB_CLASSIFICATION_BOOT 0x0008 -#define ATOM_PPLIB_CLASSIFICATION_THERMAL 0x0010 -#define ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE 0x0020 -#define ATOM_PPLIB_CLASSIFICATION_REST 0x0040 -#define ATOM_PPLIB_CLASSIFICATION_FORCED 0x0080 -#define ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE 0x0100 -#define ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE 0x0200 -#define ATOM_PPLIB_CLASSIFICATION_UVDSTATE 0x0400 -#define ATOM_PPLIB_CLASSIFICATION_3DLOW 0x0800 -#define ATOM_PPLIB_CLASSIFICATION_ACPI 0x1000 -#define ATOM_PPLIB_CLASSIFICATION_HD2STATE 0x2000 -#define ATOM_PPLIB_CLASSIFICATION_HDSTATE 0x4000 -#define ATOM_PPLIB_CLASSIFICATION_SDSTATE 0x8000 - -//// ATOM_PPLIB_NONCLOCK_INFO::usClassification2 -#define ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2 0x0001 -#define ATOM_PPLIB_CLASSIFICATION2_ULV 0x0002 -#define ATOM_PPLIB_CLASSIFICATION2_MVC 0x0004 //Multi-View Codec (BD-3D) - -//// ATOM_PPLIB_NONCLOCK_INFO::ulCapsAndSettings -#define ATOM_PPLIB_SINGLE_DISPLAY_ONLY 0x00000001 -#define ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK 0x00000002 - -// 0 is 2.5Gb/s, 1 is 5Gb/s -#define ATOM_PPLIB_PCIE_LINK_SPEED_MASK 0x00000004 -#define ATOM_PPLIB_PCIE_LINK_SPEED_SHIFT 2 - -// lanes - 1: 1, 2, 4, 8, 12, 16 permitted by PCIE spec -#define ATOM_PPLIB_PCIE_LINK_WIDTH_MASK 0x000000F8 -#define ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT 3 - -// lookup into reduced refresh-rate table -#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_MASK 0x00000F00 -#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_SHIFT 8 - -#define ATOM_PPLIB_LIMITED_REFRESHRATE_UNLIMITED 0 -#define ATOM_PPLIB_LIMITED_REFRESHRATE_50HZ 1 -// 2-15 TBD as needed. - -#define ATOM_PPLIB_SOFTWARE_DISABLE_LOADBALANCING 0x00001000 -#define ATOM_PPLIB_SOFTWARE_ENABLE_SLEEP_FOR_TIMESTAMPS 0x00002000 - -#define ATOM_PPLIB_DISALLOW_ON_DC 0x00004000 - -#define ATOM_PPLIB_ENABLE_VARIBRIGHT 0x00008000 - -//memory related flags -#define ATOM_PPLIB_SWSTATE_MEMORY_DLL_OFF 0x000010000 - -//M3 Arb //2bits, current 3 sets of parameters in total -#define ATOM_PPLIB_M3ARB_MASK 0x00060000 -#define ATOM_PPLIB_M3ARB_SHIFT 17 - -#define ATOM_PPLIB_ENABLE_DRR 0x00080000 - -// remaining 16 bits are reserved -typedef struct _ATOM_PPLIB_THERMAL_STATE -{ - UCHAR ucMinTemperature; - UCHAR ucMaxTemperature; - UCHAR ucThermalAction; -}ATOM_PPLIB_THERMAL_STATE, *LPATOM_PPLIB_THERMAL_STATE; - -// Contained in an array starting at the offset -// in ATOM_PPLIB_POWERPLAYTABLE::usNonClockInfoArrayOffset. -// referenced from ATOM_PPLIB_STATE_INFO::ucNonClockStateIndex -#define ATOM_PPLIB_NONCLOCKINFO_VER1 12 -#define ATOM_PPLIB_NONCLOCKINFO_VER2 24 -typedef struct _ATOM_PPLIB_NONCLOCK_INFO -{ - USHORT usClassification; - UCHAR ucMinTemperature; - UCHAR ucMaxTemperature; - ULONG ulCapsAndSettings; - UCHAR ucRequiredPower; - USHORT usClassification2; - ULONG ulVCLK; - ULONG ulDCLK; - UCHAR ucUnused[5]; -} ATOM_PPLIB_NONCLOCK_INFO; - -// Contained in an array starting at the offset -// in ATOM_PPLIB_POWERPLAYTABLE::usClockInfoArrayOffset. -// referenced from ATOM_PPLIB_STATE::ucClockStateIndices -typedef struct _ATOM_PPLIB_R600_CLOCK_INFO -{ - USHORT usEngineClockLow; - UCHAR ucEngineClockHigh; - - USHORT usMemoryClockLow; - UCHAR ucMemoryClockHigh; - - USHORT usVDDC; - USHORT usUnused1; - USHORT usUnused2; - - ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_* - -} ATOM_PPLIB_R600_CLOCK_INFO; - -// ulFlags in ATOM_PPLIB_R600_CLOCK_INFO -#define ATOM_PPLIB_R600_FLAGS_PCIEGEN2 1 -#define ATOM_PPLIB_R600_FLAGS_UVDSAFE 2 -#define ATOM_PPLIB_R600_FLAGS_BACKBIASENABLE 4 -#define ATOM_PPLIB_R600_FLAGS_MEMORY_ODT_OFF 8 -#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF 16 -#define ATOM_PPLIB_R600_FLAGS_LOWPOWER 32 // On the RV770 use 'low power' setting (sequencer S0). - -typedef struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO -{ - USHORT usEngineClockLow; - UCHAR ucEngineClockHigh; - - USHORT usMemoryClockLow; - UCHAR ucMemoryClockHigh; - - USHORT usVDDC; - USHORT usVDDCI; - USHORT usUnused; - - ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_* - -} ATOM_PPLIB_EVERGREEN_CLOCK_INFO; - -typedef struct _ATOM_PPLIB_SI_CLOCK_INFO -{ - USHORT usEngineClockLow; - UCHAR ucEngineClockHigh; - - USHORT usMemoryClockLow; - UCHAR ucMemoryClockHigh; - - USHORT usVDDC; - USHORT usVDDCI; - UCHAR ucPCIEGen; - UCHAR ucUnused1; - - ULONG ulFlags; // ATOM_PPLIB_SI_FLAGS_*, no flag is necessary for now - -} ATOM_PPLIB_SI_CLOCK_INFO; - -typedef struct _ATOM_PPLIB_CI_CLOCK_INFO -{ - USHORT usEngineClockLow; - UCHAR ucEngineClockHigh; - - USHORT usMemoryClockLow; - UCHAR ucMemoryClockHigh; - - UCHAR ucPCIEGen; - USHORT usPCIELane; -} ATOM_PPLIB_CI_CLOCK_INFO; - -typedef struct _ATOM_PPLIB_RS780_CLOCK_INFO - -{ - USHORT usLowEngineClockLow; // Low Engine clock in MHz (the same way as on the R600). - UCHAR ucLowEngineClockHigh; - USHORT usHighEngineClockLow; // High Engine clock in MHz. - UCHAR ucHighEngineClockHigh; - USHORT usMemoryClockLow; // For now one of the ATOM_PPLIB_RS780_SPMCLK_XXXX constants. - UCHAR ucMemoryClockHigh; // Currentyl unused. - UCHAR ucPadding; // For proper alignment and size. - USHORT usVDDC; // For the 780, use: None, Low, High, Variable - UCHAR ucMaxHTLinkWidth; // From SBIOS - {2, 4, 8, 16} - UCHAR ucMinHTLinkWidth; // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could be bigger as display BW requriement. - USHORT usHTLinkFreq; // See definition ATOM_PPLIB_RS780_HTLINKFREQ_xxx or in MHz(>=200). - ULONG ulFlags; -} ATOM_PPLIB_RS780_CLOCK_INFO; - -#define ATOM_PPLIB_RS780_VOLTAGE_NONE 0 -#define ATOM_PPLIB_RS780_VOLTAGE_LOW 1 -#define ATOM_PPLIB_RS780_VOLTAGE_HIGH 2 -#define ATOM_PPLIB_RS780_VOLTAGE_VARIABLE 3 - -#define ATOM_PPLIB_RS780_SPMCLK_NONE 0 // We cannot change the side port memory clock, leave it as it is. -#define ATOM_PPLIB_RS780_SPMCLK_LOW 1 -#define ATOM_PPLIB_RS780_SPMCLK_HIGH 2 - -#define ATOM_PPLIB_RS780_HTLINKFREQ_NONE 0 -#define ATOM_PPLIB_RS780_HTLINKFREQ_LOW 1 -#define ATOM_PPLIB_RS780_HTLINKFREQ_HIGH 2 - -typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{ - USHORT usEngineClockLow; //clockfrequency & 0xFFFF. The unit is in 10khz - UCHAR ucEngineClockHigh; //clockfrequency >> 16. - UCHAR vddcIndex; //2-bit vddc index; - USHORT tdpLimit; - //please initalize to 0 - USHORT rsv1; - //please initialize to 0s - ULONG rsv2[2]; -}ATOM_PPLIB_SUMO_CLOCK_INFO; - - - -typedef struct _ATOM_PPLIB_STATE_V2 -{ - //number of valid dpm levels in this state; Driver uses it to calculate the whole - //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR) - UCHAR ucNumDPMLevels; - - //a index to the array of nonClockInfos - UCHAR nonClockInfoIndex; - /** - * Driver will read the first ucNumDPMLevels in this array - */ - UCHAR clockInfoIndex[1]; -} ATOM_PPLIB_STATE_V2; - -typedef struct _StateArray{ - //how many states we have - UCHAR ucNumEntries; - - ATOM_PPLIB_STATE_V2 states[1]; -}StateArray; - - -typedef struct _ClockInfoArray{ - //how many clock levels we have - UCHAR ucNumEntries; - - //sizeof(ATOM_PPLIB_CLOCK_INFO) - UCHAR ucEntrySize; - - UCHAR clockInfo[1]; -}ClockInfoArray; - -typedef struct _NonClockInfoArray{ - - //how many non-clock levels we have. normally should be same as number of states - UCHAR ucNumEntries; - //sizeof(ATOM_PPLIB_NONCLOCK_INFO) - UCHAR ucEntrySize; - - ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1]; -}NonClockInfoArray; - -typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record -{ - USHORT usClockLow; - UCHAR ucClockHigh; - USHORT usVoltage; -}ATOM_PPLIB_Clock_Voltage_Dependency_Record; - -typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Table -{ - UCHAR ucNumEntries; // Number of entries. - ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[1]; // Dynamically allocate entries. -}ATOM_PPLIB_Clock_Voltage_Dependency_Table; - -typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record -{ - USHORT usSclkLow; - UCHAR ucSclkHigh; - USHORT usMclkLow; - UCHAR ucMclkHigh; - USHORT usVddc; - USHORT usVddci; -}ATOM_PPLIB_Clock_Voltage_Limit_Record; - -typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table -{ - UCHAR ucNumEntries; // Number of entries. - ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1]; // Dynamically allocate entries. -}ATOM_PPLIB_Clock_Voltage_Limit_Table; - -typedef struct _ATOM_PPLIB_CAC_Leakage_Record -{ - USHORT usVddc; // We use this field for the "fake" standardized VDDC for power calculations; For CI and newer, we use this as the real VDDC value. - ULONG ulLeakageValue; // For CI and newer we use this as the "fake" standar VDDC value. -}ATOM_PPLIB_CAC_Leakage_Record; - -typedef struct _ATOM_PPLIB_CAC_Leakage_Table -{ - UCHAR ucNumEntries; // Number of entries. - ATOM_PPLIB_CAC_Leakage_Record entries[1]; // Dynamically allocate entries. -}ATOM_PPLIB_CAC_Leakage_Table; - -typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record -{ - USHORT usVoltage; - USHORT usSclkLow; - UCHAR ucSclkHigh; - USHORT usMclkLow; - UCHAR ucMclkHigh; -}ATOM_PPLIB_PhaseSheddingLimits_Record; - -typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table -{ - UCHAR ucNumEntries; // Number of entries. - ATOM_PPLIB_PhaseSheddingLimits_Record entries[1]; // Dynamically allocate entries. -}ATOM_PPLIB_PhaseSheddingLimits_Table; - -typedef struct _VCEClockInfo{ - USHORT usEVClkLow; - UCHAR ucEVClkHigh; - USHORT usECClkLow; - UCHAR ucECClkHigh; -}VCEClockInfo; - -typedef struct _VCEClockInfoArray{ - UCHAR ucNumEntries; - VCEClockInfo entries[1]; -}VCEClockInfoArray; - -typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record -{ - USHORT usVoltage; - UCHAR ucVCEClockInfoIndex; -}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record; - -typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table -{ - UCHAR numEntries; - ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[1]; -}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table; - -typedef struct _ATOM_PPLIB_VCE_State_Record -{ - UCHAR ucVCEClockInfoIndex; - UCHAR ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary -}ATOM_PPLIB_VCE_State_Record; - -typedef struct _ATOM_PPLIB_VCE_State_Table -{ - UCHAR numEntries; - ATOM_PPLIB_VCE_State_Record entries[1]; -}ATOM_PPLIB_VCE_State_Table; - - -typedef struct _ATOM_PPLIB_VCE_Table -{ - UCHAR revid; -// VCEClockInfoArray array; -// ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table limits; -// ATOM_PPLIB_VCE_State_Table states; -}ATOM_PPLIB_VCE_Table; - - -typedef struct _UVDClockInfo{ - USHORT usVClkLow; - UCHAR ucVClkHigh; - USHORT usDClkLow; - UCHAR ucDClkHigh; -}UVDClockInfo; - -typedef struct _UVDClockInfoArray{ - UCHAR ucNumEntries; - UVDClockInfo entries[1]; -}UVDClockInfoArray; - -typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record -{ - USHORT usVoltage; - UCHAR ucUVDClockInfoIndex; -}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record; - -typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table -{ - UCHAR numEntries; - ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[1]; -}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table; - -typedef struct _ATOM_PPLIB_UVD_State_Record -{ - UCHAR ucUVDClockInfoIndex; - UCHAR ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary -}ATOM_PPLIB_UVD_State_Record; - -typedef struct _ATOM_PPLIB_UVD_State_Table -{ - UCHAR numEntries; - ATOM_PPLIB_UVD_State_Record entries[1]; -}ATOM_PPLIB_UVD_State_Table; - - -typedef struct _ATOM_PPLIB_UVD_Table -{ - UCHAR revid; -// UVDClockInfoArray array; -// ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table limits; -// ATOM_PPLIB_UVD_State_Table states; -}ATOM_PPLIB_UVD_Table; - - -typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Record -{ - USHORT usVoltage; - USHORT usSAMClockLow; - UCHAR ucSAMClockHigh; -}ATOM_PPLIB_SAMClk_Voltage_Limit_Record; - -typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Table{ - UCHAR numEntries; - ATOM_PPLIB_SAMClk_Voltage_Limit_Record entries[1]; -}ATOM_PPLIB_SAMClk_Voltage_Limit_Table; - -typedef struct _ATOM_PPLIB_SAMU_Table -{ - UCHAR revid; - ATOM_PPLIB_SAMClk_Voltage_Limit_Table limits; -}ATOM_PPLIB_SAMU_Table; - -#define ATOM_PPM_A_A 1 -#define ATOM_PPM_A_I 2 -typedef struct _ATOM_PPLIB_PPM_Table -{ - UCHAR ucRevId; - UCHAR ucPpmDesign; //A+I or A+A - USHORT usCpuCoreNumber; - ULONG ulPlatformTDP; - ULONG ulSmallACPlatformTDP; - ULONG ulPlatformTDC; - ULONG ulSmallACPlatformTDC; - ULONG ulApuTDP; - ULONG ulDGpuTDP; - ULONG ulDGpuUlvPower; - ULONG ulTjmax; -} ATOM_PPLIB_PPM_Table; - -/**************************************************************************/ - // Following definitions are for compatibility issue in different SW components. #define ATOM_MASTER_DATA_TABLE_REVISION 0x01 @@ -8485,3 +7873,5 @@ typedef struct { #endif /* _ATOMBIOS_H */ + +#include "pptable.h" diff --git a/sys/dev/drm/radeon/atombios_crtc.c b/sys/dev/drm/radeon/atombios_crtc.c index 01b7592aaa..5550c22379 100644 --- a/sys/dev/drm/radeon/atombios_crtc.c +++ b/sys/dev/drm/radeon/atombios_crtc.c @@ -1910,6 +1910,12 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) int i; atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); + /* disable the GRPH */ + if (ASIC_IS_DCE4(rdev)) + WREG32(EVERGREEN_GRPH_ENABLE + radeon_crtc->crtc_offset, 0); + else if (ASIC_IS_AVIVO(rdev)) + WREG32(AVIVO_D1GRPH_ENABLE + radeon_crtc->crtc_offset, 0); + if (ASIC_IS_DCE6(rdev)) atombios_powergate_crtc(crtc, ATOM_ENABLE); diff --git a/sys/dev/drm/radeon/atombios_dp.c b/sys/dev/drm/radeon/atombios_dp.c index 8a3bb526c5..a859d3472a 100644 --- a/sys/dev/drm/radeon/atombios_dp.c +++ b/sys/dev/drm/radeon/atombios_dp.c @@ -50,7 +50,7 @@ static char *pre_emph_names[] = { * or from atom. Note that atom operates on * dw units. */ -static void radeon_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) +void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ @@ -100,7 +100,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1); - radeon_copy_swap(base, send, send_bytes, true); + radeon_atom_copy_swap(base, send, send_bytes, true); args.v1.lpAuxRequest = cpu_to_le16((u16)(0 + 4)); args.v1.lpDataOut = cpu_to_le16((u16)(16 + 4)); @@ -137,7 +137,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, recv_bytes = recv_size; if (recv && recv_size) - radeon_copy_swap(recv, base + 16, recv_bytes, false); + radeon_atom_copy_swap(recv, base + 16, recv_bytes, false); return recv_bytes; } @@ -586,7 +586,7 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, return false; } - DRM_DEBUG_KMS("link status %*ph\n", 6, link_status); + DRM_DEBUG_KMS("link status %6ph\n", link_status); return true; } diff --git a/sys/dev/drm/radeon/atombios_encoders.c b/sys/dev/drm/radeon/atombios_encoders.c index be99a00a18..44a82c0fe4 100644 --- a/sys/dev/drm/radeon/atombios_encoders.c +++ b/sys/dev/drm/radeon/atombios_encoders.c @@ -22,15 +22,12 @@ * * Authors: Dave Airlie * Alex Deucher - * - * $FreeBSD: head/sys/dev/drm2/radeon/atombios_encoders.c 254885 2013-08-25 19:37:15Z dumbbell $ */ - #include #include #include #include "radeon.h" -#include "radeon_asic.h" /* Declares several prototypes; clang is pleased. */ +#include "radeon_asic.h" #include "atom.h" static u8 @@ -751,8 +748,6 @@ atombios_digital_setup(struct drm_encoder *encoder, int action) int atombios_get_encoder_mode(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct drm_connector *connector; struct radeon_connector *radeon_connector; @@ -778,24 +773,37 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) switch (connector->connector_type) { case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_HDMIB: /* HDMI-B is basically DL-DVI; analog works fine */ - if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio && - !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */ - return ATOM_ENCODER_MODE_HDMI; - else if (radeon_connector->use_digital) + if (radeon_audio != 0) { + if (radeon_connector->use_digital && + (radeon_connector->audio == RADEON_AUDIO_ENABLE)) + return ATOM_ENCODER_MODE_HDMI; + else if (drm_detect_hdmi_monitor(radeon_connector->edid) && + (radeon_connector->audio == RADEON_AUDIO_AUTO)) + return ATOM_ENCODER_MODE_HDMI; + else if (radeon_connector->use_digital) + return ATOM_ENCODER_MODE_DVI; + else + return ATOM_ENCODER_MODE_CRT; + } else if (radeon_connector->use_digital) { return ATOM_ENCODER_MODE_DVI; - else + } else { return ATOM_ENCODER_MODE_CRT; + } break; case DRM_MODE_CONNECTOR_DVID: case DRM_MODE_CONNECTOR_HDMIA: default: - if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio && - !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */ - return ATOM_ENCODER_MODE_HDMI; - else + if (radeon_audio != 0) { + if (radeon_connector->audio == RADEON_AUDIO_ENABLE) + return ATOM_ENCODER_MODE_HDMI; + else if (drm_detect_hdmi_monitor(radeon_connector->edid) && + (radeon_connector->audio == RADEON_AUDIO_AUTO)) + return ATOM_ENCODER_MODE_HDMI; + else + return ATOM_ENCODER_MODE_DVI; + } else { return ATOM_ENCODER_MODE_DVI; + } break; case DRM_MODE_CONNECTOR_LVDS: return ATOM_ENCODER_MODE_LVDS; @@ -803,14 +811,19 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) case DRM_MODE_CONNECTOR_DisplayPort: dig_connector = radeon_connector->con_priv; if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || - (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) + (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) { return ATOM_ENCODER_MODE_DP; - else if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio && - !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */ - return ATOM_ENCODER_MODE_HDMI; - else + } else if (radeon_audio != 0) { + if (radeon_connector->audio == RADEON_AUDIO_ENABLE) + return ATOM_ENCODER_MODE_HDMI; + else if (drm_detect_hdmi_monitor(radeon_connector->edid) && + (radeon_connector->audio == RADEON_AUDIO_AUTO)) + return ATOM_ENCODER_MODE_HDMI; + else + return ATOM_ENCODER_MODE_DVI; + } else { return ATOM_ENCODER_MODE_DVI; + } break; case DRM_MODE_CONNECTOR_eDP: return ATOM_ENCODER_MODE_DP; @@ -1721,8 +1734,12 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0); atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0); atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); - /* some early dce3.2 boards have a bug in their transmitter control table */ - if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730)) + /* some dce3.x boards have a bug in their transmitter control table. + * ACTION_ENABLE_OUTPUT can probably be dropped since ACTION_ENABLE + * does the same thing and more. + */ + if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730) && + (rdev->family != CHIP_RS780) && (rdev->family != CHIP_RS880)) atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); } if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { diff --git a/sys/dev/drm/radeon/atombios_i2c.c b/sys/dev/drm/radeon/atombios_i2c.c index dacf3b6f95..847c01132f 100644 --- a/sys/dev/drm/radeon/atombios_i2c.c +++ b/sys/dev/drm/radeon/atombios_i2c.c @@ -37,7 +37,7 @@ #define TARGET_HW_I2C_CLOCK 50 /* these are a limitation of ProcessI2cChannelTransaction not the hw */ -#define ATOM_MAX_HW_I2C_WRITE 2 +#define ATOM_MAX_HW_I2C_WRITE 3 #define ATOM_MAX_HW_I2C_READ 255 static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan, @@ -57,15 +57,24 @@ static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan, if (flags & HW_I2C_WRITE) { if (num > ATOM_MAX_HW_I2C_WRITE) { - DRM_ERROR("hw i2c: tried to write too many bytes (%d vs 2)\n", num); + DRM_ERROR("hw i2c: tried to write too many bytes (%d vs 3)\n", num); return EINVAL; } - memcpy(&out, buf, num); + args.ucRegIndex = buf[0]; + if (num > 1) + memcpy(&out, &buf[1], num - 1); args.lpI2CDataOut = cpu_to_le16(out); + } else { + if (num > ATOM_MAX_HW_I2C_READ) { + DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num); + return -EINVAL; + } + args.ucRegIndex = 0; + args.lpI2CDataOut = 0; } + args.ucFlag = flags; args.ucI2CSpeed = TARGET_HW_I2C_CLOCK; - args.ucRegIndex = 0; args.ucTransBytes = num; args.ucSlaveAddr = slave_addr << 1; args.ucLineNumber = chan->rec.i2c_id; @@ -79,7 +88,7 @@ static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan, } if (!(flags & HW_I2C_WRITE)) - memcpy(buf, base, num); + radeon_atom_copy_swap(buf, base, num, false); return 0; } diff --git a/sys/dev/drm/radeon/btc_dpm.c b/sys/dev/drm/radeon/btc_dpm.c index 176d1195a4..67d3b2550c 100644 --- a/sys/dev/drm/radeon/btc_dpm.c +++ b/sys/dev/drm/radeon/btc_dpm.c @@ -1170,6 +1170,23 @@ static const struct radeon_blacklist_clocks btc_blacklist_clocks[] = { 25000, 30000, RADEON_SCLK_UP } }; +void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table, + u32 *max_clock) +{ + u32 i, clock = 0; + + if ((table == NULL) || (table->count == 0)) { + *max_clock = clock; + return; + } + + for (i = 0; i < table->count; i++) { + if (clock < table->entries[i].clk) + clock = table->entries[i].clk; + } + *max_clock = clock; +} + void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table, u32 clock, u16 max_voltage, u16 *voltage) { @@ -1915,7 +1932,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev, } j++; - if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) return -EINVAL; tmp = RREG32(MC_PMG_CMD_MRS); @@ -1930,7 +1947,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev, } j++; - if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) return -EINVAL; break; case MC_SEQ_RESERVE_M >> 2: @@ -1944,7 +1961,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev, } j++; - if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) return -EINVAL; break; default: @@ -2082,6 +2099,7 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_mclk_switching; u32 mclk, sclk; u16 vddc, vddci; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; if ((rdev->pm.dpm.new_active_crtc_count > 1) || btc_dpm_vblank_too_short(rdev)) @@ -2123,6 +2141,39 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev, ps->low.vddci = max_limits->vddci; } + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + if (max_sclk_vddc) { + if (ps->low.sclk > max_sclk_vddc) + ps->low.sclk = max_sclk_vddc; + if (ps->medium.sclk > max_sclk_vddc) + ps->medium.sclk = max_sclk_vddc; + if (ps->high.sclk > max_sclk_vddc) + ps->high.sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->low.mclk > max_mclk_vddci) + ps->low.mclk = max_mclk_vddci; + if (ps->medium.mclk > max_mclk_vddci) + ps->medium.mclk = max_mclk_vddci; + if (ps->high.mclk > max_mclk_vddci) + ps->high.mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->low.mclk > max_mclk_vddc) + ps->low.mclk = max_mclk_vddc; + if (ps->medium.mclk > max_mclk_vddc) + ps->medium.mclk = max_mclk_vddc; + if (ps->high.mclk > max_mclk_vddc) + ps->high.mclk = max_mclk_vddc; + } + /* XXX validate the min clocks required for display */ if (disable_mclk_switching) { @@ -2342,12 +2393,6 @@ int btc_dpm_set_power_state(struct radeon_device *rdev) return ret; } - ret = rv770_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("rv770_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } @@ -2701,6 +2746,12 @@ int btc_dpm_init(struct radeon_device *rdev) else rdev->pm.dpm.dyn_state.sclk_mclk_delta = 10000; + /* make sure dc limits are valid */ + if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) || + (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0)) + rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc = + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + return 0; } diff --git a/sys/dev/drm/radeon/btc_dpm.h b/sys/dev/drm/radeon/btc_dpm.h index 1a15e0e419..3b6f12b776 100644 --- a/sys/dev/drm/radeon/btc_dpm.h +++ b/sys/dev/drm/radeon/btc_dpm.h @@ -46,6 +46,8 @@ void btc_adjust_clock_combinations(struct radeon_device *rdev, struct rv7xx_pl *pl); void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table, u32 clock, u16 max_voltage, u16 *voltage); +void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table, + u32 *max_clock); void btc_apply_voltage_delta_rules(struct radeon_device *rdev, u16 max_vddc, u16 max_vddci, u16 *vddc, u16 *vddci); diff --git a/sys/dev/drm/radeon/cayman_blit_shaders.c b/sys/dev/drm/radeon/cayman_blit_shaders.c index 19a0114d2e..98d009e154 100644 --- a/sys/dev/drm/radeon/cayman_blit_shaders.c +++ b/sys/dev/drm/radeon/cayman_blit_shaders.c @@ -317,58 +317,4 @@ const u32 cayman_default_state[] = 0x00000010, /* */ }; -const u32 cayman_vs[] = -{ - 0x00000004, - 0x80400400, - 0x0000a03c, - 0x95000688, - 0x00004000, - 0x15000688, - 0x00000000, - 0x88000000, - 0x04000000, - 0x67961001, -#ifdef __BIG_ENDIAN - 0x00020000, -#else - 0x00000000, -#endif - 0x00000000, - 0x04000000, - 0x67961000, -#ifdef __BIG_ENDIAN - 0x00020008, -#else - 0x00000008, -#endif - 0x00000000, -}; - -const u32 cayman_ps[] = -{ - 0x00000004, - 0xa00c0000, - 0x00000008, - 0x80400000, - 0x00000000, - 0x95000688, - 0x00000000, - 0x88000000, - 0x00380400, - 0x00146b10, - 0x00380000, - 0x20146b10, - 0x00380400, - 0x40146b00, - 0x80380000, - 0x60146b00, - 0x00000010, - 0x000d1000, - 0xb0800000, - 0x00000000, -}; - -const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps); -const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs); const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state); diff --git a/sys/dev/drm/radeon/ci_dpm.c b/sys/dev/drm/radeon/ci_dpm.c new file mode 100644 index 0000000000..6123a09a73 --- /dev/null +++ b/sys/dev/drm/radeon/ci_dpm.c @@ -0,0 +1,5250 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" +#include "r600_dpm.h" +#include "ci_dpm.h" +#include "ni_dpm.h" +#include "atom.h" +#include + +#define MC_CG_ARB_FREQ_F0 0x0a +#define MC_CG_ARB_FREQ_F1 0x0b +#define MC_CG_ARB_FREQ_F2 0x0c +#define MC_CG_ARB_FREQ_F3 0x0d + +#define SMC_RAM_END 0x40000 + +#define VOLTAGE_SCALE 4 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 + +static const struct ci_pt_defaults defaults_bonaire_xt = +{ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, + { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61 }, + { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } +}; + +static const struct ci_pt_defaults defaults_bonaire_pro = +{ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062, + { 0x8C, 0x23F, 0x244, 0xA6, 0x83, 0x85, 0x86, 0x86, 0x83, 0xDB, 0xDB, 0xDA, 0x67, 0x60, 0x5F }, + { 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB } +}; + +static const struct ci_pt_defaults defaults_saturn_xt = +{ + 1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x70000, + { 0x8C, 0x247, 0x249, 0xA6, 0x80, 0x81, 0x8B, 0x89, 0x86, 0xC9, 0xCA, 0xC9, 0x4D, 0x4D, 0x4D }, + { 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 } +}; + +static const struct ci_pt_defaults defaults_saturn_pro = +{ + 1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000, + { 0x96, 0x21D, 0x23B, 0xA1, 0x85, 0x87, 0x83, 0x84, 0x81, 0xE6, 0xE6, 0xE6, 0x71, 0x6A, 0x6A }, + { 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 } +}; + +static const struct ci_pt_config_reg didt_config_ci[] = +{ + { 0x10, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x10, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x10, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x10, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x11, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x12, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x2, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x2, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x2, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x1, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x1, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x0, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x30, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x31, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x32, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x22, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x22, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x22, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x21, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x21, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x20, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x50, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x51, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x52, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x42, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x42, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x42, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x41, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x41, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x40, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x70, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x71, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x72, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x62, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x62, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x62, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x61, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x61, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, + { 0x60, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, + { 0xFFFFFFFF } +}; + +static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, + struct atom_voltage_table_entry *voltage_table, + u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd); +static int ci_set_power_limit(struct radeon_device *rdev, u32 n); +static int ci_set_overdrive_target_tdp(struct radeon_device *rdev, + u32 target_tdp); +static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate); + +static struct ci_power_info *ci_get_pi(struct radeon_device *rdev) +{ + struct ci_power_info *pi = rdev->pm.dpm.priv; + + return pi; +} + +static struct ci_ps *ci_get_ps(struct radeon_ps *rps) +{ + struct ci_ps *ps = rps->ps_priv; + + return ps; +} + +static void ci_initialize_powertune_defaults(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + switch (rdev->ddev->pci_device) { + case 0x6650: + case 0x6658: + case 0x665C: + default: + pi->powertune_defaults = &defaults_bonaire_xt; + break; + case 0x6651: + case 0x665D: + pi->powertune_defaults = &defaults_bonaire_pro; + break; + case 0x6640: + pi->powertune_defaults = &defaults_saturn_xt; + break; + case 0x6641: + pi->powertune_defaults = &defaults_saturn_pro; + break; + } + + pi->dte_tj_offset = 0; + + pi->caps_power_containment = true; + pi->caps_cac = false; + pi->caps_sq_ramping = false; + pi->caps_db_ramping = false; + pi->caps_td_ramping = false; + pi->caps_tcp_ramping = false; + + if (pi->caps_power_containment) { + pi->caps_cac = true; + pi->enable_bapm_feature = true; + pi->enable_tdc_limit_feature = true; + pi->enable_pkg_pwr_tracking_feature = true; + } +} + +static u8 ci_convert_to_vid(u16 vddc) +{ + return (6200 - (vddc * VOLTAGE_SCALE)) / 25; +} + +static int ci_populate_bapm_vddc_vid_sidd(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd; + u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd; + u8 *hi2_vid = pi->smc_powertune_table.BapmVddCVidHiSidd2; + u32 i; + + if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries == NULL) + return -EINVAL; + if (rdev->pm.dpm.dyn_state.cac_leakage_table.count > 8) + return -EINVAL; + if (rdev->pm.dpm.dyn_state.cac_leakage_table.count != + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count) + return -EINVAL; + + for (i = 0; i < rdev->pm.dpm.dyn_state.cac_leakage_table.count; i++) { + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) { + lo_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc1); + hi_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc2); + hi2_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc3); + } else { + lo_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc); + hi_vid[i] = ci_convert_to_vid((u16)rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage); + } + } + return 0; +} + +static int ci_populate_vddc_vid(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 *vid = pi->smc_powertune_table.VddCVid; + u32 i; + + if (pi->vddc_voltage_table.count > 8) + return -EINVAL; + + for (i = 0; i < pi->vddc_voltage_table.count; i++) + vid[i] = ci_convert_to_vid(pi->vddc_voltage_table.entries[i].value); + + return 0; +} + +static int ci_populate_svi_load_line(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + + pi->smc_powertune_table.SviLoadLineEn = pt_defaults->svi_load_line_en; + pi->smc_powertune_table.SviLoadLineVddC = pt_defaults->svi_load_line_vddc; + pi->smc_powertune_table.SviLoadLineTrimVddC = 3; + pi->smc_powertune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int ci_populate_tdc_limit(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + u16 tdc_limit; + + tdc_limit = rdev->pm.dpm.dyn_state.cac_tdp_table->tdc * 256; + pi->smc_powertune_table.TDC_VDDC_PkgLimit = cpu_to_be16(tdc_limit); + pi->smc_powertune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + pt_defaults->tdc_vddc_throttle_release_limit_perc; + pi->smc_powertune_table.TDC_MAWt = pt_defaults->tdc_mawt; + + return 0; +} + +static int ci_populate_dw8(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + int ret; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, PmFuseTable) + + offsetof(SMU7_Discrete_PmFuses, TdcWaterfallCtl), + (u32 *)&pi->smc_powertune_table.TdcWaterfallCtl, + pi->sram_end); + if (ret) + return -EINVAL; + else + pi->smc_powertune_table.TdcWaterfallCtl = pt_defaults->tdc_waterfall_ctl; + + return 0; +} + +static int ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd; + u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd; + int i, min, max; + + min = max = hi_vid[0]; + for (i = 0; i < 8; i++) { + if (0 != hi_vid[i]) { + if (min > hi_vid[i]) + min = hi_vid[i]; + if (max < hi_vid[i]) + max = hi_vid[i]; + } + + if (0 != lo_vid[i]) { + if (min > lo_vid[i]) + min = lo_vid[i]; + if (max < lo_vid[i]) + max = lo_vid[i]; + } + } + + if ((min == 0) || (max == 0)) + return -EINVAL; + pi->smc_powertune_table.GnbLPMLMaxVid = (u8)max; + pi->smc_powertune_table.GnbLPMLMinVid = (u8)min; + + return 0; +} + +static int ci_populate_bapm_vddc_base_leakage_sidd(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u16 hi_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd; + u16 lo_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd; + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + + hi_sidd = cac_tdp_table->high_cac_leakage / 100 * 256; + lo_sidd = cac_tdp_table->low_cac_leakage / 100 * 256; + + pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd = cpu_to_be16(hi_sidd); + pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd = cpu_to_be16(lo_sidd); + + return 0; +} + +static int ci_populate_bapm_parameters_in_dpm_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; + SMU7_Discrete_DpmTable *dpm_table = &pi->smc_state_table; + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + struct radeon_ppm_table *ppm = rdev->pm.dpm.dyn_state.ppm_table; + int i, j, k; + const u16 *def1; + const u16 *def2; + + dpm_table->DefaultTdp = cac_tdp_table->tdp * 256; + dpm_table->TargetTdp = cac_tdp_table->configurable_tdp * 256; + + dpm_table->DTETjOffset = (u8)pi->dte_tj_offset; + dpm_table->GpuTjMax = + (u8)(pi->thermal_temp_setting.temperature_high / 1000); + dpm_table->GpuTjHyst = 8; + + dpm_table->DTEAmbientTempBase = pt_defaults->dte_ambient_temp_base; + + if (ppm) { + dpm_table->PPM_PkgPwrLimit = cpu_to_be16((u16)ppm->dgpu_tdp * 256 / 1000); + dpm_table->PPM_TemperatureLimit = cpu_to_be16((u16)ppm->tj_max * 256); + } else { + dpm_table->PPM_PkgPwrLimit = cpu_to_be16(0); + dpm_table->PPM_TemperatureLimit = cpu_to_be16(0); + } + + dpm_table->BAPM_TEMP_GRADIENT = cpu_to_be32(pt_defaults->bapm_temp_gradient); + def1 = pt_defaults->bapmti_r; + def2 = pt_defaults->bapmti_rc; + + for (i = 0; i < SMU7_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU7_DTE_SOURCES; j++) { + for (k = 0; k < SMU7_DTE_SINKS; k++) { + dpm_table->BAPMTI_R[i][j][k] = cpu_to_be16(*def1); + dpm_table->BAPMTI_RC[i][j][k] = cpu_to_be16(*def2); + def1++; + def2++; + } + } + } + + return 0; +} + +static int ci_populate_pm_base(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 pm_fuse_table_offset; + int ret; + + if (pi->caps_power_containment) { + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, pi->sram_end); + if (ret) + return ret; + ret = ci_populate_bapm_vddc_vid_sidd(rdev); + if (ret) + return ret; + ret = ci_populate_vddc_vid(rdev); + if (ret) + return ret; + ret = ci_populate_svi_load_line(rdev); + if (ret) + return ret; + ret = ci_populate_tdc_limit(rdev); + if (ret) + return ret; + ret = ci_populate_dw8(rdev); + if (ret) + return ret; + ret = ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(rdev); + if (ret) + return ret; + ret = ci_populate_bapm_vddc_base_leakage_sidd(rdev); + if (ret) + return ret; + ret = ci_copy_bytes_to_smc(rdev, pm_fuse_table_offset, + (u8 *)&pi->smc_powertune_table, + sizeof(SMU7_Discrete_PmFuses), pi->sram_end); + if (ret) + return ret; + } + + return 0; +} + +static void ci_do_enable_didt(struct radeon_device *rdev, const bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 data; + + if (pi->caps_sq_ramping) { + data = RREG32_DIDT(DIDT_SQ_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_SQ_CTRL0, data); + } + + if (pi->caps_db_ramping) { + data = RREG32_DIDT(DIDT_DB_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_DB_CTRL0, data); + } + + if (pi->caps_td_ramping) { + data = RREG32_DIDT(DIDT_TD_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TD_CTRL0, data); + } + + if (pi->caps_tcp_ramping) { + data = RREG32_DIDT(DIDT_TCP_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TCP_CTRL0, data); + } +} + +static int ci_program_pt_config_registers(struct radeon_device *rdev, + const struct ci_pt_config_reg *cac_config_regs) +{ + const struct ci_pt_config_reg *config_regs = cac_config_regs; + u32 data; + u32 cache = 0; + + if (config_regs == NULL) + return -EINVAL; + + while (config_regs->offset != 0xFFFFFFFF) { + if (config_regs->type == CISLANDS_CONFIGREG_CACHE) { + cache |= ((config_regs->value << config_regs->shift) & config_regs->mask); + } else { + switch (config_regs->type) { + case CISLANDS_CONFIGREG_SMC_IND: + data = RREG32_SMC(config_regs->offset); + break; + case CISLANDS_CONFIGREG_DIDT_IND: + data = RREG32_DIDT(config_regs->offset); + break; + default: + data = RREG32(config_regs->offset << 2); + break; + } + + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + data |= cache; + + switch (config_regs->type) { + case CISLANDS_CONFIGREG_SMC_IND: + WREG32_SMC(config_regs->offset, data); + break; + case CISLANDS_CONFIGREG_DIDT_IND: + WREG32_DIDT(config_regs->offset, data); + break; + default: + WREG32(config_regs->offset << 2, data); + break; + } + cache = 0; + } + config_regs++; + } + return 0; +} + +static int ci_enable_didt(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + if (pi->caps_sq_ramping || pi->caps_db_ramping || + pi->caps_td_ramping || pi->caps_tcp_ramping) { + cik_enter_rlc_safe_mode(rdev); + + if (enable) { + ret = ci_program_pt_config_registers(rdev, didt_config_ci); + if (ret) { + cik_exit_rlc_safe_mode(rdev); + return ret; + } + } + + ci_do_enable_didt(rdev, enable); + + cik_exit_rlc_safe_mode(rdev); + } + + return 0; +} + +static int ci_enable_power_containment(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret = 0; + + if (enable) { + pi->power_containment_features = 0; + if (pi->caps_power_containment) { + if (pi->enable_bapm_feature) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableDTE); + if (smc_result != PPSMC_Result_OK) + ret = -EINVAL; + else + pi->power_containment_features |= POWERCONTAINMENT_FEATURE_BAPM; + } + + if (pi->enable_tdc_limit_feature) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_TDCLimitEnable); + if (smc_result != PPSMC_Result_OK) + ret = -EINVAL; + else + pi->power_containment_features |= POWERCONTAINMENT_FEATURE_TDCLimit; + } + + if (pi->enable_pkg_pwr_tracking_feature) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PkgPwrLimitEnable); + if (smc_result != PPSMC_Result_OK) { + ret = -EINVAL; + } else { + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + u32 default_pwr_limit = + (u32)(cac_tdp_table->maximum_power_delivery_limit * 256); + + pi->power_containment_features |= POWERCONTAINMENT_FEATURE_PkgPwrLimit; + + ci_set_power_limit(rdev, default_pwr_limit); + } + } + } + } else { + if (pi->caps_power_containment && pi->power_containment_features) { + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_TDCLimit) + ci_send_msg_to_smc(rdev, PPSMC_MSG_TDCLimitDisable); + + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM) + ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableDTE); + + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) + ci_send_msg_to_smc(rdev, PPSMC_MSG_PkgPwrLimitDisable); + pi->power_containment_features = 0; + } + } + + return ret; +} + +static int ci_enable_smc_cac(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret = 0; + + if (pi->caps_cac) { + if (enable) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableCac); + if (smc_result != PPSMC_Result_OK) { + ret = -EINVAL; + pi->cac_enabled = false; + } else { + pi->cac_enabled = true; + } + } else if (pi->cac_enabled) { + ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableCac); + pi->cac_enabled = false; + } + } + + return ret; +} + +static int ci_power_control_set_level(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + s32 adjust_percent; + s32 target_tdp; + int ret = 0; + bool adjust_polarity = false; /* ??? */ + + if (pi->caps_power_containment && + (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM)) { + adjust_percent = adjust_polarity ? + rdev->pm.dpm.tdp_adjustment : (-1 * rdev->pm.dpm.tdp_adjustment); + target_tdp = ((100 + adjust_percent) * + (s32)cac_tdp_table->configurable_tdp) / 100; + target_tdp *= 256; + + ret = ci_set_overdrive_target_tdp(rdev, (u32)target_tdp); + } + + return ret; +} + +void ci_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (pi->uvd_power_gated == gate) + return; + + pi->uvd_power_gated = gate; + + ci_update_uvd_dpm(rdev, gate); +} + +bool ci_dpm_vblank_too_short(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 vblank_time = r600_dpm_get_vblank_time(rdev); + u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + + if (vblank_time < switch_limit) + return true; + else + return false; + +} + +static void ci_apply_state_adjust_rules(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *ps = ci_get_ps(rps); + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_clock_and_voltage_limits *max_limits; + bool disable_mclk_switching; + u32 sclk, mclk; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; + int i; + + if ((rdev->pm.dpm.new_active_crtc_count > 1) || + ci_dpm_vblank_too_short(rdev)) + disable_mclk_switching = true; + else + disable_mclk_switching = false; + + if ((rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) + pi->battery_state = true; + else + pi->battery_state = false; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (rdev->pm.dpm.ac_power == false) { + for (i = 0; i < ps->performance_level_count; i++) { + if (ps->performance_levels[i].mclk > max_limits->mclk) + ps->performance_levels[i].mclk = max_limits->mclk; + if (ps->performance_levels[i].sclk > max_limits->sclk) + ps->performance_levels[i].sclk = max_limits->sclk; + } + } + + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + for (i = 0; i < ps->performance_level_count; i++) { + if (max_sclk_vddc) { + if (ps->performance_levels[i].sclk > max_sclk_vddc) + ps->performance_levels[i].sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->performance_levels[i].mclk > max_mclk_vddci) + ps->performance_levels[i].mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->performance_levels[i].mclk > max_mclk_vddc) + ps->performance_levels[i].mclk = max_mclk_vddc; + } + } + + /* XXX validate the min clocks required for display */ + + if (disable_mclk_switching) { + mclk = ps->performance_levels[ps->performance_level_count - 1].mclk; + sclk = ps->performance_levels[0].sclk; + } else { + mclk = ps->performance_levels[0].mclk; + sclk = ps->performance_levels[0].sclk; + } + + ps->performance_levels[0].sclk = sclk; + ps->performance_levels[0].mclk = mclk; + + if (ps->performance_levels[1].sclk < ps->performance_levels[0].sclk) + ps->performance_levels[1].sclk = ps->performance_levels[0].sclk; + + if (disable_mclk_switching) { + if (ps->performance_levels[0].mclk < ps->performance_levels[1].mclk) + ps->performance_levels[0].mclk = ps->performance_levels[1].mclk; + } else { + if (ps->performance_levels[1].mclk < ps->performance_levels[0].mclk) + ps->performance_levels[1].mclk = ps->performance_levels[0].mclk; + } +} + +static int ci_set_thermal_temperature_range(struct radeon_device *rdev, + int min_temp, int max_temp) +{ + int low_temp = 0 * 1000; + int high_temp = 255 * 1000; + u32 tmp; + + if (low_temp < min_temp) + low_temp = min_temp; + if (high_temp > max_temp) + high_temp = max_temp; + if (high_temp < low_temp) { + DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp); + return -EINVAL; + } + + tmp = RREG32_SMC(CG_THERMAL_INT); + tmp &= ~(CI_DIG_THERM_INTH_MASK | CI_DIG_THERM_INTL_MASK); + tmp |= CI_DIG_THERM_INTH(high_temp / 1000) | + CI_DIG_THERM_INTL(low_temp / 1000); + WREG32_SMC(CG_THERMAL_INT, tmp); + +#if 0 + /* XXX: need to figure out how to handle this properly */ + tmp = RREG32_SMC(CG_THERMAL_CTRL); + tmp &= DIG_THERM_DPM_MASK; + tmp |= DIG_THERM_DPM(high_temp / 1000); + WREG32_SMC(CG_THERMAL_CTRL, tmp); +#endif + + return 0; +} + +#if 0 +static int ci_read_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 *value) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + return ci_read_smc_sram_dword(rdev, + pi->soft_regs_start + reg_offset, + value, pi->sram_end); +} +#endif + +static int ci_write_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 value) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + return ci_write_smc_sram_dword(rdev, + pi->soft_regs_start + reg_offset, + value, pi->sram_end); +} + +static void ci_init_fps_limits(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + SMU7_Discrete_DpmTable *table = &pi->smc_state_table; + + if (pi->caps_fps) { + u16 tmp; + + tmp = 45; + table->FpsHighT = cpu_to_be16(tmp); + + tmp = 30; + table->FpsLowT = cpu_to_be16(tmp); + } +} + +static int ci_update_sclk_t(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret = 0; + u32 low_sclk_interrupt_t = 0; + + if (pi->caps_sclk_throttle_low_notification) { + low_sclk_interrupt_t = cpu_to_be32(pi->low_sclk_interrupt_t); + + ret = ci_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, LowSclkInterruptT), + (u8 *)&low_sclk_interrupt_t, + sizeof(u32), pi->sram_end); + + } + + return ret; +} + +static void ci_get_leakage_voltages(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u16 leakage_id, virtual_voltage_id; + u16 vddc, vddci; + int i; + + pi->vddc_leakage.count = 0; + pi->vddci_leakage.count = 0; + + if (radeon_atom_get_leakage_id_from_vbios(rdev, &leakage_id) == 0) { + for (i = 0; i < CISLANDS_MAX_LEAKAGE_COUNT; i++) { + virtual_voltage_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i; + if (radeon_atom_get_leakage_vddc_based_on_leakage_params(rdev, &vddc, &vddci, + virtual_voltage_id, + leakage_id) == 0) { + if (vddc != 0 && vddc != virtual_voltage_id) { + pi->vddc_leakage.actual_voltage[pi->vddc_leakage.count] = vddc; + pi->vddc_leakage.leakage_id[pi->vddc_leakage.count] = virtual_voltage_id; + pi->vddc_leakage.count++; + } + if (vddci != 0 && vddci != virtual_voltage_id) { + pi->vddci_leakage.actual_voltage[pi->vddci_leakage.count] = vddci; + pi->vddci_leakage.leakage_id[pi->vddci_leakage.count] = virtual_voltage_id; + pi->vddci_leakage.count++; + } + } + } + } +} + +static void ci_set_dpm_event_sources(struct radeon_device *rdev, u32 sources) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + bool want_thermal_protection; + enum radeon_dpm_event_src dpm_event_src; + u32 tmp; + + switch (sources) { + case 0: + default: + want_thermal_protection = false; + break; + case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL): + want_thermal_protection = true; + dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL; + break; + case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL): + want_thermal_protection = true; + dpm_event_src = RADEON_DPM_EVENT_SRC_EXTERNAL; + break; + case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) | + (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL)): + want_thermal_protection = true; + dpm_event_src = RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL; + break; + } + + if (want_thermal_protection) { +#if 0 + /* XXX: need to figure out how to handle this properly */ + tmp = RREG32_SMC(CG_THERMAL_CTRL); + tmp &= DPM_EVENT_SRC_MASK; + tmp |= DPM_EVENT_SRC(dpm_event_src); + WREG32_SMC(CG_THERMAL_CTRL, tmp); +#endif + + tmp = RREG32_SMC(GENERAL_PWRMGT); + if (pi->thermal_protection) + tmp &= ~THERMAL_PROTECTION_DIS; + else + tmp |= THERMAL_PROTECTION_DIS; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } else { + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp |= THERMAL_PROTECTION_DIS; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } +} + +static void ci_enable_auto_throttle_source(struct radeon_device *rdev, + enum radeon_dpm_auto_throttle_src source, + bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (enable) { + if (!(pi->active_auto_throttle_sources & (1 << source))) { + pi->active_auto_throttle_sources |= 1 << source; + ci_set_dpm_event_sources(rdev, pi->active_auto_throttle_sources); + } + } else { + if (pi->active_auto_throttle_sources & (1 << source)) { + pi->active_auto_throttle_sources &= ~(1 << source); + ci_set_dpm_event_sources(rdev, pi->active_auto_throttle_sources); + } + } +} + +static void ci_enable_vr_hot_gpio_interrupt(struct radeon_device *rdev) +{ + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REGULATOR_HOT) + ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableVRHotGPIOInterrupt); +} + +static int ci_unfreeze_sclk_mclk_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + + if (!pi->need_update_smu7_dpm_table) + return 0; + + if ((!pi->sclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_SCLKDPM_UnfreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if ((!pi->mclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_UnfreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + pi->need_update_smu7_dpm_table = 0; + return 0; +} + +static int ci_enable_sclk_mclk_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + + if (enable) { + if (!pi->sclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DPM_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if (!pi->mclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + + WREG32_P(MC_SEQ_CNTL_3, CAC_EN, ~CAC_EN); + + WREG32_SMC(LCAC_MC0_CNTL, 0x05); + WREG32_SMC(LCAC_MC1_CNTL, 0x05); + WREG32_SMC(LCAC_CPL_CNTL, 0x100005); + + udelay(10); + + WREG32_SMC(LCAC_MC0_CNTL, 0x400005); + WREG32_SMC(LCAC_MC1_CNTL, 0x400005); + WREG32_SMC(LCAC_CPL_CNTL, 0x500005); + } + } else { + if (!pi->sclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DPM_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if (!pi->mclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + } + + return 0; +} + +static int ci_start_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret; + u32 tmp; + + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp |= GLOBAL_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + + tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + tmp |= DYNAMIC_PM_EN; + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, VoltageChangeTimeout), 0x1000); + + WREG32_P(BIF_LNCNT_RESET, 0, ~RESET_LNCNT_EN); + + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Voltage_Cntl_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + + ret = ci_enable_sclk_mclk_dpm(rdev, true); + if (ret) + return ret; + + if (!pi->pcie_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_Enable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_freeze_sclk_mclk_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + + if (!pi->need_update_smu7_dpm_table) + return 0; + + if ((!pi->sclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_SCLKDPM_FreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + if ((!pi->mclk_dpm_key_disabled) && + (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_FreezeLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_stop_dpm(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + int ret; + u32 tmp; + + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp &= ~GLOBAL_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + + tmp = RREG32(SCLK_PWRMGT_CNTL); + tmp &= ~DYNAMIC_PM_EN; + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + if (!pi->pcie_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + ret = ci_enable_sclk_mclk_dpm(rdev, false); + if (ret) + return ret; + + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Voltage_Cntl_Disable); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + + return 0; +} + +static void ci_enable_sclk_control(struct radeon_device *rdev, bool enable) +{ + u32 tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + + if (enable) + tmp &= ~SCLK_PWRMGT_OFF; + else + tmp |= SCLK_PWRMGT_OFF; + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); +} + +#if 0 +static int ci_notify_hw_of_power_source(struct radeon_device *rdev, + bool ac_power) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_cac_tdp_table *cac_tdp_table = + rdev->pm.dpm.dyn_state.cac_tdp_table; + u32 power_limit; + + if (ac_power) + power_limit = (u32)(cac_tdp_table->maximum_power_delivery_limit * 256); + else + power_limit = (u32)(cac_tdp_table->battery_power_limit * 256); + + ci_set_power_limit(rdev, power_limit); + + if (pi->caps_automatic_dc_transition) { + if (ac_power) + ci_send_msg_to_smc(rdev, PPSMC_MSG_RunningOnAC); + else + ci_send_msg_to_smc(rdev, PPSMC_MSG_Remove_DC_Clamp); + } + + return 0; +} +#endif + +static PPSMC_Result ci_send_msg_to_smc_with_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 parameter) +{ + WREG32(SMC_MSG_ARG_0, parameter); + return ci_send_msg_to_smc(rdev, msg); +} + +static PPSMC_Result ci_send_msg_to_smc_return_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 *parameter) +{ + PPSMC_Result smc_result; + + smc_result = ci_send_msg_to_smc(rdev, msg); + + if ((smc_result == PPSMC_Result_OK) && parameter) + *parameter = RREG32(SMC_MSG_ARG_0); + + return smc_result; +} + +static int ci_dpm_force_state_sclk(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->sclk_dpm_key_disabled) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_dpm_force_state_mclk(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->mclk_dpm_key_disabled) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_MCLKDPM_ForceState, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_dpm_force_state_pcie(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->pcie_dpm_key_disabled) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_PCIeDPM_ForceLevel, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_set_power_limit(struct radeon_device *rdev, u32 n) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) { + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_PkgPwrSetLimit, n); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + + return 0; +} + +static int ci_set_overdrive_target_tdp(struct radeon_device *rdev, + u32 target_tdp) +{ + PPSMC_Result smc_result = + ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_OverDriveSetTargetTdp, target_tdp); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + return 0; +} + +static int ci_set_boot_state(struct radeon_device *rdev) +{ + return ci_enable_sclk_mclk_dpm(rdev, false); +} + +static u32 ci_get_average_sclk_freq(struct radeon_device *rdev) +{ + u32 sclk_freq; + PPSMC_Result smc_result = + ci_send_msg_to_smc_return_parameter(rdev, + PPSMC_MSG_API_GetSclkFrequency, + &sclk_freq); + if (smc_result != PPSMC_Result_OK) + sclk_freq = 0; + + return sclk_freq; +} + +static u32 ci_get_average_mclk_freq(struct radeon_device *rdev) +{ + u32 mclk_freq; + PPSMC_Result smc_result = + ci_send_msg_to_smc_return_parameter(rdev, + PPSMC_MSG_API_GetMclkFrequency, + &mclk_freq); + if (smc_result != PPSMC_Result_OK) + mclk_freq = 0; + + return mclk_freq; +} + +static void ci_dpm_start_smc(struct radeon_device *rdev) +{ + int i; + + ci_program_jump_on_start(rdev); + ci_start_smc_clock(rdev); + ci_start_smc(rdev); + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32_SMC(FIRMWARE_FLAGS) & INTERRUPTS_ENABLED) + break; + } +} + +static void ci_dpm_stop_smc(struct radeon_device *rdev) +{ + ci_reset_smc(rdev); + ci_stop_smc_clock(rdev); +} + +static int ci_process_firmware_header(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + int ret; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, DpmTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->dpm_table_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, SoftRegisters), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->soft_regs_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, mcRegisterTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->mc_reg_table_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, FanTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->fan_table_start = tmp; + + ret = ci_read_smc_sram_dword(rdev, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, mcArbDramTimingTable), + &tmp, pi->sram_end); + if (ret) + return ret; + + pi->arb_table_start = tmp; + + return 0; +} + +static void ci_read_clock_registers(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->clock_registers.cg_spll_func_cntl = + RREG32_SMC(CG_SPLL_FUNC_CNTL); + pi->clock_registers.cg_spll_func_cntl_2 = + RREG32_SMC(CG_SPLL_FUNC_CNTL_2); + pi->clock_registers.cg_spll_func_cntl_3 = + RREG32_SMC(CG_SPLL_FUNC_CNTL_3); + pi->clock_registers.cg_spll_func_cntl_4 = + RREG32_SMC(CG_SPLL_FUNC_CNTL_4); + pi->clock_registers.cg_spll_spread_spectrum = + RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM); + pi->clock_registers.cg_spll_spread_spectrum_2 = + RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM_2); + pi->clock_registers.dll_cntl = RREG32(DLL_CNTL); + pi->clock_registers.mclk_pwrmgt_cntl = RREG32(MCLK_PWRMGT_CNTL); + pi->clock_registers.mpll_ad_func_cntl = RREG32(MPLL_AD_FUNC_CNTL); + pi->clock_registers.mpll_dq_func_cntl = RREG32(MPLL_DQ_FUNC_CNTL); + pi->clock_registers.mpll_func_cntl = RREG32(MPLL_FUNC_CNTL); + pi->clock_registers.mpll_func_cntl_1 = RREG32(MPLL_FUNC_CNTL_1); + pi->clock_registers.mpll_func_cntl_2 = RREG32(MPLL_FUNC_CNTL_2); + pi->clock_registers.mpll_ss1 = RREG32(MPLL_SS1); + pi->clock_registers.mpll_ss2 = RREG32(MPLL_SS2); +} + +static void ci_init_sclk_t(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->low_sclk_interrupt_t = 0; +} + +static void ci_enable_thermal_protection(struct radeon_device *rdev, + bool enable) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + if (enable) + tmp &= ~THERMAL_PROTECTION_DIS; + else + tmp |= THERMAL_PROTECTION_DIS; + WREG32_SMC(GENERAL_PWRMGT, tmp); +} + +static void ci_enable_acpi_power_management(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + tmp |= STATIC_PM_EN; + + WREG32_SMC(GENERAL_PWRMGT, tmp); +} + +#if 0 +static int ci_enter_ulp_state(struct radeon_device *rdev) +{ + + WREG32(SMC_MESSAGE_0, PPSMC_MSG_SwitchToMinimumPower); + + udelay(25000); + + return 0; +} + +static int ci_exit_ulp_state(struct radeon_device *rdev) +{ + int i; + + WREG32(SMC_MESSAGE_0, PPSMC_MSG_ResumeFromMinimumPower); + + udelay(7000); + + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(SMC_RESP_0) == 1) + break; + udelay(1000); + } + + return 0; +} +#endif + +static int ci_notify_smc_display_change(struct radeon_device *rdev, + bool has_display) +{ + PPSMC_Msg msg = has_display ? PPSMC_MSG_HasDisplay : PPSMC_MSG_NoDisplay; + + return (ci_send_msg_to_smc(rdev, msg) == PPSMC_Result_OK) ? 0 : -EINVAL; +} + +static int ci_enable_ds_master_switch(struct radeon_device *rdev, + bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (enable) { + if (pi->caps_sclk_ds) { + if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_ON) != PPSMC_Result_OK) + return -EINVAL; + } else { + if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK) + return -EINVAL; + } + } else { + if (pi->caps_sclk_ds) { + if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK) + return -EINVAL; + } + } + + return 0; +} + +static void ci_program_display_gap(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL); + u32 pre_vbi_time_in_us; + u32 frame_time_in_us; + u32 ref_clock = rdev->clock.spll.reference_freq; + u32 refresh_rate = r600_dpm_get_vrefresh(rdev); + u32 vblank_time = r600_dpm_get_vblank_time(rdev); + + tmp &= ~DISP_GAP_MASK; + if (rdev->pm.dpm.new_active_crtc_count > 0) + tmp |= DISP_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + else + tmp |= DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE); + WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp); + + if (refresh_rate == 0) + refresh_rate = 60; + if (vblank_time == 0xffffffff) + vblank_time = 500; + frame_time_in_us = 1000000 / refresh_rate; + pre_vbi_time_in_us = + frame_time_in_us - 200 - vblank_time; + tmp = pre_vbi_time_in_us * (ref_clock / 100); + + WREG32_SMC(CG_DISPLAY_GAP_CNTL2, tmp); + ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, PreVBlankGap), 0x64); + ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us)); + + + ci_notify_smc_display_change(rdev, (rdev->pm.dpm.new_active_crtc_count == 1)); + +} + +static void ci_enable_spread_spectrum(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + if (enable) { + if (pi->caps_sclk_ss_support) { + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp |= DYN_SPREAD_SPECTRUM_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } + } else { + tmp = RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM); + tmp &= ~SSEN; + WREG32_SMC(CG_SPLL_SPREAD_SPECTRUM, tmp); + + tmp = RREG32_SMC(GENERAL_PWRMGT); + tmp &= ~DYN_SPREAD_SPECTRUM_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + } +} + +static void ci_program_sstp(struct radeon_device *rdev) +{ + WREG32_SMC(CG_SSP, (SSTU(R600_SSTU_DFLT) | SST(R600_SST_DFLT))); +} + +static void ci_enable_display_gap(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL); + + tmp &= ~(DISP_GAP_MASK | DISP_GAP_MCHG_MASK); + tmp |= (DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE) | + DISP_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK)); + + WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp); +} + +static void ci_program_vc(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + tmp &= ~(RESET_SCLK_CNT | RESET_BUSY_CNT); + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + WREG32_SMC(CG_FTV_0, CISLANDS_VRC_DFLT0); + WREG32_SMC(CG_FTV_1, CISLANDS_VRC_DFLT1); + WREG32_SMC(CG_FTV_2, CISLANDS_VRC_DFLT2); + WREG32_SMC(CG_FTV_3, CISLANDS_VRC_DFLT3); + WREG32_SMC(CG_FTV_4, CISLANDS_VRC_DFLT4); + WREG32_SMC(CG_FTV_5, CISLANDS_VRC_DFLT5); + WREG32_SMC(CG_FTV_6, CISLANDS_VRC_DFLT6); + WREG32_SMC(CG_FTV_7, CISLANDS_VRC_DFLT7); +} + +static void ci_clear_vc(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); + tmp |= (RESET_SCLK_CNT | RESET_BUSY_CNT); + WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); + + WREG32_SMC(CG_FTV_0, 0); + WREG32_SMC(CG_FTV_1, 0); + WREG32_SMC(CG_FTV_2, 0); + WREG32_SMC(CG_FTV_3, 0); + WREG32_SMC(CG_FTV_4, 0); + WREG32_SMC(CG_FTV_5, 0); + WREG32_SMC(CG_FTV_6, 0); + WREG32_SMC(CG_FTV_7, 0); +} + +static int ci_upload_firmware(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int i, ret; + + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32_SMC(RCU_UC_EVENTS) & BOOT_SEQ_DONE) + break; + } + WREG32_SMC(SMC_SYSCON_MISC_CNTL, 1); + + ci_stop_smc_clock(rdev); + ci_reset_smc(rdev); + + ret = ci_load_smc_ucode(rdev, pi->sram_end); + + return ret; + +} + +static int ci_get_svi2_voltage_table(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *voltage_dependency_table, + struct atom_voltage_table *voltage_table) +{ + u32 i; + + if (voltage_dependency_table == NULL) + return -EINVAL; + + voltage_table->mask_low = 0; + voltage_table->phase_delay = 0; + + voltage_table->count = voltage_dependency_table->count; + for (i = 0; i < voltage_table->count; i++) { + voltage_table->entries[i].value = voltage_dependency_table->entries[i].v; + voltage_table->entries[i].smio_low = 0; + } + + return 0; +} + +static int ci_construct_voltage_tables(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { + ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDC, + VOLTAGE_OBJ_GPIO_LUT, + &pi->vddc_voltage_table); + if (ret) + return ret; + } else if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + ret = ci_get_svi2_voltage_table(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &pi->vddc_voltage_table); + if (ret) + return ret; + } + + if (pi->vddc_voltage_table.count > SMU7_MAX_LEVELS_VDDC) + si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_VDDC, + &pi->vddc_voltage_table); + + if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { + ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDCI, + VOLTAGE_OBJ_GPIO_LUT, + &pi->vddci_voltage_table); + if (ret) + return ret; + } else if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + ret = ci_get_svi2_voltage_table(rdev, + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &pi->vddci_voltage_table); + if (ret) + return ret; + } + + if (pi->vddci_voltage_table.count > SMU7_MAX_LEVELS_VDDCI) + si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_VDDCI, + &pi->vddci_voltage_table); + + if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { + ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_MVDDC, + VOLTAGE_OBJ_GPIO_LUT, + &pi->mvdd_voltage_table); + if (ret) + return ret; + } else if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + ret = ci_get_svi2_voltage_table(rdev, + &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, + &pi->mvdd_voltage_table); + if (ret) + return ret; + } + + if (pi->mvdd_voltage_table.count > SMU7_MAX_LEVELS_MVDD) + si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_MVDD, + &pi->mvdd_voltage_table); + + return 0; +} + +static void ci_populate_smc_voltage_table(struct radeon_device *rdev, + struct atom_voltage_table_entry *voltage_table, + SMU7_Discrete_VoltageLevel *smc_voltage_table) +{ + int ret; + + ret = ci_get_std_voltage_value_sidd(rdev, voltage_table, + &smc_voltage_table->StdVoltageHiSidd, + &smc_voltage_table->StdVoltageLoSidd); + + if (ret) { + smc_voltage_table->StdVoltageHiSidd = voltage_table->value * VOLTAGE_SCALE; + smc_voltage_table->StdVoltageLoSidd = voltage_table->value * VOLTAGE_SCALE; + } + + smc_voltage_table->Voltage = cpu_to_be16(voltage_table->value * VOLTAGE_SCALE); + smc_voltage_table->StdVoltageHiSidd = + cpu_to_be16(smc_voltage_table->StdVoltageHiSidd); + smc_voltage_table->StdVoltageLoSidd = + cpu_to_be16(smc_voltage_table->StdVoltageLoSidd); +} + +static int ci_populate_smc_vddc_table(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + unsigned int count; + + table->VddcLevelCount = pi->vddc_voltage_table.count; + for (count = 0; count < table->VddcLevelCount; count++) { + ci_populate_smc_voltage_table(rdev, + &pi->vddc_voltage_table.entries[count], + &table->VddcLevel[count]); + + if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) + table->VddcLevel[count].Smio |= + pi->vddc_voltage_table.entries[count].smio_low; + else + table->VddcLevel[count].Smio = 0; + } + table->VddcLevelCount = cpu_to_be32(table->VddcLevelCount); + + return 0; +} + +static int ci_populate_smc_vddci_table(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + unsigned int count; + struct ci_power_info *pi = ci_get_pi(rdev); + + table->VddciLevelCount = pi->vddci_voltage_table.count; + for (count = 0; count < table->VddciLevelCount; count++) { + ci_populate_smc_voltage_table(rdev, + &pi->vddci_voltage_table.entries[count], + &table->VddciLevel[count]); + + if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) + table->VddciLevel[count].Smio |= + pi->vddci_voltage_table.entries[count].smio_low; + else + table->VddciLevel[count].Smio = 0; + } + table->VddciLevelCount = cpu_to_be32(table->VddciLevelCount); + + return 0; +} + +static int ci_populate_smc_mvdd_table(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + unsigned int count; + + table->MvddLevelCount = pi->mvdd_voltage_table.count; + for (count = 0; count < table->MvddLevelCount; count++) { + ci_populate_smc_voltage_table(rdev, + &pi->mvdd_voltage_table.entries[count], + &table->MvddLevel[count]); + + if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) + table->MvddLevel[count].Smio |= + pi->mvdd_voltage_table.entries[count].smio_low; + else + table->MvddLevel[count].Smio = 0; + } + table->MvddLevelCount = cpu_to_be32(table->MvddLevelCount); + + return 0; +} + +static int ci_populate_smc_voltage_tables(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + int ret; + + ret = ci_populate_smc_vddc_table(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_vddci_table(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_mvdd_table(rdev, table); + if (ret) + return ret; + + return 0; +} + +static int ci_populate_mvdd_value(struct radeon_device *rdev, u32 mclk, + SMU7_Discrete_VoltageLevel *voltage) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i = 0; + + if (pi->mvdd_control != CISLANDS_VOLTAGE_CONTROL_NONE) { + for (i = 0; i < rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count; i++) { + if (mclk <= rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries[i].clk) { + voltage->Voltage = pi->mvdd_voltage_table.entries[i].value; + break; + } + } + + if (i >= rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count) + return -EINVAL; + } + + return -EINVAL; +} + +static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, + struct atom_voltage_table_entry *voltage_table, + u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd) +{ + u16 v_index, idx; + bool voltage_found = false; + *std_voltage_hi_sidd = voltage_table->value * VOLTAGE_SCALE; + *std_voltage_lo_sidd = voltage_table->value * VOLTAGE_SCALE; + + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries == NULL) + return -EINVAL; + + if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries) { + for (v_index = 0; (u32)v_index < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) { + if (voltage_table->value == + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) { + voltage_found = true; + if ((u32)v_index < rdev->pm.dpm.dyn_state.cac_leakage_table.count) + idx = v_index; + else + idx = rdev->pm.dpm.dyn_state.cac_leakage_table.count - 1; + *std_voltage_lo_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE; + *std_voltage_hi_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE; + break; + } + } + + if (!voltage_found) { + for (v_index = 0; (u32)v_index < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) { + if (voltage_table->value <= + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) { + voltage_found = true; + if ((u32)v_index < rdev->pm.dpm.dyn_state.cac_leakage_table.count) + idx = v_index; + else + idx = rdev->pm.dpm.dyn_state.cac_leakage_table.count - 1; + *std_voltage_lo_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE; + *std_voltage_hi_sidd = + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE; + break; + } + } + } + } + + return 0; +} + +static void ci_populate_phase_value_based_on_sclk(struct radeon_device *rdev, + const struct radeon_phase_shedding_limits_table *limits, + u32 sclk, + u32 *phase_shedding) +{ + unsigned int i; + + *phase_shedding = 1; + + for (i = 0; i < limits->count; i++) { + if (sclk < limits->entries[i].sclk) { + *phase_shedding = i; + break; + } + } +} + +static void ci_populate_phase_value_based_on_mclk(struct radeon_device *rdev, + const struct radeon_phase_shedding_limits_table *limits, + u32 mclk, + u32 *phase_shedding) +{ + unsigned int i; + + *phase_shedding = 1; + + for (i = 0; i < limits->count; i++) { + if (mclk < limits->entries[i].mclk) { + *phase_shedding = i; + break; + } + } +} + +static int ci_init_arb_table_index(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + int ret; + + ret = ci_read_smc_sram_dword(rdev, pi->arb_table_start, + &tmp, pi->sram_end); + if (ret) + return ret; + + tmp &= 0x00FFFFFF; + tmp |= MC_CG_ARB_FREQ_F1 << 24; + + return ci_write_smc_sram_dword(rdev, pi->arb_table_start, + tmp, pi->sram_end); +} + +static int ci_get_dependency_volt_by_clk(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *allowed_clock_voltage_table, + u32 clock, u32 *voltage) +{ + u32 i = 0; + + if (allowed_clock_voltage_table->count == 0) + return -EINVAL; + + for (i = 0; i < allowed_clock_voltage_table->count; i++) { + if (allowed_clock_voltage_table->entries[i].clk >= clock) { + *voltage = allowed_clock_voltage_table->entries[i].v; + return 0; + } + } + + *voltage = allowed_clock_voltage_table->entries[i-1].v; + + return 0; +} + +static u8 ci_get_sleep_divider_id_from_clock(struct radeon_device *rdev, + u32 sclk, u32 min_sclk_in_sr) +{ + u32 i; + u32 tmp; + u32 min = (min_sclk_in_sr > CISLAND_MINIMUM_ENGINE_CLOCK) ? + min_sclk_in_sr : CISLAND_MINIMUM_ENGINE_CLOCK; + + if (sclk < min) + return 0; + + for (i = CISLAND_MAX_DEEPSLEEP_DIVIDER_ID; ; i--) { + tmp = sclk / (1 << i); + if (tmp >= min || i == 0) + break; + } + + return (u8)i; +} + +static int ci_initial_switch_from_arb_f0_to_f1(struct radeon_device *rdev) +{ + return ni_copy_and_switch_arb_sets(rdev, MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1); +} + +static int ci_reset_to_default(struct radeon_device *rdev) +{ + return (ci_send_msg_to_smc(rdev, PPSMC_MSG_ResetToDefaults) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +static int ci_force_switch_to_arb_f0(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = (RREG32_SMC(SMC_SCRATCH9) & 0x0000ff00) >> 8; + + if (tmp == MC_CG_ARB_FREQ_F0) + return 0; + + return ni_copy_and_switch_arb_sets(rdev, tmp, MC_CG_ARB_FREQ_F0); +} + +static int ci_populate_memory_timing_parameters(struct radeon_device *rdev, + u32 sclk, + u32 mclk, + SMU7_Discrete_MCArbDramTimingTableEntry *arb_regs) +{ + u32 dram_timing; + u32 dram_timing2; + u32 burst_time; + + radeon_atom_set_engine_dram_timings(rdev, sclk, mclk); + + dram_timing = RREG32(MC_ARB_DRAM_TIMING); + dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2); + burst_time = RREG32(MC_ARB_BURST_TIME) & STATE0_MASK; + + arb_regs->McArbDramTiming = cpu_to_be32(dram_timing); + arb_regs->McArbDramTiming2 = cpu_to_be32(dram_timing2); + arb_regs->McArbBurstTime = (u8)burst_time; + + return 0; +} + +static int ci_do_program_memory_timing_parameters(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + SMU7_Discrete_MCArbDramTimingTable arb_regs; + u32 i, j; + int ret = 0; + + memset(&arb_regs, 0, sizeof(SMU7_Discrete_MCArbDramTimingTable)); + + for (i = 0; i < pi->dpm_table.sclk_table.count; i++) { + for (j = 0; j < pi->dpm_table.mclk_table.count; j++) { + ret = ci_populate_memory_timing_parameters(rdev, + pi->dpm_table.sclk_table.dpm_levels[i].value, + pi->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + if (ret) + break; + } + } + + if (ret == 0) + ret = ci_copy_bytes_to_smc(rdev, + pi->arb_table_start, + (u8 *)&arb_regs, + sizeof(SMU7_Discrete_MCArbDramTimingTable), + pi->sram_end); + + return ret; +} + +static int ci_program_memory_timing_parameters(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (pi->need_update_smu7_dpm_table == 0) + return 0; + + return ci_do_program_memory_timing_parameters(rdev); +} + +static void ci_populate_smc_initial_state(struct radeon_device *rdev, + struct radeon_ps *radeon_boot_state) +{ + struct ci_ps *boot_state = ci_get_ps(radeon_boot_state); + struct ci_power_info *pi = ci_get_pi(rdev); + u32 level = 0; + + for (level = 0; level < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; level++) { + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[level].clk >= + boot_state->performance_levels[0].sclk) { + pi->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + for (level = 0; level < rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.count; level++) { + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries[level].clk >= + boot_state->performance_levels[0].mclk) { + pi->smc_state_table.MemoryBootLevel = level; + break; + } + } +} + +static u32 ci_get_dpm_level_enable_mask_value(struct ci_single_dpm_table *dpm_table) +{ + u32 i; + u32 mask_value = 0; + + for (i = dpm_table->count; i > 0; i--) { + mask_value = mask_value << 1; + if (dpm_table->dpm_levels[i-1].enabled) + mask_value |= 0x1; + else + mask_value &= 0xFFFFFFFE; + } + + return mask_value; +} + +static void ci_populate_smc_link_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_dpm_table *dpm_table = &pi->dpm_table; + u32 i; + + for (i = 0; i < dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (u8)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = + r600_encode_pci_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = 1; + table->LinkLevel[i].DownT = cpu_to_be32(5); + table->LinkLevel[i].UpT = cpu_to_be32(30); + } + + pi->smc_state_table.LinkLevelCount = (u8)dpm_table->pcie_speed_table.count; + pi->dpm_level_enable_mask.pcie_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); +} + +static int ci_populate_smc_uvd_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->UvdLevelCount = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count; + + for (count = 0; count < table->UvdLevelCount; count++) { + table->UvdLevel[count].VclkFrequency = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].vclk; + table->UvdLevel[count].DclkFrequency = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].dclk; + table->UvdLevel[count].MinVddc = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; + table->UvdLevel[count].MinVddcPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->UvdLevel[count].VclkFrequency, false, ÷rs); + if (ret) + return ret; + + table->UvdLevel[count].VclkDivider = (u8)dividers.post_divider; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->UvdLevel[count].DclkFrequency, false, ÷rs); + if (ret) + return ret; + + table->UvdLevel[count].DclkDivider = (u8)dividers.post_divider; + + table->UvdLevel[count].VclkFrequency = cpu_to_be32(table->UvdLevel[count].VclkFrequency); + table->UvdLevel[count].DclkFrequency = cpu_to_be32(table->UvdLevel[count].DclkFrequency); + table->UvdLevel[count].MinVddc = cpu_to_be16(table->UvdLevel[count].MinVddc); + } + + return ret; +} + +static int ci_populate_smc_vce_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->VceLevelCount = + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count; + + for (count = 0; count < table->VceLevelCount; count++) { + table->VceLevel[count].Frequency = + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].evclk; + table->VceLevel[count].MinVoltage = + (u16)rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; + table->VceLevel[count].MinPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->VceLevel[count].Frequency, false, ÷rs); + if (ret) + return ret; + + table->VceLevel[count].Divider = (u8)dividers.post_divider; + + table->VceLevel[count].Frequency = cpu_to_be32(table->VceLevel[count].Frequency); + table->VceLevel[count].MinVoltage = cpu_to_be16(table->VceLevel[count].MinVoltage); + } + + return ret; + +} + +static int ci_populate_smc_acp_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->AcpLevelCount = (u8) + (rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count); + + for (count = 0; count < table->AcpLevelCount; count++) { + table->AcpLevel[count].Frequency = + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].clk; + table->AcpLevel[count].MinVoltage = + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].v; + table->AcpLevel[count].MinPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->AcpLevel[count].Frequency, false, ÷rs); + if (ret) + return ret; + + table->AcpLevel[count].Divider = (u8)dividers.post_divider; + + table->AcpLevel[count].Frequency = cpu_to_be32(table->AcpLevel[count].Frequency); + table->AcpLevel[count].MinVoltage = cpu_to_be16(table->AcpLevel[count].MinVoltage); + } + + return ret; +} + +static int ci_populate_smc_samu_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + u32 count; + struct atom_clock_dividers dividers; + int ret = -EINVAL; + + table->SamuLevelCount = + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count; + + for (count = 0; count < table->SamuLevelCount; count++) { + table->SamuLevel[count].Frequency = + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].clk; + table->SamuLevel[count].MinVoltage = + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; + table->SamuLevel[count].MinPhases = 1; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + table->SamuLevel[count].Frequency, false, ÷rs); + if (ret) + return ret; + + table->SamuLevel[count].Divider = (u8)dividers.post_divider; + + table->SamuLevel[count].Frequency = cpu_to_be32(table->SamuLevel[count].Frequency); + table->SamuLevel[count].MinVoltage = cpu_to_be16(table->SamuLevel[count].MinVoltage); + } + + return ret; +} + +static int ci_calculate_mclk_params(struct radeon_device *rdev, + u32 memory_clock, + SMU7_Discrete_MemoryLevel *mclk, + bool strobe_mode, + bool dll_state_on) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 dll_cntl = pi->clock_registers.dll_cntl; + u32 mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl; + u32 mpll_ad_func_cntl = pi->clock_registers.mpll_ad_func_cntl; + u32 mpll_dq_func_cntl = pi->clock_registers.mpll_dq_func_cntl; + u32 mpll_func_cntl = pi->clock_registers.mpll_func_cntl; + u32 mpll_func_cntl_1 = pi->clock_registers.mpll_func_cntl_1; + u32 mpll_func_cntl_2 = pi->clock_registers.mpll_func_cntl_2; + u32 mpll_ss1 = pi->clock_registers.mpll_ss1; + u32 mpll_ss2 = pi->clock_registers.mpll_ss2; + struct atom_mpll_param mpll_param; + int ret; + + ret = radeon_atom_get_memory_pll_dividers(rdev, memory_clock, strobe_mode, &mpll_param); + if (ret) + return ret; + + mpll_func_cntl &= ~BWCTRL_MASK; + mpll_func_cntl |= BWCTRL(mpll_param.bwcntl); + + mpll_func_cntl_1 &= ~(CLKF_MASK | CLKFRAC_MASK | VCO_MODE_MASK); + mpll_func_cntl_1 |= CLKF(mpll_param.clkf) | + CLKFRAC(mpll_param.clkfrac) | VCO_MODE(mpll_param.vco_mode); + + mpll_ad_func_cntl &= ~YCLK_POST_DIV_MASK; + mpll_ad_func_cntl |= YCLK_POST_DIV(mpll_param.post_div); + + if (pi->mem_gddr5) { + mpll_dq_func_cntl &= ~(YCLK_SEL_MASK | YCLK_POST_DIV_MASK); + mpll_dq_func_cntl |= YCLK_SEL(mpll_param.yclk_sel) | + YCLK_POST_DIV(mpll_param.post_div); + } + + if (pi->caps_mclk_ss_support) { + struct radeon_atom_ss ss; + u32 freq_nom; + u32 tmp; + u32 reference_clock = rdev->clock.mpll.reference_freq; + + if (pi->mem_gddr5) + freq_nom = memory_clock * 4; + else + freq_nom = memory_clock * 2; + + tmp = (freq_nom / reference_clock); + tmp = tmp * tmp; + if (radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_MEMORY_SS, freq_nom)) { + u32 clks = reference_clock * 5 / ss.rate; + u32 clkv = (u32)((((131 * ss.percentage * ss.rate) / 100) * tmp) / freq_nom); + + mpll_ss1 &= ~CLKV_MASK; + mpll_ss1 |= CLKV(clkv); + + mpll_ss2 &= ~CLKS_MASK; + mpll_ss2 |= CLKS(clks); + } + } + + mclk_pwrmgt_cntl &= ~DLL_SPEED_MASK; + mclk_pwrmgt_cntl |= DLL_SPEED(mpll_param.dll_speed); + + if (dll_state_on) + mclk_pwrmgt_cntl |= MRDCK0_PDNB | MRDCK1_PDNB; + else + mclk_pwrmgt_cntl &= ~(MRDCK0_PDNB | MRDCK1_PDNB); + + mclk->MclkFrequency = memory_clock; + mclk->MpllFuncCntl = mpll_func_cntl; + mclk->MpllFuncCntl_1 = mpll_func_cntl_1; + mclk->MpllFuncCntl_2 = mpll_func_cntl_2; + mclk->MpllAdFuncCntl = mpll_ad_func_cntl; + mclk->MpllDqFuncCntl = mpll_dq_func_cntl; + mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; + mclk->DllCntl = dll_cntl; + mclk->MpllSs1 = mpll_ss1; + mclk->MpllSs2 = mpll_ss2; + + return 0; +} + +static int ci_populate_single_memory_level(struct radeon_device *rdev, + u32 memory_clock, + SMU7_Discrete_MemoryLevel *memory_level) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + bool dll_state_on; + + if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries) { + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + memory_clock, &memory_level->MinVddc); + if (ret) + return ret; + } + + if (rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries) { + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + memory_clock, &memory_level->MinVddci); + if (ret) + return ret; + } + + if (rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries) { + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, + memory_clock, &memory_level->MinMvdd); + if (ret) + return ret; + } + + memory_level->MinVddcPhases = 1; + + if (pi->vddc_phase_shed_control) + ci_populate_phase_value_based_on_mclk(rdev, + &rdev->pm.dpm.dyn_state.phase_shedding_limits_table, + memory_clock, + &memory_level->MinVddcPhases); + + memory_level->EnabledForThrottle = 1; + memory_level->EnabledForActivity = 1; + memory_level->UpH = 0; + memory_level->DownH = 100; + memory_level->VoltageDownH = 0; + memory_level->ActivityLevel = (u16)pi->mclk_activity_target; + + memory_level->StutterEnable = false; + memory_level->StrobeEnable = false; + memory_level->EdcReadEnable = false; + memory_level->EdcWriteEnable = false; + memory_level->RttEnable = false; + + memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + if (pi->mclk_stutter_mode_threshold && + (memory_clock <= pi->mclk_stutter_mode_threshold) && + (pi->uvd_enabled == false) && + (RREG32(DPG_PIPE_STUTTER_CONTROL) & STUTTER_ENABLE) && + (rdev->pm.dpm.new_active_crtc_count <= 2)) + memory_level->StutterEnable = true; + + if (pi->mclk_strobe_mode_threshold && + (memory_clock <= pi->mclk_strobe_mode_threshold)) + memory_level->StrobeEnable = 1; + + if (pi->mem_gddr5) { + memory_level->StrobeRatio = + si_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable); + if (pi->mclk_edc_enable_threshold && + (memory_clock > pi->mclk_edc_enable_threshold)) + memory_level->EdcReadEnable = true; + + if (pi->mclk_edc_wr_enable_threshold && + (memory_clock > pi->mclk_edc_wr_enable_threshold)) + memory_level->EdcWriteEnable = true; + + if (memory_level->StrobeEnable) { + if (si_get_mclk_frequency_ratio(memory_clock, true) >= + ((RREG32(MC_SEQ_MISC7) >> 16) & 0xf)) + dll_state_on = ((RREG32(MC_SEQ_MISC5) >> 1) & 0x1) ? true : false; + else + dll_state_on = ((RREG32(MC_SEQ_MISC6) >> 1) & 0x1) ? true : false; + } else { + dll_state_on = pi->dll_default_on; + } + } else { + memory_level->StrobeRatio = si_get_ddr3_mclk_frequency_ratio(memory_clock); + dll_state_on = ((RREG32(MC_SEQ_MISC5) >> 1) & 0x1) ? true : false; + } + + ret = ci_calculate_mclk_params(rdev, memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on); + if (ret) + return ret; + + memory_level->MinVddc = cpu_to_be32(memory_level->MinVddc * VOLTAGE_SCALE); + memory_level->MinVddcPhases = cpu_to_be32(memory_level->MinVddcPhases); + memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE); + memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE); + + memory_level->MclkFrequency = cpu_to_be32(memory_level->MclkFrequency); + memory_level->ActivityLevel = cpu_to_be16(memory_level->ActivityLevel); + memory_level->MpllFuncCntl = cpu_to_be32(memory_level->MpllFuncCntl); + memory_level->MpllFuncCntl_1 = cpu_to_be32(memory_level->MpllFuncCntl_1); + memory_level->MpllFuncCntl_2 = cpu_to_be32(memory_level->MpllFuncCntl_2); + memory_level->MpllAdFuncCntl = cpu_to_be32(memory_level->MpllAdFuncCntl); + memory_level->MpllDqFuncCntl = cpu_to_be32(memory_level->MpllDqFuncCntl); + memory_level->MclkPwrmgtCntl = cpu_to_be32(memory_level->MclkPwrmgtCntl); + memory_level->DllCntl = cpu_to_be32(memory_level->DllCntl); + memory_level->MpllSs1 = cpu_to_be32(memory_level->MpllSs1); + memory_level->MpllSs2 = cpu_to_be32(memory_level->MpllSs2); + + return 0; +} + +static int ci_populate_smc_acpi_level(struct radeon_device *rdev, + SMU7_Discrete_DpmTable *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct atom_clock_dividers dividers; + SMU7_Discrete_VoltageLevel voltage_level; + u32 spll_func_cntl = pi->clock_registers.cg_spll_func_cntl; + u32 spll_func_cntl_2 = pi->clock_registers.cg_spll_func_cntl_2; + u32 dll_cntl = pi->clock_registers.dll_cntl; + u32 mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl; + int ret; + + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + if (pi->acpi_vddc) + table->ACPILevel.MinVddc = cpu_to_be32(pi->acpi_vddc * VOLTAGE_SCALE); + else + table->ACPILevel.MinVddc = cpu_to_be32(pi->min_vddc_in_pp_table * VOLTAGE_SCALE); + + table->ACPILevel.MinVddcPhases = pi->vddc_phase_shed_control ? 0 : 1; + + table->ACPILevel.SclkFrequency = rdev->clock.spll.reference_freq; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_SCLK, + table->ACPILevel.SclkFrequency, false, ÷rs); + if (ret) + return ret; + + table->ACPILevel.SclkDid = (u8)dividers.post_divider; + table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + table->ACPILevel.DeepSleepDivId = 0; + + spll_func_cntl &= ~SPLL_PWRON; + spll_func_cntl |= SPLL_RESET; + + spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= SCLK_MUX_SEL(4); + + table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; + table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; + table->ACPILevel.CgSpllFuncCntl3 = pi->clock_registers.cg_spll_func_cntl_3; + table->ACPILevel.CgSpllFuncCntl4 = pi->clock_registers.cg_spll_func_cntl_4; + table->ACPILevel.SpllSpreadSpectrum = pi->clock_registers.cg_spll_spread_spectrum; + table->ACPILevel.SpllSpreadSpectrum2 = pi->clock_registers.cg_spll_spread_spectrum_2; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + table->ACPILevel.Flags = cpu_to_be32(table->ACPILevel.Flags); + table->ACPILevel.MinVddcPhases = cpu_to_be32(table->ACPILevel.MinVddcPhases); + table->ACPILevel.SclkFrequency = cpu_to_be32(table->ACPILevel.SclkFrequency); + table->ACPILevel.CgSpllFuncCntl = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl); + table->ACPILevel.CgSpllFuncCntl2 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl2); + table->ACPILevel.CgSpllFuncCntl3 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl3); + table->ACPILevel.CgSpllFuncCntl4 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl4); + table->ACPILevel.SpllSpreadSpectrum = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum); + table->ACPILevel.SpllSpreadSpectrum2 = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum2); + table->ACPILevel.CcPwrDynRm = cpu_to_be32(table->ACPILevel.CcPwrDynRm); + table->ACPILevel.CcPwrDynRm1 = cpu_to_be32(table->ACPILevel.CcPwrDynRm1); + + table->MemoryACPILevel.MinVddc = table->ACPILevel.MinVddc; + table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases; + + if (pi->vddci_control != CISLANDS_VOLTAGE_CONTROL_NONE) { + if (pi->acpi_vddci) + table->MemoryACPILevel.MinVddci = + cpu_to_be32(pi->acpi_vddci * VOLTAGE_SCALE); + else + table->MemoryACPILevel.MinVddci = + cpu_to_be32(pi->min_vddci_in_pp_table * VOLTAGE_SCALE); + } + + if (ci_populate_mvdd_value(rdev, 0, &voltage_level)) + table->MemoryACPILevel.MinMvdd = 0; + else + table->MemoryACPILevel.MinMvdd = + cpu_to_be32(voltage_level.Voltage * VOLTAGE_SCALE); + + mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET; + mclk_pwrmgt_cntl &= ~(MRDCK0_PDNB | MRDCK1_PDNB); + + dll_cntl &= ~(MRDCK0_BYPASS | MRDCK1_BYPASS); + + table->MemoryACPILevel.DllCntl = cpu_to_be32(dll_cntl); + table->MemoryACPILevel.MclkPwrmgtCntl = cpu_to_be32(mclk_pwrmgt_cntl); + table->MemoryACPILevel.MpllAdFuncCntl = + cpu_to_be32(pi->clock_registers.mpll_ad_func_cntl); + table->MemoryACPILevel.MpllDqFuncCntl = + cpu_to_be32(pi->clock_registers.mpll_dq_func_cntl); + table->MemoryACPILevel.MpllFuncCntl = + cpu_to_be32(pi->clock_registers.mpll_func_cntl); + table->MemoryACPILevel.MpllFuncCntl_1 = + cpu_to_be32(pi->clock_registers.mpll_func_cntl_1); + table->MemoryACPILevel.MpllFuncCntl_2 = + cpu_to_be32(pi->clock_registers.mpll_func_cntl_2); + table->MemoryACPILevel.MpllSs1 = cpu_to_be32(pi->clock_registers.mpll_ss1); + table->MemoryACPILevel.MpllSs2 = cpu_to_be32(pi->clock_registers.mpll_ss2); + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpH = 0; + table->MemoryACPILevel.DownH = 100; + table->MemoryACPILevel.VoltageDownH = 0; + table->MemoryACPILevel.ActivityLevel = + cpu_to_be16((u16)pi->mclk_activity_target); + + table->MemoryACPILevel.StutterEnable = false; + table->MemoryACPILevel.StrobeEnable = false; + table->MemoryACPILevel.EdcReadEnable = false; + table->MemoryACPILevel.EdcWriteEnable = false; + table->MemoryACPILevel.RttEnable = false; + + return 0; +} + + +static int ci_enable_ulv(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ulv_parm *ulv = &pi->ulv; + + if (ulv->supported) { + if (enable) + return (ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableULV) == PPSMC_Result_OK) ? + 0 : -EINVAL; + else + return (ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableULV) == PPSMC_Result_OK) ? + 0 : -EINVAL; + } + + return 0; +} + +static int ci_populate_ulv_level(struct radeon_device *rdev, + SMU7_Discrete_Ulv *state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u16 ulv_voltage = rdev->pm.dpm.backbias_response_time; + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + if (ulv_voltage == 0) { + pi->ulv.supported = false; + return 0; + } + + if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { + if (ulv_voltage > rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v) + state->VddcOffset = 0; + else + state->VddcOffset = + rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage; + } else { + if (ulv_voltage > rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v) + state->VddcOffsetVid = 0; + else + state->VddcOffsetVid = (u8) + ((rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage) * + VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + } + state->VddcPhase = pi->vddc_phase_shed_control ? 0 : 1; + + state->CcPwrDynRm = cpu_to_be32(state->CcPwrDynRm); + state->CcPwrDynRm1 = cpu_to_be32(state->CcPwrDynRm1); + state->VddcOffset = cpu_to_be16(state->VddcOffset); + + return 0; +} + +static int ci_calculate_sclk_params(struct radeon_device *rdev, + u32 engine_clock, + SMU7_Discrete_GraphicsLevel *sclk) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct atom_clock_dividers dividers; + u32 spll_func_cntl_3 = pi->clock_registers.cg_spll_func_cntl_3; + u32 spll_func_cntl_4 = pi->clock_registers.cg_spll_func_cntl_4; + u32 cg_spll_spread_spectrum = pi->clock_registers.cg_spll_spread_spectrum; + u32 cg_spll_spread_spectrum_2 = pi->clock_registers.cg_spll_spread_spectrum_2; + u32 reference_clock = rdev->clock.spll.reference_freq; + u32 reference_divider; + u32 fbdiv; + int ret; + + ret = radeon_atom_get_clock_dividers(rdev, + COMPUTE_GPUCLK_INPUT_FLAG_SCLK, + engine_clock, false, ÷rs); + if (ret) + return ret; + + reference_divider = 1 + dividers.ref_div; + fbdiv = dividers.fb_div & 0x3FFFFFF; + + spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK; + spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv); + spll_func_cntl_3 |= SPLL_DITHEN; + + if (pi->caps_sclk_ss_support) { + struct radeon_atom_ss ss; + u32 vco_freq = engine_clock * dividers.post_div; + + if (radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_ENGINE_SS, vco_freq)) { + u32 clk_s = reference_clock * 5 / (reference_divider * ss.rate); + u32 clk_v = 4 * ss.percentage * fbdiv / (clk_s * 10000); + + cg_spll_spread_spectrum &= ~CLK_S_MASK; + cg_spll_spread_spectrum |= CLK_S(clk_s); + cg_spll_spread_spectrum |= SSEN; + + cg_spll_spread_spectrum_2 &= ~CLK_V_MASK; + cg_spll_spread_spectrum_2 |= CLK_V(clk_v); + } + } + + sclk->SclkFrequency = engine_clock; + sclk->CgSpllFuncCntl3 = spll_func_cntl_3; + sclk->CgSpllFuncCntl4 = spll_func_cntl_4; + sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; + sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; + sclk->SclkDid = (u8)dividers.post_divider; + + return 0; +} + +static int ci_populate_single_graphic_level(struct radeon_device *rdev, + u32 engine_clock, + u16 sclk_activity_level_t, + SMU7_Discrete_GraphicsLevel *graphic_level) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + ret = ci_calculate_sclk_params(rdev, engine_clock, graphic_level); + if (ret) + return ret; + + ret = ci_get_dependency_volt_by_clk(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + engine_clock, &graphic_level->MinVddc); + if (ret) + return ret; + + graphic_level->SclkFrequency = engine_clock; + + graphic_level->Flags = 0; + graphic_level->MinVddcPhases = 1; + + if (pi->vddc_phase_shed_control) + ci_populate_phase_value_based_on_sclk(rdev, + &rdev->pm.dpm.dyn_state.phase_shedding_limits_table, + engine_clock, + &graphic_level->MinVddcPhases); + + graphic_level->ActivityLevel = sclk_activity_level_t; + + graphic_level->CcPwrDynRm = 0; + graphic_level->CcPwrDynRm1 = 0; + graphic_level->EnabledForActivity = 1; + graphic_level->EnabledForThrottle = 1; + graphic_level->UpH = 0; + graphic_level->DownH = 0; + graphic_level->VoltageDownH = 0; + graphic_level->PowerThrottle = 0; + + if (pi->caps_sclk_ds) + graphic_level->DeepSleepDivId = ci_get_sleep_divider_id_from_clock(rdev, + engine_clock, + CISLAND_MINIMUM_ENGINE_CLOCK); + + graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + graphic_level->Flags = cpu_to_be32(graphic_level->Flags); + graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE); + graphic_level->MinVddcPhases = cpu_to_be32(graphic_level->MinVddcPhases); + graphic_level->SclkFrequency = cpu_to_be32(graphic_level->SclkFrequency); + graphic_level->ActivityLevel = cpu_to_be16(graphic_level->ActivityLevel); + graphic_level->CgSpllFuncCntl3 = cpu_to_be32(graphic_level->CgSpllFuncCntl3); + graphic_level->CgSpllFuncCntl4 = cpu_to_be32(graphic_level->CgSpllFuncCntl4); + graphic_level->SpllSpreadSpectrum = cpu_to_be32(graphic_level->SpllSpreadSpectrum); + graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2); + graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm); + graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1); + + return 0; +} + +static int ci_populate_all_graphic_levels(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_dpm_table *dpm_table = &pi->dpm_table; + u32 level_array_address = pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); + u32 level_array_size = sizeof(SMU7_Discrete_GraphicsLevel) * + SMU7_MAX_LEVELS_GRAPHICS; + SMU7_Discrete_GraphicsLevel *levels = pi->smc_state_table.GraphicsLevel; + u32 i, ret; + + memset(levels, 0, level_array_size); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + ret = ci_populate_single_graphic_level(rdev, + dpm_table->sclk_table.dpm_levels[i].value, + (u16)pi->activity_target[i], + &pi->smc_state_table.GraphicsLevel[i]); + if (ret) + return ret; + if (i == (dpm_table->sclk_table.count - 1)) + pi->smc_state_table.GraphicsLevel[i].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + } + + pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count; + pi->dpm_level_enable_mask.sclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + ret = ci_copy_bytes_to_smc(rdev, level_array_address, + (u8 *)levels, level_array_size, + pi->sram_end); + if (ret) + return ret; + + return 0; +} + +static int ci_populate_ulv_state(struct radeon_device *rdev, + SMU7_Discrete_Ulv *ulv_level) +{ + return ci_populate_ulv_level(rdev, ulv_level); +} + +static int ci_populate_all_memory_levels(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_dpm_table *dpm_table = &pi->dpm_table; + u32 level_array_address = pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, MemoryLevel); + u32 level_array_size = sizeof(SMU7_Discrete_MemoryLevel) * + SMU7_MAX_LEVELS_MEMORY; + SMU7_Discrete_MemoryLevel *levels = pi->smc_state_table.MemoryLevel; + u32 i, ret; + + memset(levels, 0, level_array_size); + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + if (dpm_table->mclk_table.dpm_levels[i].value == 0) + return -EINVAL; + ret = ci_populate_single_memory_level(rdev, + dpm_table->mclk_table.dpm_levels[i].value, + &pi->smc_state_table.MemoryLevel[i]); + if (ret) + return ret; + } + + pi->smc_state_table.MemoryLevel[0].ActivityLevel = cpu_to_be16(0x1F); + + pi->smc_state_table.MemoryDpmLevelCount = (u8)dpm_table->mclk_table.count; + pi->dpm_level_enable_mask.mclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + + pi->smc_state_table.MemoryLevel[dpm_table->mclk_table.count - 1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + ret = ci_copy_bytes_to_smc(rdev, level_array_address, + (u8 *)levels, level_array_size, + pi->sram_end); + if (ret) + return ret; + + return 0; +} + +static void ci_reset_single_dpm_table(struct radeon_device *rdev, + struct ci_single_dpm_table* dpm_table, + u32 count) +{ + u32 i; + + dpm_table->count = count; + for (i = 0; i < MAX_REGULAR_DPM_NUMBER; i++) + dpm_table->dpm_levels[i].enabled = false; +} + +static void ci_setup_pcie_table_entry(struct ci_single_dpm_table* dpm_table, + u32 index, u32 pcie_gen, u32 pcie_lanes) +{ + dpm_table->dpm_levels[index].value = pcie_gen; + dpm_table->dpm_levels[index].param1 = pcie_lanes; + dpm_table->dpm_levels[index].enabled = true; +} + +static int ci_setup_default_pcie_tables(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels) + return -EINVAL; + + if (pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels) { + pi->pcie_gen_powersaving = pi->pcie_gen_performance; + pi->pcie_lane_powersaving = pi->pcie_lane_performance; + } else if (!pi->use_pcie_performance_levels && pi->use_pcie_powersaving_levels) { + pi->pcie_gen_performance = pi->pcie_gen_powersaving; + pi->pcie_lane_performance = pi->pcie_lane_powersaving; + } + + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.pcie_speed_table, + SMU7_MAX_LEVELS_LINK); + + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0, + pi->pcie_gen_powersaving.min, + pi->pcie_lane_powersaving.min); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 1, + pi->pcie_gen_performance.min, + pi->pcie_lane_performance.min); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 2, + pi->pcie_gen_powersaving.min, + pi->pcie_lane_powersaving.max); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 3, + pi->pcie_gen_performance.min, + pi->pcie_lane_performance.max); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 4, + pi->pcie_gen_powersaving.max, + pi->pcie_lane_powersaving.max); + ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 5, + pi->pcie_gen_performance.max, + pi->pcie_lane_performance.max); + + pi->dpm_table.pcie_speed_table.count = 6; + + return 0; +} + +static int ci_setup_default_dpm_tables(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *allowed_sclk_vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + struct radeon_clock_voltage_dependency_table *allowed_mclk_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk; + struct radeon_cac_leakage_table *std_voltage_table = + &rdev->pm.dpm.dyn_state.cac_leakage_table; + u32 i; + + if (allowed_sclk_vddc_table == NULL) + return -EINVAL; + if (allowed_sclk_vddc_table->count < 1) + return -EINVAL; + if (allowed_mclk_table == NULL) + return -EINVAL; + if (allowed_mclk_table->count < 1) + return -EINVAL; + + memset(&pi->dpm_table, 0, sizeof(struct ci_dpm_table)); + + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.sclk_table, + SMU7_MAX_LEVELS_GRAPHICS); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.mclk_table, + SMU7_MAX_LEVELS_MEMORY); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.vddc_table, + SMU7_MAX_LEVELS_VDDC); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.vddci_table, + SMU7_MAX_LEVELS_VDDCI); + ci_reset_single_dpm_table(rdev, + &pi->dpm_table.mvdd_table, + SMU7_MAX_LEVELS_MVDD); + + pi->dpm_table.sclk_table.count = 0; + for (i = 0; i < allowed_sclk_vddc_table->count; i++) { + if ((i == 0) || + (pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count-1].value != + allowed_sclk_vddc_table->entries[i].clk)) { + pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].value = + allowed_sclk_vddc_table->entries[i].clk; + pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled = true; + pi->dpm_table.sclk_table.count++; + } + } + + pi->dpm_table.mclk_table.count = 0; + for (i = 0; i < allowed_mclk_table->count; i++) { + if ((i==0) || + (pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count-1].value != + allowed_mclk_table->entries[i].clk)) { + pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].value = + allowed_mclk_table->entries[i].clk; + pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled = true; + pi->dpm_table.mclk_table.count++; + } + } + + for (i = 0; i < allowed_sclk_vddc_table->count; i++) { + pi->dpm_table.vddc_table.dpm_levels[i].value = + allowed_sclk_vddc_table->entries[i].v; + pi->dpm_table.vddc_table.dpm_levels[i].param1 = + std_voltage_table->entries[i].leakage; + pi->dpm_table.vddc_table.dpm_levels[i].enabled = true; + } + pi->dpm_table.vddc_table.count = allowed_sclk_vddc_table->count; + + allowed_mclk_table = &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk; + if (allowed_mclk_table) { + for (i = 0; i < allowed_mclk_table->count; i++) { + pi->dpm_table.vddci_table.dpm_levels[i].value = + allowed_mclk_table->entries[i].v; + pi->dpm_table.vddci_table.dpm_levels[i].enabled = true; + } + pi->dpm_table.vddci_table.count = allowed_mclk_table->count; + } + + allowed_mclk_table = &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk; + if (allowed_mclk_table) { + for (i = 0; i < allowed_mclk_table->count; i++) { + pi->dpm_table.mvdd_table.dpm_levels[i].value = + allowed_mclk_table->entries[i].v; + pi->dpm_table.mvdd_table.dpm_levels[i].enabled = true; + } + pi->dpm_table.mvdd_table.count = allowed_mclk_table->count; + } + + ci_setup_default_pcie_tables(rdev); + + return 0; +} + +static int ci_find_boot_level(struct ci_single_dpm_table *table, + u32 value, u32 *boot_level) +{ + u32 i; + int ret = -EINVAL; + + for(i = 0; i < table->count; i++) { + if (value == table->dpm_levels[i].value) { + *boot_level = i; + ret = 0; + } + } + + return ret; +} + +static int ci_init_smc_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ulv_parm *ulv = &pi->ulv; + struct radeon_ps *radeon_boot_state = rdev->pm.dpm.boot_ps; + SMU7_Discrete_DpmTable *table = &pi->smc_state_table; + int ret; + + ret = ci_setup_default_dpm_tables(rdev); + if (ret) + return ret; + + if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) + ci_populate_smc_voltage_tables(rdev, table); + + ci_init_fps_limits(rdev); + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_HARDWAREDC) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (pi->mem_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + if (ulv->supported) { + ret = ci_populate_ulv_state(rdev, &pi->smc_state_table.Ulv); + if (ret) + return ret; + WREG32_SMC(CG_ULV_PARAMETER, ulv->cg_ulv_parameter); + } + + ret = ci_populate_all_graphic_levels(rdev); + if (ret) + return ret; + + ret = ci_populate_all_memory_levels(rdev); + if (ret) + return ret; + + ci_populate_smc_link_level(rdev, table); + + ret = ci_populate_smc_acpi_level(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_vce_level(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_acp_level(rdev, table); + if (ret) + return ret; + + ret = ci_populate_smc_samu_level(rdev, table); + if (ret) + return ret; + + ret = ci_do_program_memory_timing_parameters(rdev); + if (ret) + return ret; + + ret = ci_populate_smc_uvd_level(rdev, table); + if (ret) + return ret; + + table->UvdBootLevel = 0; + table->VceBootLevel = 0; + table->AcpBootLevel = 0; + table->SamuBootLevel = 0; + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + ret = ci_find_boot_level(&pi->dpm_table.sclk_table, + pi->vbios_boot_state.sclk_bootup_value, + (u32 *)&pi->smc_state_table.GraphicsBootLevel); + + ret = ci_find_boot_level(&pi->dpm_table.mclk_table, + pi->vbios_boot_state.mclk_bootup_value, + (u32 *)&pi->smc_state_table.MemoryBootLevel); + + table->BootVddc = pi->vbios_boot_state.vddc_bootup_value; + table->BootVddci = pi->vbios_boot_state.vddci_bootup_value; + table->BootMVdd = pi->vbios_boot_state.mvdd_bootup_value; + + ci_populate_smc_initial_state(rdev, radeon_boot_state); + + ret = ci_populate_bapm_parameters_in_dpm_table(rdev); + if (ret) + return ret; + + table->UVDInterval = 1; + table->VCEInterval = 1; + table->ACPInterval = 1; + table->SAMUInterval = 1; + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = (u16)((pi->thermal_temp_setting.temperature_high * + CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000); + table->TemperatureLimitLow = (u16)((pi->thermal_temp_setting.temperature_low * + CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000); + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->VddcVddciDelta = 4000; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + table->PCIeBootLinkLevel = 0; + table->PCIeGenInterval = 1; + if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) + table->SVI2Enable = 1; + else + table->SVI2Enable = 0; + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + table->SystemFlags = cpu_to_be32(table->SystemFlags); + table->SmioMaskVddcVid = cpu_to_be32(table->SmioMaskVddcVid); + table->SmioMaskVddcPhase = cpu_to_be32(table->SmioMaskVddcPhase); + table->SmioMaskVddciVid = cpu_to_be32(table->SmioMaskVddciVid); + table->SmioMaskMvddVid = cpu_to_be32(table->SmioMaskMvddVid); + table->SclkStepSize = cpu_to_be32(table->SclkStepSize); + table->TemperatureLimitHigh = cpu_to_be16(table->TemperatureLimitHigh); + table->TemperatureLimitLow = cpu_to_be16(table->TemperatureLimitLow); + table->VddcVddciDelta = cpu_to_be16(table->VddcVddciDelta); + table->VoltageResponseTime = cpu_to_be16(table->VoltageResponseTime); + table->PhaseResponseTime = cpu_to_be16(table->PhaseResponseTime); + table->BootVddc = cpu_to_be16(table->BootVddc * VOLTAGE_SCALE); + table->BootVddci = cpu_to_be16(table->BootVddci * VOLTAGE_SCALE); + table->BootMVdd = cpu_to_be16(table->BootMVdd * VOLTAGE_SCALE); + + ret = ci_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, SystemFlags), + (u8 *)&table->SystemFlags, + sizeof(SMU7_Discrete_DpmTable) - 3 * sizeof(SMU7_PIDController), + pi->sram_end); + if (ret) + return ret; + + return 0; +} + +static void ci_trim_single_dpm_states(struct radeon_device *rdev, + struct ci_single_dpm_table *dpm_table, + u32 low_limit, u32 high_limit) +{ + u32 i; + + for (i = 0; i < dpm_table->count; i++) { + if ((dpm_table->dpm_levels[i].value < low_limit) || + (dpm_table->dpm_levels[i].value > high_limit)) + dpm_table->dpm_levels[i].enabled = false; + else + dpm_table->dpm_levels[i].enabled = true; + } +} + +static void ci_trim_pcie_dpm_states(struct radeon_device *rdev, + u32 speed_low, u32 lanes_low, + u32 speed_high, u32 lanes_high) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table; + u32 i, j; + + for (i = 0; i < pcie_table->count; i++) { + if ((pcie_table->dpm_levels[i].value < speed_low) || + (pcie_table->dpm_levels[i].param1 < lanes_low) || + (pcie_table->dpm_levels[i].value > speed_high) || + (pcie_table->dpm_levels[i].param1 > lanes_high)) + pcie_table->dpm_levels[i].enabled = false; + else + pcie_table->dpm_levels[i].enabled = true; + } + + for (i = 0; i < pcie_table->count; i++) { + if (pcie_table->dpm_levels[i].enabled) { + for (j = i + 1; j < pcie_table->count; j++) { + if (pcie_table->dpm_levels[j].enabled) { + if ((pcie_table->dpm_levels[i].value == pcie_table->dpm_levels[j].value) && + (pcie_table->dpm_levels[i].param1 == pcie_table->dpm_levels[j].param1)) + pcie_table->dpm_levels[j].enabled = false; + } + } + } + } +} + +static int ci_trim_dpm_states(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_ps *state = ci_get_ps(radeon_state); + struct ci_power_info *pi = ci_get_pi(rdev); + u32 high_limit_count; + + if (state->performance_level_count < 1) + return -EINVAL; + + if (state->performance_level_count == 1) + high_limit_count = 0; + else + high_limit_count = 1; + + ci_trim_single_dpm_states(rdev, + &pi->dpm_table.sclk_table, + state->performance_levels[0].sclk, + state->performance_levels[high_limit_count].sclk); + + ci_trim_single_dpm_states(rdev, + &pi->dpm_table.mclk_table, + state->performance_levels[0].mclk, + state->performance_levels[high_limit_count].mclk); + + ci_trim_pcie_dpm_states(rdev, + state->performance_levels[0].pcie_gen, + state->performance_levels[0].pcie_lane, + state->performance_levels[high_limit_count].pcie_gen, + state->performance_levels[high_limit_count].pcie_lane); + + return 0; +} + +static int ci_apply_disp_minimum_voltage_request(struct radeon_device *rdev) +{ + struct radeon_clock_voltage_dependency_table *disp_voltage_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk; + struct radeon_clock_voltage_dependency_table *vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + u32 requested_voltage = 0; + u32 i; + + if (disp_voltage_table == NULL) + return -EINVAL; + if (!disp_voltage_table->count) + return -EINVAL; + + for (i = 0; i < disp_voltage_table->count; i++) { + if (rdev->clock.current_dispclk == disp_voltage_table->entries[i].clk) + requested_voltage = disp_voltage_table->entries[i].v; + } + + for (i = 0; i < vddc_table->count; i++) { + if (requested_voltage <= vddc_table->entries[i].v) { + requested_voltage = vddc_table->entries[i].v; + return (ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_VddC_Request, + requested_voltage * VOLTAGE_SCALE) == PPSMC_Result_OK) ? + 0 : -EINVAL; + } + } + + return -EINVAL; +} + +static int ci_upload_dpm_level_enable_mask(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result result; + + if (!pi->sclk_dpm_key_disabled) { + if (pi->dpm_level_enable_mask.sclk_dpm_enable_mask) { + result = ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.sclk_dpm_enable_mask); + if (result != PPSMC_Result_OK) + return -EINVAL; + } + } + + if (!pi->mclk_dpm_key_disabled) { + if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask) { + result = ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.mclk_dpm_enable_mask); + if (result != PPSMC_Result_OK) + return -EINVAL; + } + } + + if (!pi->pcie_dpm_key_disabled) { + if (pi->dpm_level_enable_mask.pcie_dpm_enable_mask) { + result = ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_PCIeDPM_SetEnabledMask, + pi->dpm_level_enable_mask.pcie_dpm_enable_mask); + if (result != PPSMC_Result_OK) + return -EINVAL; + } + } + + ci_apply_disp_minimum_voltage_request(rdev); + + return 0; +} + +static void ci_find_dpm_states_clocks_in_dpm_table(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *state = ci_get_ps(radeon_state); + struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table; + u32 sclk = state->performance_levels[state->performance_level_count-1].sclk; + struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table; + u32 mclk = state->performance_levels[state->performance_level_count-1].mclk; + u32 i; + + pi->need_update_smu7_dpm_table = 0; + + for (i = 0; i < sclk_table->count; i++) { + if (sclk == sclk_table->dpm_levels[i].value) + break; + } + + if (i >= sclk_table->count) { + pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + } else { + /* XXX check display min clock requirements */ + if (0 != CISLAND_MINIMUM_ENGINE_CLOCK) + pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK; + } + + for (i = 0; i < mclk_table->count; i++) { + if (mclk == mclk_table->dpm_levels[i].value) + break; + } + + if (i >= mclk_table->count) + pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + + if (rdev->pm.dpm.current_active_crtc_count != + rdev->pm.dpm.new_active_crtc_count) + pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK; +} + +static int ci_populate_and_upload_sclk_mclk_dpm_levels(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *state = ci_get_ps(radeon_state); + u32 sclk = state->performance_levels[state->performance_level_count-1].sclk; + u32 mclk = state->performance_levels[state->performance_level_count-1].mclk; + struct ci_dpm_table *dpm_table = &pi->dpm_table; + int ret; + + if (!pi->need_update_smu7_dpm_table) + return 0; + + if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK) + dpm_table->sclk_table.dpm_levels[dpm_table->sclk_table.count-1].value = sclk; + + if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) + dpm_table->mclk_table.dpm_levels[dpm_table->mclk_table.count-1].value = mclk; + + if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK)) { + ret = ci_populate_all_graphic_levels(rdev); + if (ret) + return ret; + } + + if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_MCLK | DPMTABLE_UPDATE_MCLK)) { + ret = ci_populate_all_memory_levels(rdev); + if (ret) + return ret; + } + + return 0; +} + +static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.uvd_dpm_enable_mask = 0; + + for (i = rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.uvd_dpm_enable_mask |= 1 << i; + + if (!pi->caps_uvd_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_UVDDPM_SetEnabledMask, + pi->dpm_level_enable_mask.uvd_dpm_enable_mask); + + if (pi->last_mclk_dpm_enable_mask & 0x1) { + pi->uvd_enabled = true; + pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE; + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.mclk_dpm_enable_mask); + } + } else { + if (pi->last_mclk_dpm_enable_mask & 0x1) { + pi->uvd_enabled = false; + pi->dpm_level_enable_mask.mclk_dpm_enable_mask |= 1; + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask.mclk_dpm_enable_mask); + } + } + + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +#if 0 +static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.vce_dpm_enable_mask = 0; + for (i = rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.vce_dpm_enable_mask |= 1 << i; + + if (!pi->caps_vce_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_VCEDPM_SetEnabledMask, + pi->dpm_level_enable_mask.vce_dpm_enable_mask); + } + + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.samu_dpm_enable_mask = 0; + for (i = rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.samu_dpm_enable_mask |= 1 << i; + + if (!pi->caps_samu_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + pi->dpm_level_enable_mask.samu_dpm_enable_mask); + } + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_SAMUDPM_Enable : PPSMC_MSG_SAMUDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} + +static int ci_enable_acp_dpm(struct radeon_device *rdev, bool enable) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + const struct radeon_clock_and_voltage_limits *max_limits; + int i; + + if (rdev->pm.dpm.ac_power) + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + else + max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; + + if (enable) { + pi->dpm_level_enable_mask.acp_dpm_enable_mask = 0; + for (i = rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count - 1; i >= 0; i--) { + if (rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { + pi->dpm_level_enable_mask.acp_dpm_enable_mask |= 1 << i; + + if (!pi->caps_acp_dpm) + break; + } + } + + ci_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_ACPDPM_SetEnabledMask, + pi->dpm_level_enable_mask.acp_dpm_enable_mask); + } + + return (ci_send_msg_to_smc(rdev, enable ? + PPSMC_MSG_ACPDPM_Enable : PPSMC_MSG_ACPDPM_Disable) == PPSMC_Result_OK) ? + 0 : -EINVAL; +} +#endif + +static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + if (!gate) { + if (pi->caps_uvd_dpm || + (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count <= 0)) + pi->smc_state_table.UvdBootLevel = 0; + else + pi->smc_state_table.UvdBootLevel = + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1; + + tmp = RREG32_SMC(DPM_TABLE_475); + tmp &= ~UvdBootLevel_MASK; + tmp |= UvdBootLevel(pi->smc_state_table.UvdBootLevel); + WREG32_SMC(DPM_TABLE_475, tmp); + } + + return ci_enable_uvd_dpm(rdev, !gate); +} + +#if 0 +static u8 ci_get_vce_boot_level(struct radeon_device *rdev) +{ + u8 i; + u32 min_evclk = 30000; /* ??? */ + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + for (i = 0; i < table->count; i++) { + if (table->entries[i].evclk >= min_evclk) + return i; + } + + return table->count - 1; +} + +static int ci_update_vce_dpm(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0); + bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0); + int ret = 0; + u32 tmp; + + if (new_vce_clock_non_zero != old_vce_clock_non_zero) { + if (new_vce_clock_non_zero) { + pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); + + tmp = RREG32_SMC(DPM_TABLE_475); + tmp &= ~VceBootLevel_MASK; + tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel); + WREG32_SMC(DPM_TABLE_475, tmp); + + ret = ci_enable_vce_dpm(rdev, true); + } else { + ret = ci_enable_vce_dpm(rdev, false); + } + } + return ret; +} + +static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate) +{ + return ci_enable_samu_dpm(rdev, gate); +} + +static int ci_update_acp_dpm(struct radeon_device *rdev, bool gate) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + if (!gate) { + pi->smc_state_table.AcpBootLevel = 0; + + tmp = RREG32_SMC(DPM_TABLE_475); + tmp &= ~AcpBootLevel_MASK; + tmp |= AcpBootLevel(pi->smc_state_table.AcpBootLevel); + WREG32_SMC(DPM_TABLE_475, tmp); + } + + return ci_enable_acp_dpm(rdev, !gate); +} +#endif + +static int ci_generate_dpm_level_enable_mask(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + ret = ci_trim_dpm_states(rdev, radeon_state); + if (ret) + return ret; + + pi->dpm_level_enable_mask.sclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&pi->dpm_table.sclk_table); + pi->dpm_level_enable_mask.mclk_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&pi->dpm_table.mclk_table); + pi->last_mclk_dpm_enable_mask = + pi->dpm_level_enable_mask.mclk_dpm_enable_mask; + if (pi->uvd_enabled) { + if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask & 1) + pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE; + } + pi->dpm_level_enable_mask.pcie_dpm_enable_mask = + ci_get_dpm_level_enable_mask_value(&pi->dpm_table.pcie_speed_table); + + return 0; +} + +static u32 ci_get_lowest_enabled_level(struct radeon_device *rdev, + u32 level_mask) +{ + u32 level = 0; + + while ((level_mask & (1 << level)) == 0) + level++; + + return level; +} + + +int ci_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + PPSMC_Result smc_result; + u32 tmp, levels, i; + int ret; + + if (level == RADEON_DPM_FORCED_LEVEL_HIGH) { + if ((!pi->sclk_dpm_key_disabled) && + pi->dpm_level_enable_mask.sclk_dpm_enable_mask) { + levels = 0; + tmp = pi->dpm_level_enable_mask.sclk_dpm_enable_mask; + while (tmp >>= 1) + levels++; + if (levels) { + ret = ci_dpm_force_state_sclk(rdev, levels); + if (ret) + return ret; + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) & + CURR_SCLK_INDEX_MASK) >> CURR_SCLK_INDEX_SHIFT; + if (tmp == levels) + break; + udelay(1); + } + } + } + if ((!pi->mclk_dpm_key_disabled) && + pi->dpm_level_enable_mask.mclk_dpm_enable_mask) { + levels = 0; + tmp = pi->dpm_level_enable_mask.mclk_dpm_enable_mask; + while (tmp >>= 1) + levels++; + if (levels) { + ret = ci_dpm_force_state_mclk(rdev, levels); + if (ret) + return ret; + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) & + CURR_MCLK_INDEX_MASK) >> CURR_MCLK_INDEX_SHIFT; + if (tmp == levels) + break; + udelay(1); + } + } + } + if ((!pi->pcie_dpm_key_disabled) && + pi->dpm_level_enable_mask.pcie_dpm_enable_mask) { + levels = 0; + tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask; + while (tmp >>= 1) + levels++; + if (levels) { + ret = ci_dpm_force_state_pcie(rdev, level); + if (ret) + return ret; + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) & + CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT; + if (tmp == levels) + break; + udelay(1); + } + } + } + } else if (level == RADEON_DPM_FORCED_LEVEL_LOW) { + if ((!pi->sclk_dpm_key_disabled) && + pi->dpm_level_enable_mask.sclk_dpm_enable_mask) { + levels = ci_get_lowest_enabled_level(rdev, + pi->dpm_level_enable_mask.sclk_dpm_enable_mask); + ret = ci_dpm_force_state_sclk(rdev, levels); + if (ret) + return ret; + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) & + CURR_SCLK_INDEX_MASK) >> CURR_SCLK_INDEX_SHIFT; + if (tmp == levels) + break; + udelay(1); + } + } + if ((!pi->mclk_dpm_key_disabled) && + pi->dpm_level_enable_mask.mclk_dpm_enable_mask) { + levels = ci_get_lowest_enabled_level(rdev, + pi->dpm_level_enable_mask.mclk_dpm_enable_mask); + ret = ci_dpm_force_state_mclk(rdev, levels); + if (ret) + return ret; + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) & + CURR_MCLK_INDEX_MASK) >> CURR_MCLK_INDEX_SHIFT; + if (tmp == levels) + break; + udelay(1); + } + } + if ((!pi->pcie_dpm_key_disabled) && + pi->dpm_level_enable_mask.pcie_dpm_enable_mask) { + levels = ci_get_lowest_enabled_level(rdev, + pi->dpm_level_enable_mask.pcie_dpm_enable_mask); + ret = ci_dpm_force_state_pcie(rdev, levels); + if (ret) + return ret; + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) & + CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT; + if (tmp == levels) + break; + udelay(1); + } + } + } else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) { + if (!pi->sclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_NoForcedLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + if (!pi->mclk_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_NoForcedLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + if (!pi->pcie_dpm_key_disabled) { + smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_UnForceLevel); + if (smc_result != PPSMC_Result_OK) + return -EINVAL; + } + } + + rdev->pm.dpm.forced_level = level; + + return 0; +} + +static int ci_set_mc_special_registers(struct radeon_device *rdev, + struct ci_mc_reg_table *table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u8 i, j, k; + u32 temp_reg; + + for (i = 0, j = table->last; i < table->last; i++) { + if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + switch(table->mc_reg_address[i].s1 << 2) { + case MC_SEQ_MISC1: + temp_reg = RREG32(MC_PMG_CMD_EMRS); + table->mc_reg_address[j].s1 = MC_PMG_CMD_EMRS >> 2; + table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_EMRS_LP >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + ((temp_reg & 0xffff0000)) | ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); + } + j++; + if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + + temp_reg = RREG32(MC_PMG_CMD_MRS); + table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS >> 2; + table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS_LP >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + if (!pi->mem_gddr5) + table->mc_reg_table_entry[k].mc_data[j] |= 0x100; + } + j++; + if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + + if (!pi->mem_gddr5) { + table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD >> 2; + table->mc_reg_address[j].s0 = MC_PMG_AUTO_CMD >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; + } + j++; + if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + } + break; + case MC_SEQ_RESERVE_M: + temp_reg = RREG32(MC_PMG_CMD_MRS1); + table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS1 >> 2; + table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS1_LP >> 2; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + } + j++; + if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + break; + default: + break; + } + + } + + table->last = j; + + return 0; +} + +static bool ci_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg) +{ + bool result = true; + + switch(in_reg) { + case MC_SEQ_RAS_TIMING >> 2: + *out_reg = MC_SEQ_RAS_TIMING_LP >> 2; + break; + case MC_SEQ_DLL_STBY >> 2: + *out_reg = MC_SEQ_DLL_STBY_LP >> 2; + break; + case MC_SEQ_G5PDX_CMD0 >> 2: + *out_reg = MC_SEQ_G5PDX_CMD0_LP >> 2; + break; + case MC_SEQ_G5PDX_CMD1 >> 2: + *out_reg = MC_SEQ_G5PDX_CMD1_LP >> 2; + break; + case MC_SEQ_G5PDX_CTRL >> 2: + *out_reg = MC_SEQ_G5PDX_CTRL_LP >> 2; + break; + case MC_SEQ_CAS_TIMING >> 2: + *out_reg = MC_SEQ_CAS_TIMING_LP >> 2; + break; + case MC_SEQ_MISC_TIMING >> 2: + *out_reg = MC_SEQ_MISC_TIMING_LP >> 2; + break; + case MC_SEQ_MISC_TIMING2 >> 2: + *out_reg = MC_SEQ_MISC_TIMING2_LP >> 2; + break; + case MC_SEQ_PMG_DVS_CMD >> 2: + *out_reg = MC_SEQ_PMG_DVS_CMD_LP >> 2; + break; + case MC_SEQ_PMG_DVS_CTL >> 2: + *out_reg = MC_SEQ_PMG_DVS_CTL_LP >> 2; + break; + case MC_SEQ_RD_CTL_D0 >> 2: + *out_reg = MC_SEQ_RD_CTL_D0_LP >> 2; + break; + case MC_SEQ_RD_CTL_D1 >> 2: + *out_reg = MC_SEQ_RD_CTL_D1_LP >> 2; + break; + case MC_SEQ_WR_CTL_D0 >> 2: + *out_reg = MC_SEQ_WR_CTL_D0_LP >> 2; + break; + case MC_SEQ_WR_CTL_D1 >> 2: + *out_reg = MC_SEQ_WR_CTL_D1_LP >> 2; + break; + case MC_PMG_CMD_EMRS >> 2: + *out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2; + break; + case MC_PMG_CMD_MRS >> 2: + *out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2; + break; + case MC_PMG_CMD_MRS1 >> 2: + *out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2; + break; + case MC_SEQ_PMG_TIMING >> 2: + *out_reg = MC_SEQ_PMG_TIMING_LP >> 2; + break; + case MC_PMG_CMD_MRS2 >> 2: + *out_reg = MC_SEQ_PMG_CMD_MRS2_LP >> 2; + break; + case MC_SEQ_WR_CTL_2 >> 2: + *out_reg = MC_SEQ_WR_CTL_2_LP >> 2; + break; + default: + result = false; + break; + } + + return result; +} + +static void ci_set_valid_flag(struct ci_mc_reg_table *table) +{ + u8 i, j; + + for (i = 0; i < table->last; i++) { + for (j = 1; j < table->num_entries; j++) { + if (table->mc_reg_table_entry[j-1].mc_data[i] != + table->mc_reg_table_entry[j].mc_data[i]) { + table->valid_flag |= 1 << i; + break; + } + } + } +} + +static void ci_set_s0_mc_reg_index(struct ci_mc_reg_table *table) +{ + u32 i; + u16 address; + + for (i = 0; i < table->last; i++) { + table->mc_reg_address[i].s0 = + ci_check_s0_mc_reg_index(table->mc_reg_address[i].s1, &address) ? + address : table->mc_reg_address[i].s1; + } +} + +static int ci_copy_vbios_mc_reg_table(const struct atom_mc_reg_table *table, + struct ci_mc_reg_table *ci_table) +{ + u8 i, j; + + if (table->last > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + if (table->num_entries > MAX_AC_TIMING_ENTRIES) + return -EINVAL; + + for (i = 0; i < table->last; i++) + ci_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; + + ci_table->last = table->last; + + for (i = 0; i < table->num_entries; i++) { + ci_table->mc_reg_table_entry[i].mclk_max = + table->mc_reg_table_entry[i].mclk_max; + for (j = 0; j < table->last; j++) + ci_table->mc_reg_table_entry[i].mc_data[j] = + table->mc_reg_table_entry[i].mc_data[j]; + } + ci_table->num_entries = table->num_entries; + + return 0; +} + +static int ci_initialize_mc_reg_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct atom_mc_reg_table *table; + struct ci_mc_reg_table *ci_table = &pi->mc_reg_table; + u8 module_index = rv770_get_memory_module_index(rdev); + int ret; + + table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + WREG32(MC_SEQ_RAS_TIMING_LP, RREG32(MC_SEQ_RAS_TIMING)); + WREG32(MC_SEQ_CAS_TIMING_LP, RREG32(MC_SEQ_CAS_TIMING)); + WREG32(MC_SEQ_DLL_STBY_LP, RREG32(MC_SEQ_DLL_STBY)); + WREG32(MC_SEQ_G5PDX_CMD0_LP, RREG32(MC_SEQ_G5PDX_CMD0)); + WREG32(MC_SEQ_G5PDX_CMD1_LP, RREG32(MC_SEQ_G5PDX_CMD1)); + WREG32(MC_SEQ_G5PDX_CTRL_LP, RREG32(MC_SEQ_G5PDX_CTRL)); + WREG32(MC_SEQ_PMG_DVS_CMD_LP, RREG32(MC_SEQ_PMG_DVS_CMD)); + WREG32(MC_SEQ_PMG_DVS_CTL_LP, RREG32(MC_SEQ_PMG_DVS_CTL)); + WREG32(MC_SEQ_MISC_TIMING_LP, RREG32(MC_SEQ_MISC_TIMING)); + WREG32(MC_SEQ_MISC_TIMING2_LP, RREG32(MC_SEQ_MISC_TIMING2)); + WREG32(MC_SEQ_PMG_CMD_EMRS_LP, RREG32(MC_PMG_CMD_EMRS)); + WREG32(MC_SEQ_PMG_CMD_MRS_LP, RREG32(MC_PMG_CMD_MRS)); + WREG32(MC_SEQ_PMG_CMD_MRS1_LP, RREG32(MC_PMG_CMD_MRS1)); + WREG32(MC_SEQ_WR_CTL_D0_LP, RREG32(MC_SEQ_WR_CTL_D0)); + WREG32(MC_SEQ_WR_CTL_D1_LP, RREG32(MC_SEQ_WR_CTL_D1)); + WREG32(MC_SEQ_RD_CTL_D0_LP, RREG32(MC_SEQ_RD_CTL_D0)); + WREG32(MC_SEQ_RD_CTL_D1_LP, RREG32(MC_SEQ_RD_CTL_D1)); + WREG32(MC_SEQ_PMG_TIMING_LP, RREG32(MC_SEQ_PMG_TIMING)); + WREG32(MC_SEQ_PMG_CMD_MRS2_LP, RREG32(MC_PMG_CMD_MRS2)); + WREG32(MC_SEQ_WR_CTL_2_LP, RREG32(MC_SEQ_WR_CTL_2)); + + ret = radeon_atom_init_mc_reg_table(rdev, module_index, table); + if (ret) + goto init_mc_done; + + ret = ci_copy_vbios_mc_reg_table(table, ci_table); + if (ret) + goto init_mc_done; + + ci_set_s0_mc_reg_index(ci_table); + + ret = ci_set_mc_special_registers(rdev, ci_table); + if (ret) + goto init_mc_done; + + ci_set_valid_flag(ci_table); + +init_mc_done: + kfree(table); + + return ret; +} + +static int ci_populate_mc_reg_addresses(struct radeon_device *rdev, + SMU7_Discrete_MCRegisters *mc_reg_table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i, j; + + for (i = 0, j = 0; j < pi->mc_reg_table.last; j++) { + if (pi->mc_reg_table.valid_flag & (1 << j)) { + if (i >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; + mc_reg_table->address[i].s0 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s0); + mc_reg_table->address[i].s1 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s1); + i++; + } + } + + mc_reg_table->last = (u8)i; + + return 0; +} + +static void ci_convert_mc_registers(const struct ci_mc_reg_entry *entry, + SMU7_Discrete_MCRegisterSet *data, + u32 num_entries, u32 valid_flag) +{ + u32 i, j; + + for (i = 0, j = 0; j < num_entries; j++) { + if (valid_flag & (1 << j)) { + data->value[i] = cpu_to_be32(entry->mc_data[j]); + i++; + } + } +} + +static void ci_convert_mc_reg_table_entry_to_smc(struct radeon_device *rdev, + const u32 memory_clock, + SMU7_Discrete_MCRegisterSet *mc_reg_table_data) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i = 0; + + for(i = 0; i < pi->mc_reg_table.num_entries; i++) { + if (memory_clock <= pi->mc_reg_table.mc_reg_table_entry[i].mclk_max) + break; + } + + if ((i == pi->mc_reg_table.num_entries) && (i > 0)) + --i; + + ci_convert_mc_registers(&pi->mc_reg_table.mc_reg_table_entry[i], + mc_reg_table_data, pi->mc_reg_table.last, + pi->mc_reg_table.valid_flag); +} + +static void ci_convert_mc_reg_table_to_smc(struct radeon_device *rdev, + SMU7_Discrete_MCRegisters *mc_reg_table) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 i; + + for (i = 0; i < pi->dpm_table.mclk_table.count; i++) + ci_convert_mc_reg_table_entry_to_smc(rdev, + pi->dpm_table.mclk_table.dpm_levels[i].value, + &mc_reg_table->data[i]); +} + +static int ci_populate_initial_mc_reg_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + int ret; + + memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters)); + + ret = ci_populate_mc_reg_addresses(rdev, &pi->smc_mc_reg_table); + if (ret) + return ret; + ci_convert_mc_reg_table_to_smc(rdev, &pi->smc_mc_reg_table); + + return ci_copy_bytes_to_smc(rdev, + pi->mc_reg_table_start, + (u8 *)&pi->smc_mc_reg_table, + sizeof(SMU7_Discrete_MCRegisters), + pi->sram_end); +} + +static int ci_update_and_upload_mc_reg_table(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + + if (!(pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) + return 0; + + memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters)); + + ci_convert_mc_reg_table_to_smc(rdev, &pi->smc_mc_reg_table); + + return ci_copy_bytes_to_smc(rdev, + pi->mc_reg_table_start + + offsetof(SMU7_Discrete_MCRegisters, data[0]), + (u8 *)&pi->smc_mc_reg_table.data[0], + sizeof(SMU7_Discrete_MCRegisterSet) * + pi->dpm_table.mclk_table.count, + pi->sram_end); +} + +static void ci_enable_voltage_control(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + tmp |= VOLT_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); +} + +static enum radeon_pcie_gen ci_get_maximum_link_speed(struct radeon_device *rdev, + struct radeon_ps *radeon_state) +{ + struct ci_ps *state = ci_get_ps(radeon_state); + int i; + u16 pcie_speed, max_speed = 0; + + for (i = 0; i < state->performance_level_count; i++) { + pcie_speed = state->performance_levels[i].pcie_gen; + if (max_speed < pcie_speed) + max_speed = pcie_speed; + } + + return max_speed; +} + +static u16 ci_get_current_pcie_speed(struct radeon_device *rdev) +{ + u32 speed_cntl = 0; + + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL) & LC_CURRENT_DATA_RATE_MASK; + speed_cntl >>= LC_CURRENT_DATA_RATE_SHIFT; + + return (u16)speed_cntl; +} + +static int ci_get_current_pcie_lane_number(struct radeon_device *rdev) +{ + u32 link_width = 0; + + link_width = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL) & LC_LINK_WIDTH_RD_MASK; + link_width >>= LC_LINK_WIDTH_RD_SHIFT; + + switch (link_width) { + case RADEON_PCIE_LC_LINK_WIDTH_X1: + return 1; + case RADEON_PCIE_LC_LINK_WIDTH_X2: + return 2; + case RADEON_PCIE_LC_LINK_WIDTH_X4: + return 4; + case RADEON_PCIE_LC_LINK_WIDTH_X8: + return 8; + case RADEON_PCIE_LC_LINK_WIDTH_X12: + /* not actually supported */ + return 12; + case RADEON_PCIE_LC_LINK_WIDTH_X0: + case RADEON_PCIE_LC_LINK_WIDTH_X16: + default: + return 16; + } +} + +static void ci_request_link_speed_change_before_state_change(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + enum radeon_pcie_gen target_link_speed = + ci_get_maximum_link_speed(rdev, radeon_new_state); + enum radeon_pcie_gen current_link_speed; + + if (pi->force_pcie_gen == RADEON_PCIE_GEN_INVALID) + current_link_speed = ci_get_maximum_link_speed(rdev, radeon_current_state); + else + current_link_speed = pi->force_pcie_gen; + + pi->force_pcie_gen = RADEON_PCIE_GEN_INVALID; + pi->pspp_notify_required = false; + if (target_link_speed > current_link_speed) { + switch (target_link_speed) { +#ifdef CONFIG_ACPI + case RADEON_PCIE_GEN3: + if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN3, false) == 0) + break; + pi->force_pcie_gen = RADEON_PCIE_GEN2; + if (current_link_speed == RADEON_PCIE_GEN2) + break; + case RADEON_PCIE_GEN2: + if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN2, false) == 0) + break; +#endif + default: + pi->force_pcie_gen = ci_get_current_pcie_speed(rdev); + break; + } + } else { + if (target_link_speed < current_link_speed) + pi->pspp_notify_required = true; + } +} + +static void ci_notify_link_speed_change_after_state_change(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + enum radeon_pcie_gen target_link_speed = + ci_get_maximum_link_speed(rdev, radeon_new_state); + u8 request; + + if (pi->pspp_notify_required) { + if (target_link_speed == RADEON_PCIE_GEN3) + request = PCIE_PERF_REQ_PECI_GEN3; + else if (target_link_speed == RADEON_PCIE_GEN2) + request = PCIE_PERF_REQ_PECI_GEN2; + else + request = PCIE_PERF_REQ_PECI_GEN1; + + if ((request == PCIE_PERF_REQ_PECI_GEN1) && + (ci_get_current_pcie_speed(rdev) > 0)) + return; + +#ifdef CONFIG_ACPI + radeon_acpi_pcie_performance_request(rdev, request, false); +#endif + } +} + +static int ci_set_private_data_variables_based_on_pptable(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *allowed_sclk_vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + struct radeon_clock_voltage_dependency_table *allowed_mclk_vddc_table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk; + struct radeon_clock_voltage_dependency_table *allowed_mclk_vddci_table = + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk; + + if (allowed_sclk_vddc_table == NULL) + return -EINVAL; + if (allowed_sclk_vddc_table->count < 1) + return -EINVAL; + if (allowed_mclk_vddc_table == NULL) + return -EINVAL; + if (allowed_mclk_vddc_table->count < 1) + return -EINVAL; + if (allowed_mclk_vddci_table == NULL) + return -EINVAL; + if (allowed_mclk_vddci_table->count < 1) + return -EINVAL; + + pi->min_vddc_in_pp_table = allowed_sclk_vddc_table->entries[0].v; + pi->max_vddc_in_pp_table = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; + + pi->min_vddci_in_pp_table = allowed_mclk_vddci_table->entries[0].v; + pi->max_vddci_in_pp_table = + allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v; + + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk; + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.mclk = + allowed_mclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk; + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddc = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci = + allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v; + + return 0; +} + +static void ci_patch_with_vddc_leakage(struct radeon_device *rdev, u16 *vddc) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_leakage_voltage *leakage_table = &pi->vddc_leakage; + u32 leakage_index; + + for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { + if (leakage_table->leakage_id[leakage_index] == *vddc) { + *vddc = leakage_table->actual_voltage[leakage_index]; + break; + } + } +} + +static void ci_patch_with_vddci_leakage(struct radeon_device *rdev, u16 *vddci) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_leakage_voltage *leakage_table = &pi->vddci_leakage; + u32 leakage_index; + + for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { + if (leakage_table->leakage_id[leakage_index] == *vddci) { + *vddci = leakage_table->actual_voltage[leakage_index]; + break; + } + } +} + +static void ci_patch_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_clock_voltage_dependency_table_with_vddci_leakage(struct radeon_device *rdev, + struct radeon_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddci_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_vce_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_uvd_clock_voltage_dependency_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].v); + } +} + +static void ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_phase_shedding_limits_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].voltage); + } +} + +static void ci_patch_clock_voltage_limits_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_clock_and_voltage_limits *table) +{ + if (table) { + ci_patch_with_vddc_leakage(rdev, (u16 *)&table->vddc); + ci_patch_with_vddci_leakage(rdev, (u16 *)&table->vddci); + } +} + +static void ci_patch_cac_leakage_table_with_vddc_leakage(struct radeon_device *rdev, + struct radeon_cac_leakage_table *table) +{ + u32 i; + + if (table) { + for (i = 0; i < table->count; i++) + ci_patch_with_vddc_leakage(rdev, &table->entries[i].vddc); + } +} + +static void ci_patch_dependency_tables_with_leakage(struct radeon_device *rdev) +{ + + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk); + ci_patch_clock_voltage_dependency_table_with_vddci_leakage(rdev, + &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk); + ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table); + ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table); + ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table); + ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.phase_shedding_limits_table); + ci_patch_clock_voltage_limits_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac); + ci_patch_clock_voltage_limits_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc); + ci_patch_cac_leakage_table_with_vddc_leakage(rdev, + &rdev->pm.dpm.dyn_state.cac_leakage_table); + +} + +static void ci_get_memory_type(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + u32 tmp; + + tmp = RREG32(MC_SEQ_MISC0); + + if (((tmp & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT) == + MC_SEQ_MISC0_GDDR5_VALUE) + pi->mem_gddr5 = true; + else + pi->mem_gddr5 = false; + +} + +static void ci_update_current_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *new_ps = ci_get_ps(rps); + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->current_rps = *rps; + pi->current_ps = *new_ps; + pi->current_rps.ps_priv = &pi->current_ps; +} + +static void ci_update_requested_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *new_ps = ci_get_ps(rps); + struct ci_power_info *pi = ci_get_pi(rdev); + + pi->requested_rps = *rps; + pi->requested_ps = *new_ps; + pi->requested_rps.ps_priv = &pi->requested_ps; +} + +int ci_dpm_pre_set_power_state(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps requested_ps = *rdev->pm.dpm.requested_ps; + struct radeon_ps *new_ps = &requested_ps; + + ci_update_requested_ps(rdev, new_ps); + + ci_apply_state_adjust_rules(rdev, &pi->requested_rps); + + return 0; +} + +void ci_dpm_post_set_power_state(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + + ci_update_current_ps(rdev, new_ps); +} + + +void ci_dpm_setup_asic(struct radeon_device *rdev) +{ + ci_read_clock_registers(rdev); + ci_get_memory_type(rdev); + ci_enable_acpi_power_management(rdev); + ci_init_sclk_t(rdev); +} + +int ci_dpm_enable(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps; + int ret; + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); + + if (ci_is_smc_running(rdev)) + return -EINVAL; + if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) { + ci_enable_voltage_control(rdev); + ret = ci_construct_voltage_tables(rdev); + if (ret) { + DRM_ERROR("ci_construct_voltage_tables failed\n"); + return ret; + } + } + if (pi->caps_dynamic_ac_timing) { + ret = ci_initialize_mc_reg_table(rdev); + if (ret) + pi->caps_dynamic_ac_timing = false; + } + if (pi->dynamic_ss) + ci_enable_spread_spectrum(rdev, true); + if (pi->thermal_protection) + ci_enable_thermal_protection(rdev, true); + ci_program_sstp(rdev); + ci_enable_display_gap(rdev); + ci_program_vc(rdev); + ret = ci_upload_firmware(rdev); + if (ret) { + DRM_ERROR("ci_upload_firmware failed\n"); + return ret; + } + ret = ci_process_firmware_header(rdev); + if (ret) { + DRM_ERROR("ci_process_firmware_header failed\n"); + return ret; + } + ret = ci_initial_switch_from_arb_f0_to_f1(rdev); + if (ret) { + DRM_ERROR("ci_initial_switch_from_arb_f0_to_f1 failed\n"); + return ret; + } + ret = ci_init_smc_table(rdev); + if (ret) { + DRM_ERROR("ci_init_smc_table failed\n"); + return ret; + } + ret = ci_init_arb_table_index(rdev); + if (ret) { + DRM_ERROR("ci_init_arb_table_index failed\n"); + return ret; + } + if (pi->caps_dynamic_ac_timing) { + ret = ci_populate_initial_mc_reg_table(rdev); + if (ret) { + DRM_ERROR("ci_populate_initial_mc_reg_table failed\n"); + return ret; + } + } + ret = ci_populate_pm_base(rdev); + if (ret) { + DRM_ERROR("ci_populate_pm_base failed\n"); + return ret; + } + ci_dpm_start_smc(rdev); + ci_enable_vr_hot_gpio_interrupt(rdev); + ret = ci_notify_smc_display_change(rdev, false); + if (ret) { + DRM_ERROR("ci_notify_smc_display_change failed\n"); + return ret; + } + ci_enable_sclk_control(rdev, true); + ret = ci_enable_ulv(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_ulv failed\n"); + return ret; + } + ret = ci_enable_ds_master_switch(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_ds_master_switch failed\n"); + return ret; + } + ret = ci_start_dpm(rdev); + if (ret) { + DRM_ERROR("ci_start_dpm failed\n"); + return ret; + } + ret = ci_enable_didt(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_didt failed\n"); + return ret; + } + ret = ci_enable_smc_cac(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_smc_cac failed\n"); + return ret; + } + ret = ci_enable_power_containment(rdev, true); + if (ret) { + DRM_ERROR("ci_enable_power_containment failed\n"); + return ret; + } + if (rdev->irq.installed && + r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) { +#if 0 + PPSMC_Result result; +#endif + ret = ci_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) { + DRM_ERROR("ci_set_thermal_temperature_range failed\n"); + return ret; + } + rdev->irq.dpm_thermal = true; + radeon_irq_set(rdev); +#if 0 + result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableThermalInterrupt); + + if (result != PPSMC_Result_OK) + DRM_DEBUG_KMS("Could not enable thermal interrupts.\n"); +#endif + } + + ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true); + + ci_dpm_powergate_uvd(rdev, true); + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), true); + + ci_update_current_ps(rdev, boot_ps); + + return 0; +} + +void ci_dpm_disable(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps; + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); + + ci_dpm_powergate_uvd(rdev, false); + + if (!ci_is_smc_running(rdev)) + return; + + if (pi->thermal_protection) + ci_enable_thermal_protection(rdev, false); + ci_enable_power_containment(rdev, false); + ci_enable_smc_cac(rdev, false); + ci_enable_didt(rdev, false); + ci_enable_spread_spectrum(rdev, false); + ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, false); + ci_stop_dpm(rdev); + ci_enable_ds_master_switch(rdev, true); + ci_enable_ulv(rdev, false); + ci_clear_vc(rdev); + ci_reset_to_default(rdev); + ci_dpm_stop_smc(rdev); + ci_force_switch_to_arb_f0(rdev); + + ci_update_current_ps(rdev, boot_ps); +} + +int ci_dpm_set_power_state(struct radeon_device *rdev) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + struct radeon_ps *old_ps = &pi->current_rps; + int ret; + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); + + ci_find_dpm_states_clocks_in_dpm_table(rdev, new_ps); + if (pi->pcie_performance_request) + ci_request_link_speed_change_before_state_change(rdev, new_ps, old_ps); + ret = ci_freeze_sclk_mclk_dpm(rdev); + if (ret) { + DRM_ERROR("ci_freeze_sclk_mclk_dpm failed\n"); + return ret; + } + ret = ci_populate_and_upload_sclk_mclk_dpm_levels(rdev, new_ps); + if (ret) { + DRM_ERROR("ci_populate_and_upload_sclk_mclk_dpm_levels failed\n"); + return ret; + } + ret = ci_generate_dpm_level_enable_mask(rdev, new_ps); + if (ret) { + DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); + return ret; + } +#if 0 + ret = ci_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("ci_update_vce_dpm failed\n"); + return ret; + } +#endif + ret = ci_update_sclk_t(rdev); + if (ret) { + DRM_ERROR("ci_update_sclk_t failed\n"); + return ret; + } + if (pi->caps_dynamic_ac_timing) { + ret = ci_update_and_upload_mc_reg_table(rdev); + if (ret) { + DRM_ERROR("ci_update_and_upload_mc_reg_table failed\n"); + return ret; + } + } + ret = ci_program_memory_timing_parameters(rdev); + if (ret) { + DRM_ERROR("ci_program_memory_timing_parameters failed\n"); + return ret; + } + ret = ci_unfreeze_sclk_mclk_dpm(rdev); + if (ret) { + DRM_ERROR("ci_unfreeze_sclk_mclk_dpm failed\n"); + return ret; + } + ret = ci_upload_dpm_level_enable_mask(rdev); + if (ret) { + DRM_ERROR("ci_upload_dpm_level_enable_mask failed\n"); + return ret; + } + if (pi->pcie_performance_request) + ci_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps); + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), true); + + return 0; +} + +static int __unused ci_dpm_power_control_set_level(struct radeon_device *rdev) +{ + return ci_power_control_set_level(rdev); +} + +static void __unused ci_dpm_reset_asic(struct radeon_device *rdev) +{ + ci_set_boot_state(rdev); +} + +void ci_dpm_display_configuration_changed(struct radeon_device *rdev) +{ + ci_program_display_gap(rdev); +} + +union power_info { + struct _ATOM_POWERPLAY_INFO info; + struct _ATOM_POWERPLAY_INFO_V2 info_2; + struct _ATOM_POWERPLAY_INFO_V3 info_3; + struct _ATOM_PPLIB_POWERPLAYTABLE pplib; + struct _ATOM_PPLIB_POWERPLAYTABLE2 pplib2; + struct _ATOM_PPLIB_POWERPLAYTABLE3 pplib3; +}; + +union pplib_clock_info { + struct _ATOM_PPLIB_R600_CLOCK_INFO r600; + struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780; + struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen; + struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo; + struct _ATOM_PPLIB_SI_CLOCK_INFO si; + struct _ATOM_PPLIB_CI_CLOCK_INFO ci; +}; + +union pplib_power_state { + struct _ATOM_PPLIB_STATE v1; + struct _ATOM_PPLIB_STATE_V2 v2; +}; + +static void ci_parse_pplib_non_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info, + u8 table_rev) +{ + rps->caps = le32_to_cpu(non_clock_info->ulCapsAndSettings); + rps->class = le16_to_cpu(non_clock_info->usClassification); + rps->class2 = le16_to_cpu(non_clock_info->usClassification2); + + if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { + rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); + rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); + } else { + rps->vclk = 0; + rps->dclk = 0; + } + + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) + rdev->pm.dpm.boot_ps = rps; + if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) + rdev->pm.dpm.uvd_ps = rps; +} + +static void ci_parse_pplib_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, int index, + union pplib_clock_info *clock_info) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *ps = ci_get_ps(rps); + struct ci_pl *pl = &ps->performance_levels[index]; + + ps->performance_level_count = index + 1; + + pl->sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); + pl->sclk |= clock_info->ci.ucEngineClockHigh << 16; + pl->mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); + pl->mclk |= clock_info->ci.ucMemoryClockHigh << 16; + + pl->pcie_gen = r600_get_pcie_gen_support(rdev, + pi->sys_pcie_mask, + pi->vbios_boot_state.pcie_gen_bootup_value, + clock_info->ci.ucPCIEGen); + pl->pcie_lane = r600_get_pcie_lane_support(rdev, + pi->vbios_boot_state.pcie_lane_bootup_value, + le16_to_cpu(clock_info->ci.usPCIELane)); + + if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) { + pi->acpi_pcie_gen = pl->pcie_gen; + } + + if (rps->class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) { + pi->ulv.supported = true; + pi->ulv.pl = *pl; + pi->ulv.cg_ulv_parameter = CISLANDS_CGULVPARAMETER_DFLT; + } + + /* patch up boot state */ + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) { + pl->mclk = pi->vbios_boot_state.mclk_bootup_value; + pl->sclk = pi->vbios_boot_state.sclk_bootup_value; + pl->pcie_gen = pi->vbios_boot_state.pcie_gen_bootup_value; + pl->pcie_lane = pi->vbios_boot_state.pcie_lane_bootup_value; + } + + switch (rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) { + case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY: + pi->use_pcie_powersaving_levels = true; + if (pi->pcie_gen_powersaving.max < pl->pcie_gen) + pi->pcie_gen_powersaving.max = pl->pcie_gen; + if (pi->pcie_gen_powersaving.min > pl->pcie_gen) + pi->pcie_gen_powersaving.min = pl->pcie_gen; + if (pi->pcie_lane_powersaving.max < pl->pcie_lane) + pi->pcie_lane_powersaving.max = pl->pcie_lane; + if (pi->pcie_lane_powersaving.min > pl->pcie_lane) + pi->pcie_lane_powersaving.min = pl->pcie_lane; + break; + case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE: + pi->use_pcie_performance_levels = true; + if (pi->pcie_gen_performance.max < pl->pcie_gen) + pi->pcie_gen_performance.max = pl->pcie_gen; + if (pi->pcie_gen_performance.min > pl->pcie_gen) + pi->pcie_gen_performance.min = pl->pcie_gen; + if (pi->pcie_lane_performance.max < pl->pcie_lane) + pi->pcie_lane_performance.max = pl->pcie_lane; + if (pi->pcie_lane_performance.min > pl->pcie_lane) + pi->pcie_lane_performance.min = pl->pcie_lane; + break; + default: + break; + } +} + +static int ci_parse_power_table(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info; + union pplib_power_state *power_state; + int i, j, k, non_clock_array_index, clock_array_index; + union pplib_clock_info *clock_info; + struct _StateArray *state_array; + struct _ClockInfoArray *clock_info_array; + struct _NonClockInfoArray *non_clock_info_array; + union power_info *power_info; + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); + u16 data_offset; + u8 frev, crev; + u8 *power_state_offset; + struct ci_ps *ps; + + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) + return -EINVAL; + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); + + state_array = (struct _StateArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usStateArrayOffset)); + clock_info_array = (struct _ClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset)); + non_clock_info_array = (struct _NonClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); + + rdev->pm.dpm.ps = kzalloc(sizeof(struct radeon_ps) * + state_array->ucNumEntries, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; + power_state = (union pplib_power_state *)power_state_offset; + non_clock_array_index = power_state->v2.nonClockInfoIndex; + non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) + &non_clock_info_array->nonClockInfo[non_clock_array_index]; + if (!rdev->pm.power_state[i].clock_info) + return -EINVAL; + ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL); + if (ps == NULL) { + kfree(rdev->pm.dpm.ps); + return -ENOMEM; + } + rdev->pm.dpm.ps[i].ps_priv = ps; + ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], + non_clock_info, + non_clock_info_array->ucEntrySize); + k = 0; + idx = (u8 *)&power_state->v2.clockInfoIndex[0]; + for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { + clock_array_index = idx[j]; + if (clock_array_index >= clock_info_array->ucNumEntries) + continue; + if (k >= CISLANDS_MAX_HARDWARE_POWERLEVELS) + break; + clock_info = (union pplib_clock_info *) + ((u8 *)&clock_info_array->clockInfo[0] + + (clock_array_index * clock_info_array->ucEntrySize)); + ci_parse_pplib_clock_info(rdev, + &rdev->pm.dpm.ps[i], k, + clock_info); + k++; + } + power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + } + rdev->pm.dpm.num_ps = state_array->ucNumEntries; + return 0; +} + +static int ci_get_vbios_boot_values(struct radeon_device *rdev, + struct ci_vbios_boot_state *boot_state) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, FirmwareInfo); + ATOM_FIRMWARE_INFO_V2_2 *firmware_info; + u8 frev, crev; + u16 data_offset; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + firmware_info = + (ATOM_FIRMWARE_INFO_V2_2 *)(mode_info->atom_context->bios + + data_offset); + boot_state->mvdd_bootup_value = le16_to_cpu(firmware_info->usBootUpMVDDCVoltage); + boot_state->vddc_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCVoltage); + boot_state->vddci_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCIVoltage); + boot_state->pcie_gen_bootup_value = ci_get_current_pcie_speed(rdev); + boot_state->pcie_lane_bootup_value = ci_get_current_pcie_lane_number(rdev); + boot_state->sclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultEngineClock); + boot_state->mclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultMemoryClock); + + return 0; + } + return -EINVAL; +} + +void ci_dpm_fini(struct radeon_device *rdev) +{ + int i; + + for (i = 0; i < rdev->pm.dpm.num_ps; i++) { + kfree(rdev->pm.dpm.ps[i].ps_priv); + } + kfree(rdev->pm.dpm.ps); + kfree(rdev->pm.dpm.priv); + kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries); + r600_free_extended_power_table(rdev); +} + +int ci_dpm_init(struct radeon_device *rdev) +{ + int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info); + u16 data_offset, size; + u8 frev, crev; + struct ci_power_info *pi; + int ret; + u32 mask; + + pi = kzalloc(sizeof(struct ci_power_info), GFP_KERNEL); + if (pi == NULL) + return -ENOMEM; + rdev->pm.dpm.priv = pi; + + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret) + pi->sys_pcie_mask = 0; + else + pi->sys_pcie_mask = mask; + pi->force_pcie_gen = RADEON_PCIE_GEN_INVALID; + + pi->pcie_gen_performance.max = RADEON_PCIE_GEN1; + pi->pcie_gen_performance.min = RADEON_PCIE_GEN3; + pi->pcie_gen_powersaving.max = RADEON_PCIE_GEN1; + pi->pcie_gen_powersaving.min = RADEON_PCIE_GEN3; + + pi->pcie_lane_performance.max = 0; + pi->pcie_lane_performance.min = 16; + pi->pcie_lane_powersaving.max = 0; + pi->pcie_lane_powersaving.min = 16; + + ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } + ret = ci_parse_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } + ret = r600_parse_extended_power_table(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } + + pi->dll_default_on = false; + pi->sram_end = SMC_RAM_END; + + pi->activity_target[0] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[1] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[2] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[3] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[4] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[5] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[6] = CISLAND_TARGETACTIVITY_DFLT; + pi->activity_target[7] = CISLAND_TARGETACTIVITY_DFLT; + + pi->mclk_activity_target = CISLAND_MCLK_TARGETACTIVITY_DFLT; + + pi->sclk_dpm_key_disabled = 0; + pi->mclk_dpm_key_disabled = 0; + pi->pcie_dpm_key_disabled = 0; + + pi->caps_sclk_ds = true; + + pi->mclk_strobe_mode_threshold = 40000; + pi->mclk_stutter_mode_threshold = 40000; + pi->mclk_edc_enable_threshold = 40000; + pi->mclk_edc_wr_enable_threshold = 40000; + + ci_initialize_powertune_defaults(rdev); + + pi->caps_fps = false; + + pi->caps_sclk_throttle_low_notification = false; + + pi->caps_uvd_dpm = true; + + ci_get_leakage_voltages(rdev); + ci_patch_dependency_tables_with_leakage(rdev); + ci_set_private_data_variables_based_on_pptable(rdev); + + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries = + kzalloc(4 * sizeof(struct radeon_clock_voltage_dependency_entry), GFP_KERNEL); + if (!rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { + ci_dpm_fini(rdev); + return -ENOMEM; + } + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].clk = 36000; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].v = 720; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].clk = 54000; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].v = 810; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].clk = 72000; + rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].v = 900; + + rdev->pm.dpm.dyn_state.mclk_sclk_ratio = 4; + rdev->pm.dpm.dyn_state.sclk_mclk_delta = 15000; + rdev->pm.dpm.dyn_state.vddc_vddci_delta = 200; + + rdev->pm.dpm.dyn_state.valid_sclk_values.count = 0; + rdev->pm.dpm.dyn_state.valid_sclk_values.values = NULL; + rdev->pm.dpm.dyn_state.valid_mclk_values.count = 0; + rdev->pm.dpm.dyn_state.valid_mclk_values.values = NULL; + + pi->thermal_temp_setting.temperature_low = 99500; + pi->thermal_temp_setting.temperature_high = 100000; + pi->thermal_temp_setting.temperature_shutdown = 104000; + + pi->uvd_enabled = false; + + pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_NONE; + pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_NONE; + pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_NONE; + if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_GPIO_LUT)) + pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; + else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) + pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL) { + if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_GPIO_LUT)) + pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; + else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_SVID2)) + pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; + else + rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL; + } + + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_MVDDCONTROL) { + if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_GPIO_LUT)) + pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; + else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_SVID2)) + pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; + else + rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_MVDDCONTROL; + } + + pi->vddc_phase_shed_control = true; + +#if defined(CONFIG_ACPI) + pi->pcie_performance_request = + radeon_acpi_is_pcie_performance_request_supported(rdev); +#else + pi->pcie_performance_request = false; +#endif + + if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size, + &frev, &crev, &data_offset)) { + pi->caps_sclk_ss_support = true; + pi->caps_mclk_ss_support = true; + pi->dynamic_ss = true; + } else { + pi->caps_sclk_ss_support = false; + pi->caps_mclk_ss_support = false; + pi->dynamic_ss = true; + } + + if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE) + pi->thermal_protection = true; + else + pi->thermal_protection = false; + + pi->caps_dynamic_ac_timing = true; + + pi->uvd_power_gated = false; + + /* make sure dc limits are valid */ + if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) || + (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0)) + rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc = + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + + return 0; +} + +void ci_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, + struct seq_file *m) +{ + u32 sclk = ci_get_average_sclk_freq(rdev); + u32 mclk = ci_get_average_mclk_freq(rdev); + + seq_printf(m, "power level avg sclk: %u mclk: %u\n", + sclk, mclk); +} + +void ci_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct ci_ps *ps = ci_get_ps(rps); + struct ci_pl *pl; + int i; + + r600_dpm_print_class_info(rps->class, rps->class2); + r600_dpm_print_cap_info(rps->caps); + printk("\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + for (i = 0; i < ps->performance_level_count; i++) { + pl = &ps->performance_levels[i]; + printk("\t\tpower level %d sclk: %u mclk: %u pcie gen: %u pcie lanes: %u\n", + i, pl->sclk, pl->mclk, pl->pcie_gen + 1, pl->pcie_lane); + } + r600_dpm_print_ps_status(rdev, rps); +} + +u32 ci_dpm_get_sclk(struct radeon_device *rdev, bool low) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); + + if (low) + return requested_state->performance_levels[0].sclk; + else + return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; +} + +u32 ci_dpm_get_mclk(struct radeon_device *rdev, bool low) +{ + struct ci_power_info *pi = ci_get_pi(rdev); + struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); + + if (low) + return requested_state->performance_levels[0].mclk; + else + return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk; +} diff --git a/sys/dev/drm/radeon/ci_dpm.h b/sys/dev/drm/radeon/ci_dpm.h new file mode 100644 index 0000000000..93bbed977f --- /dev/null +++ b/sys/dev/drm/radeon/ci_dpm.h @@ -0,0 +1,332 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __CI_DPM_H__ +#define __CI_DPM_H__ + +#include "ppsmc.h" + +#define SMU__NUM_SCLK_DPM_STATE 8 +#define SMU__NUM_MCLK_DPM_LEVELS 6 +#define SMU__NUM_LCLK_DPM_LEVELS 8 +#define SMU__NUM_PCIE_DPM_LEVELS 8 +#include "smu7_discrete.h" + +#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2 + +struct ci_pl { + u32 mclk; + u32 sclk; + enum radeon_pcie_gen pcie_gen; + u16 pcie_lane; +}; + +struct ci_ps { + u16 performance_level_count; + bool dc_compatible; + u32 sclk_t; + struct ci_pl performance_levels[CISLANDS_MAX_HARDWARE_POWERLEVELS]; +}; + +struct ci_dpm_level { + bool enabled; + u32 value; + u32 param1; +}; + +#define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5 +#define MAX_REGULAR_DPM_NUMBER 8 +#define CISLAND_MINIMUM_ENGINE_CLOCK 800 + +struct ci_single_dpm_table { + u32 count; + struct ci_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER]; +}; + +struct ci_dpm_table { + struct ci_single_dpm_table sclk_table; + struct ci_single_dpm_table mclk_table; + struct ci_single_dpm_table pcie_speed_table; + struct ci_single_dpm_table vddc_table; + struct ci_single_dpm_table vddci_table; + struct ci_single_dpm_table mvdd_table; +}; + +struct ci_mc_reg_entry { + u32 mclk_max; + u32 mc_data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; +}; + +struct ci_mc_reg_table { + u8 last; + u8 num_entries; + u16 valid_flag; + struct ci_mc_reg_entry mc_reg_table_entry[MAX_AC_TIMING_ENTRIES]; + SMU7_Discrete_MCRegisterAddress mc_reg_address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; +}; + +struct ci_ulv_parm +{ + bool supported; + u32 cg_ulv_parameter; + u32 volt_change_delay; + struct ci_pl pl; +}; + +#define CISLANDS_MAX_LEAKAGE_COUNT 8 + +struct ci_leakage_voltage { + u16 count; + u16 leakage_id[CISLANDS_MAX_LEAKAGE_COUNT]; + u16 actual_voltage[CISLANDS_MAX_LEAKAGE_COUNT]; +}; + +struct ci_dpm_level_enable_mask { + u32 uvd_dpm_enable_mask; + u32 vce_dpm_enable_mask; + u32 acp_dpm_enable_mask; + u32 samu_dpm_enable_mask; + u32 sclk_dpm_enable_mask; + u32 mclk_dpm_enable_mask; + u32 pcie_dpm_enable_mask; +}; + +struct ci_vbios_boot_state +{ + u16 mvdd_bootup_value; + u16 vddc_bootup_value; + u16 vddci_bootup_value; + u32 sclk_bootup_value; + u32 mclk_bootup_value; + u16 pcie_gen_bootup_value; + u16 pcie_lane_bootup_value; +}; + +struct ci_clock_registers { + u32 cg_spll_func_cntl; + u32 cg_spll_func_cntl_2; + u32 cg_spll_func_cntl_3; + u32 cg_spll_func_cntl_4; + u32 cg_spll_spread_spectrum; + u32 cg_spll_spread_spectrum_2; + u32 dll_cntl; + u32 mclk_pwrmgt_cntl; + u32 mpll_ad_func_cntl; + u32 mpll_dq_func_cntl; + u32 mpll_func_cntl; + u32 mpll_func_cntl_1; + u32 mpll_func_cntl_2; + u32 mpll_ss1; + u32 mpll_ss2; +}; + +struct ci_thermal_temperature_setting { + s32 temperature_low; + s32 temperature_high; + s32 temperature_shutdown; +}; + +struct ci_pcie_perf_range { + u16 max; + u16 min; +}; + +enum ci_pt_config_reg_type { + CISLANDS_CONFIGREG_MMR = 0, + CISLANDS_CONFIGREG_SMC_IND, + CISLANDS_CONFIGREG_DIDT_IND, + CISLANDS_CONFIGREG_CACHE, + CISLANDS_CONFIGREG_MAX +}; + +#define POWERCONTAINMENT_FEATURE_BAPM 0x00000001 +#define POWERCONTAINMENT_FEATURE_TDCLimit 0x00000002 +#define POWERCONTAINMENT_FEATURE_PkgPwrLimit 0x00000004 + +struct ci_pt_config_reg { + u32 offset; + u32 mask; + u32 shift; + u32 value; + enum ci_pt_config_reg_type type; +}; + +struct ci_pt_defaults { + u8 svi_load_line_en; + u8 svi_load_line_vddc; + u8 tdc_vddc_throttle_release_limit_perc; + u8 tdc_mawt; + u8 tdc_waterfall_ctl; + u8 dte_ambient_temp_base; + u32 display_cac; + u32 bapm_temp_gradient; + u16 bapmti_r[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS]; + u16 bapmti_rc[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS]; +}; + +#define DPMTABLE_OD_UPDATE_SCLK 0x00000001 +#define DPMTABLE_OD_UPDATE_MCLK 0x00000002 +#define DPMTABLE_UPDATE_SCLK 0x00000004 +#define DPMTABLE_UPDATE_MCLK 0x00000008 + +struct ci_power_info { + struct ci_dpm_table dpm_table; + u32 voltage_control; + u32 mvdd_control; + u32 vddci_control; + u32 active_auto_throttle_sources; + struct ci_clock_registers clock_registers; + u16 acpi_vddc; + u16 acpi_vddci; + enum radeon_pcie_gen force_pcie_gen; + enum radeon_pcie_gen acpi_pcie_gen; + struct ci_leakage_voltage vddc_leakage; + struct ci_leakage_voltage vddci_leakage; + u16 max_vddc_in_pp_table; + u16 min_vddc_in_pp_table; + u16 max_vddci_in_pp_table; + u16 min_vddci_in_pp_table; + u32 mclk_strobe_mode_threshold; + u32 mclk_stutter_mode_threshold; + u32 mclk_edc_enable_threshold; + u32 mclk_edc_wr_enable_threshold; + struct ci_vbios_boot_state vbios_boot_state; + /* smc offsets */ + u32 sram_end; + u32 dpm_table_start; + u32 soft_regs_start; + u32 mc_reg_table_start; + u32 fan_table_start; + u32 arb_table_start; + /* smc tables */ + SMU7_Discrete_DpmTable smc_state_table; + SMU7_Discrete_MCRegisters smc_mc_reg_table; + SMU7_Discrete_PmFuses smc_powertune_table; + /* other stuff */ + struct ci_mc_reg_table mc_reg_table; + struct atom_voltage_table vddc_voltage_table; + struct atom_voltage_table vddci_voltage_table; + struct atom_voltage_table mvdd_voltage_table; + struct ci_ulv_parm ulv; + u32 power_containment_features; + const struct ci_pt_defaults *powertune_defaults; + u32 dte_tj_offset; + bool vddc_phase_shed_control; + struct ci_thermal_temperature_setting thermal_temp_setting; + struct ci_dpm_level_enable_mask dpm_level_enable_mask; + u32 need_update_smu7_dpm_table; + u32 sclk_dpm_key_disabled; + u32 mclk_dpm_key_disabled; + u32 pcie_dpm_key_disabled; + struct ci_pcie_perf_range pcie_gen_performance; + struct ci_pcie_perf_range pcie_lane_performance; + struct ci_pcie_perf_range pcie_gen_powersaving; + struct ci_pcie_perf_range pcie_lane_powersaving; + u32 activity_target[SMU7_MAX_LEVELS_GRAPHICS]; + u32 mclk_activity_target; + u32 low_sclk_interrupt_t; + u32 last_mclk_dpm_enable_mask; + u32 sys_pcie_mask; + /* caps */ + bool caps_power_containment; + bool caps_cac; + bool caps_sq_ramping; + bool caps_db_ramping; + bool caps_td_ramping; + bool caps_tcp_ramping; + bool caps_fps; + bool caps_sclk_ds; + bool caps_sclk_ss_support; + bool caps_mclk_ss_support; + bool caps_uvd_dpm; + bool caps_vce_dpm; + bool caps_samu_dpm; + bool caps_acp_dpm; + bool caps_automatic_dc_transition; + bool caps_sclk_throttle_low_notification; + bool caps_dynamic_ac_timing; + /* flags */ + bool thermal_protection; + bool pcie_performance_request; + bool dynamic_ss; + bool dll_default_on; + bool cac_enabled; + bool uvd_enabled; + bool battery_state; + bool pspp_notify_required; + bool mem_gddr5; + bool enable_bapm_feature; + bool enable_tdc_limit_feature; + bool enable_pkg_pwr_tracking_feature; + bool use_pcie_performance_levels; + bool use_pcie_powersaving_levels; + bool uvd_power_gated; + /* driver states */ + struct radeon_ps current_rps; + struct ci_ps current_ps; + struct radeon_ps requested_rps; + struct ci_ps requested_ps; +}; + +#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0 +#define CISLANDS_VOLTAGE_CONTROL_BY_GPIO 0x1 +#define CISLANDS_VOLTAGE_CONTROL_BY_SVID2 0x2 + +#define CISLANDS_Q88_FORMAT_CONVERSION_UNIT 256 + +#define CISLANDS_VRC_DFLT0 0x3FFFC000 +#define CISLANDS_VRC_DFLT1 0x000400 +#define CISLANDS_VRC_DFLT2 0xC00080 +#define CISLANDS_VRC_DFLT3 0xC00200 +#define CISLANDS_VRC_DFLT4 0xC01680 +#define CISLANDS_VRC_DFLT5 0xC00033 +#define CISLANDS_VRC_DFLT6 0xC00033 +#define CISLANDS_VRC_DFLT7 0x3FFFC000 + +#define CISLANDS_CGULVPARAMETER_DFLT 0x00040035 +#define CISLAND_TARGETACTIVITY_DFLT 30 +#define CISLAND_MCLK_TARGETACTIVITY_DFLT 10 + +#define PCIE_PERF_REQ_REMOVE_REGISTRY 0 +#define PCIE_PERF_REQ_FORCE_LOWPOWER 1 +#define PCIE_PERF_REQ_PECI_GEN1 2 +#define PCIE_PERF_REQ_PECI_GEN2 3 +#define PCIE_PERF_REQ_PECI_GEN3 4 + +int ci_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit); +void ci_start_smc(struct radeon_device *rdev); +void ci_reset_smc(struct radeon_device *rdev); +int ci_program_jump_on_start(struct radeon_device *rdev); +void ci_stop_smc_clock(struct radeon_device *rdev); +void ci_start_smc_clock(struct radeon_device *rdev); +bool ci_is_smc_running(struct radeon_device *rdev); +PPSMC_Result ci_send_msg_to_smc(struct radeon_device *rdev, PPSMC_Msg msg); +PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev); +int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit); +int ci_read_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 *value, u32 limit); +int ci_write_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 value, u32 limit); + +#endif diff --git a/sys/dev/drm/radeon/ci_smc.c b/sys/dev/drm/radeon/ci_smc.c new file mode 100644 index 0000000000..1c18c7d80f --- /dev/null +++ b/sys/dev/drm/radeon/ci_smc.c @@ -0,0 +1,263 @@ +/* + * Copyright 2011 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ + +#include +#include +#include "radeon.h" +#include "cikd.h" +#include "ppsmc.h" +#include "ci_dpm.h" +#include "radeon_ucode.h" + +static int ci_set_smc_sram_address(struct radeon_device *rdev, + u32 smc_address, u32 limit) +{ + if (smc_address & 3) + return -EINVAL; + if ((smc_address + 3) > limit) + return -EINVAL; + + WREG32(SMC_IND_INDEX_0, smc_address); + WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + + return 0; +} + +int ci_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit) +{ + u32 data, original_data; + u32 addr; + u32 extra_shift; + int ret; + + if (smc_start_address & 3) + return -EINVAL; + if ((smc_start_address + byte_count) > limit) + return -EINVAL; + + addr = smc_start_address; + + while (byte_count >= 4) { + /* SMC address space is BE */ + data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; + + ret = ci_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + + src += 4; + byte_count -= 4; + addr += 4; + } + + /* RMW for the final bytes */ + if (byte_count > 0) { + data = 0; + + ret = ci_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + original_data = RREG32(SMC_IND_DATA_0); + + extra_shift = 8 * (4 - byte_count); + + while (byte_count > 0) { + data = (data << 8) + *src++; + byte_count--; + } + + data <<= extra_shift; + + data |= (original_data & ~((~0UL) << extra_shift)); + + ret = ci_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + } + return 0; +} + +void ci_start_smc(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL); + + tmp &= ~RST_REG; + WREG32_SMC(SMC_SYSCON_RESET_CNTL, tmp); +} + +void ci_reset_smc(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL); + + tmp |= RST_REG; + WREG32_SMC(SMC_SYSCON_RESET_CNTL, tmp); +} + +int ci_program_jump_on_start(struct radeon_device *rdev) +{ + static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 }; + + return ci_copy_bytes_to_smc(rdev, 0x0, data, 4, sizeof(data)+1); +} + +void ci_stop_smc_clock(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + + tmp |= CK_DISABLE; + + WREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0, tmp); +} + +void ci_start_smc_clock(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + + tmp &= ~CK_DISABLE; + + WREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0, tmp); +} + +bool ci_is_smc_running(struct radeon_device *rdev) +{ + u32 clk = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + u32 pc_c = RREG32_SMC(SMC_PC_C); + + if (!(clk & CK_DISABLE) && (0x20100 <= pc_c)) + return true; + + return false; +} + +PPSMC_Result ci_send_msg_to_smc(struct radeon_device *rdev, PPSMC_Msg msg) +{ + u32 tmp; + int i; + + if (!ci_is_smc_running(rdev)) + return PPSMC_Result_Failed; + + WREG32(SMC_MESSAGE_0, msg); + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(SMC_RESP_0); + if (tmp != 0) + break; + udelay(1); + } + tmp = RREG32(SMC_RESP_0); + + return (PPSMC_Result)tmp; +} + +PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev) +{ + u32 tmp; + int i; + + if (!ci_is_smc_running(rdev)) + return PPSMC_Result_OK; + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0); + if ((tmp & CKEN) == 0) + break; + udelay(1); + } + + return PPSMC_Result_OK; +} + +int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit) +{ + u32 ucode_start_address; + u32 ucode_size; + const u8 *src; + u32 data; + + if (!rdev->smc_fw) + return -EINVAL; + + switch (rdev->family) { + case CHIP_BONAIRE: + ucode_start_address = BONAIRE_SMC_UCODE_START; + ucode_size = BONAIRE_SMC_UCODE_SIZE; + break; + default: + DRM_ERROR("unknown asic in smc ucode loader\n"); + BUG(); + } + + if (ucode_size & 3) + return -EINVAL; + + src = (const u8 *)rdev->smc_fw->data; + WREG32(SMC_IND_INDEX_0, ucode_start_address); + WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0); + while (ucode_size >= 4) { + /* SMC address space is BE */ + data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; + + WREG32(SMC_IND_DATA_0, data); + + src += 4; + ucode_size -= 4; + } + WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + + return 0; +} + +int ci_read_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 *value, u32 limit) +{ + int ret; + + ret = ci_set_smc_sram_address(rdev, smc_address, limit); + if (ret) + return ret; + + *value = RREG32(SMC_IND_DATA_0); + return 0; +} + +int ci_write_smc_sram_dword(struct radeon_device *rdev, + u32 smc_address, u32 value, u32 limit) +{ + int ret; + + ret = ci_set_smc_sram_address(rdev, smc_address, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, value); + return 0; +} diff --git a/sys/dev/drm/radeon/cik.c b/sys/dev/drm/radeon/cik.c index a1cc1c2b7d..5f789896af 100644 --- a/sys/dev/drm/radeon/cik.c +++ b/sys/dev/drm/radeon/cik.c @@ -29,22 +29,15 @@ #include "cikd.h" #include "atom.h" #include "cik_blit_shaders.h" +#include "radeon_ucode.h" +#include "clearstate_ci.h" -/* GFX */ -#define CIK_PFP_UCODE_SIZE 2144 -#define CIK_ME_UCODE_SIZE 2144 -#define CIK_CE_UCODE_SIZE 2144 -/* compute */ -#define CIK_MEC_UCODE_SIZE 4192 -/* interrupts */ -#define BONAIRE_RLC_UCODE_SIZE 2048 -#define KB_RLC_UCODE_SIZE 2560 -#define KV_RLC_UCODE_SIZE 2560 -/* gddr controller */ -#define CIK_MC_UCODE_SIZE 7866 -/* sdma */ -#define CIK_SDMA_UCODE_SIZE 1050 -#define CIK_SDMA_UCODE_VERSION 64 +#define PCI_EXP_LNKCTL PCIER_LINKCTRL /* 16 */ +#define PCI_EXP_LNKCTL2 48 +#define PCI_EXP_LNKCTL_HAWD PCIEM_LNKCTL_HAWD /* 0x0200 */ +#define PCI_EXP_DEVSTA PCIER_DEVSTS /* 10 */ +#define PCI_EXP_DEVSTA_TRPND 0x0020 +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); @@ -53,6 +46,7 @@ MODULE_FIRMWARE("radeon/BONAIRE_mec.bin"); MODULE_FIRMWARE("radeon/BONAIRE_mc.bin"); MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin"); MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin"); +MODULE_FIRMWARE("radeon/BONAIRE_smc.bin"); MODULE_FIRMWARE("radeon/KAVERI_pfp.bin"); MODULE_FIRMWARE("radeon/KAVERI_me.bin"); MODULE_FIRMWARE("radeon/KAVERI_ce.bin"); @@ -67,6 +61,51 @@ MODULE_FIRMWARE("radeon/KABINI_rlc.bin"); MODULE_FIRMWARE("radeon/KABINI_sdma.bin"); static void cik_rlc_stop(struct radeon_device *rdev); +static void cik_pcie_gen3_enable(struct radeon_device *rdev); +static void cik_program_aspm(struct radeon_device *rdev); +static void cik_init_pg(struct radeon_device *rdev); +static void cik_init_cg(struct radeon_device *rdev); +static void cik_fini_pg(struct radeon_device *rdev); +static void cik_fini_cg(struct radeon_device *rdev); +static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable); + +/* get temperature in millidegrees */ +int ci_get_temp(struct radeon_device *rdev) +{ + u32 temp; + int actual_temp = 0; + + temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> + CTF_TEMP_SHIFT; + + if (temp & 0x200) + actual_temp = 255; + else + actual_temp = temp & 0x1ff; + + actual_temp = actual_temp * 1000; + + return actual_temp; +} + +/* get temperature in millidegrees */ +int kv_get_temp(struct radeon_device *rdev) +{ + u32 temp; + int actual_temp = 0; + + temp = RREG32_SMC(0xC0300E0C); + + if (temp) + actual_temp = (temp / 8) - 49; + else + actual_temp = 0; + + actual_temp = actual_temp * 1000; + + return actual_temp; +} /* * Indirect registers accessor @@ -89,6 +128,778 @@ void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) (void)RREG32(PCIE_DATA); } +static const u32 spectre_rlc_save_restore_register_list[] = +{ + (0x0e00 << 16) | (0xc12c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc140 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc150 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc15c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc168 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc170 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc178 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc204 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8228 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x829c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x869c >> 2), + 0x00000000, + (0x0600 << 16) | (0x98f4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x98f8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc260 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x90e8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c000 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c00c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c1c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0xae00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8900 >> 2), + 0x00000000, + 0x3, + (0x0e00 << 16) | (0xc130 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc134 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc1fc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc208 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc264 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc268 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc26c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc270 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc274 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc278 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc27c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc280 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc284 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc288 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc28c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc290 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc294 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc298 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc29c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2ac >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x301d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30238 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30250 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30254 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30258 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3025c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc900 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc904 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc908 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0xae00 << 16) | (0xc90c >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x8e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x9e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0xae00 << 16) | (0xc910 >> 2), + 0x00000000, + (0xbe00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc99c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9834 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f00 >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f00 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f04 >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f04 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f08 >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f08 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f0c >> 2), + 0x00000000, + (0x0001 << 16) | (0x30f0c >> 2), + 0x00000000, + (0x0600 << 16) | (0x9b7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bf0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bcc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8b24 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30a04 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a10 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a14 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a2c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc704 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc708 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc768 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc770 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc774 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc778 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc77c >> 2), + 0x00000000, + (0x0400 << 16) | (0xc780 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc784 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc788 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc78c >> 2), + 0x00000000, + (0x0400 << 16) | (0xc798 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc79c >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7a0 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7a4 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7a8 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7ac >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7b0 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc7b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9100 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c010 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92a8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92ac >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92c4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92c8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92cc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x92d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c04 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c38 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c3c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xae00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9604 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac08 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac0c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac58 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac68 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac6c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac70 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac74 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac78 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac80 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac84 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac88 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac8c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x970c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9714 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9718 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x971c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x4e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x5e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x6e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x7e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x8e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x9e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0xae00 << 16) | (0x31068 >> 2), + 0x00000000, + (0xbe00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88bc >> 2), + 0x00000000, + (0x0400 << 16) | (0x89c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8980 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30938 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3093c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30940 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30904 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c210 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c214 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c218 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8904 >> 2), + 0x00000000, + 0x5, + (0x0e00 << 16) | (0x8c28 >> 2), + (0x0e00 << 16) | (0x8c2c >> 2), + (0x0e00 << 16) | (0x8c30 >> 2), + (0x0e00 << 16) | (0x8c34 >> 2), + (0x0e00 << 16) | (0x9600 >> 2), +}; + +static const u32 kalindi_rlc_save_restore_register_list[] = +{ + (0x0e00 << 16) | (0xc12c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc140 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc150 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc15c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc168 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc170 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc204 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8228 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x829c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x869c >> 2), + 0x00000000, + (0x0600 << 16) | (0x98f4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x98f8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc260 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x90e8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c000 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c00c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c1c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xcd20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89bc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8900 >> 2), + 0x00000000, + 0x3, + (0x0e00 << 16) | (0xc130 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc134 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc1fc >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc208 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc264 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc268 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc26c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc270 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc274 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc28c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc290 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc294 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc298 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2a8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc2ac >> 2), + 0x00000000, + (0x0e00 << 16) | (0x301d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30238 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30250 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30254 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30258 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3025c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc900 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc904 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc908 >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc90c >> 2), + 0x00000000, + (0x4e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x5e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x6e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x7e00 << 16) | (0xc910 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc99c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9834 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f00 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f04 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f08 >> 2), + 0x00000000, + (0x0000 << 16) | (0x30f0c >> 2), + 0x00000000, + (0x0600 << 16) | (0x9b7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bf0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8bcc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8b24 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30a04 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a10 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a14 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a18 >> 2), + 0x00000000, + (0x0600 << 16) | (0x30a2c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc700 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc704 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc708 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xc768 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc770 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc774 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc798 >> 2), + 0x00000000, + (0x0400 << 16) | (0xc79c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9100 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c010 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c04 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c20 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c38 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8c3c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xae00 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9604 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac08 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac0c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac58 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac68 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac6c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac70 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac74 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac78 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac7c >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac80 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac84 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac88 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xac8c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x970c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9714 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x9718 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x971c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x4e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x5e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x6e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x7e00 << 16) | (0x31068 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd10 >> 2), + 0x00000000, + (0x0e00 << 16) | (0xcd14 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88b8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88bc >> 2), + 0x00000000, + (0x0400 << 16) | (0x89c0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88c8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x88d8 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8980 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30938 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3093c >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30940 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89a0 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30900 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x30904 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x89b4 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3e1fc >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c210 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c214 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x3c218 >> 2), + 0x00000000, + (0x0e00 << 16) | (0x8904 >> 2), + 0x00000000, + 0x5, + (0x0e00 << 16) | (0x8c28 >> 2), + (0x0e00 << 16) | (0x8c2c >> 2), + (0x0e00 << 16) | (0x8c30 >> 2), + (0x0e00 << 16) | (0x8c34 >> 2), + (0x0e00 << 16) | (0x9600 >> 2), +}; + static const u32 bonaire_golden_spm_registers[] = { 0x30800, 0xe0ffffff, 0xe0000000 @@ -656,7 +1467,7 @@ static void cik_srbm_select(struct radeon_device *rdev, * Load the GDDR MC ucode into the hw (CIK). * Returns 0 on success, error on failure. */ -static __unused int ci_mc_load_microcode(struct radeon_device *rdev) +static int ci_mc_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; u32 running, blackout = 0; @@ -735,7 +1546,7 @@ static int cik_init_microcode(struct radeon_device *rdev) const char *chip_name; size_t pfp_req_size, me_req_size, ce_req_size, mec_req_size, rlc_req_size, mc_req_size, - sdma_req_size; + sdma_req_size, smc_req_size; char fw_name[30]; int err; @@ -751,6 +1562,7 @@ static int cik_init_microcode(struct radeon_device *rdev) rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; mc_req_size = CIK_MC_UCODE_SIZE * 4; sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; + smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4); break; case CHIP_KAVERI: chip_name = "KAVERI"; @@ -842,7 +1654,7 @@ static int cik_init_microcode(struct radeon_device *rdev) err = -EINVAL; } - /* No MC ucode on APUs */ + /* No SMC, MC ucode on APUs */ if (!(rdev->flags & RADEON_IS_IGP)) { ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name); err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); @@ -854,6 +1666,22 @@ static int cik_init_microcode(struct radeon_device *rdev) rdev->mc_fw->datasize, fw_name); err = -EINVAL; } + + ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name); + err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); + if (err) { + printk(KERN_ERR + "smc: error loading firmware \"%s\"\n", + fw_name); + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; + err = 0; + } else if (rdev->smc_fw->datasize != smc_req_size) { + printk(KERN_ERR + "cik_smc: Bogus length %zu in firmware \"%s\"\n", + rdev->smc_fw->datasize, fw_name); + err = -EINVAL; + } } out: @@ -876,6 +1704,8 @@ out: rdev->sdma_fw = NULL; release_firmware(rdev->mc_fw); rdev->mc_fw = NULL; + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; } return err; } @@ -1848,7 +2678,7 @@ static void cik_setup_rb(struct radeon_device *rdev, * Configures the 3D engine and tiling configuration * registers so that the 3D engine is usable. */ -static __unused void cik_gpu_init(struct radeon_device *rdev) +static void cik_gpu_init(struct radeon_device *rdev) { u32 gb_addr_config = RREG32(GB_ADDR_CONFIG); u32 mc_shared_chmap, mc_arb_ramcfg; @@ -1875,7 +2705,47 @@ static __unused void cik_gpu_init(struct radeon_device *rdev) gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_KAVERI: - /* TODO */ + rdev->config.cik.max_shader_engines = 1; + rdev->config.cik.max_tile_pipes = 4; + if ((rdev->ddev->pci_device == 0x1304) || + (rdev->ddev->pci_device == 0x1305) || + (rdev->ddev->pci_device == 0x130C) || + (rdev->ddev->pci_device == 0x130F) || + (rdev->ddev->pci_device == 0x1310) || + (rdev->ddev->pci_device == 0x1311) || + (rdev->ddev->pci_device == 0x131C)) { + rdev->config.cik.max_cu_per_sh = 8; + rdev->config.cik.max_backends_per_se = 2; + } else if ((rdev->ddev->pci_device == 0x1309) || + (rdev->ddev->pci_device == 0x130A) || + (rdev->ddev->pci_device == 0x130D) || + (rdev->ddev->pci_device == 0x1313) || + (rdev->ddev->pci_device == 0x131D)) { + rdev->config.cik.max_cu_per_sh = 6; + rdev->config.cik.max_backends_per_se = 2; + } else if ((rdev->ddev->pci_device == 0x1306) || + (rdev->ddev->pci_device == 0x1307) || + (rdev->ddev->pci_device == 0x130B) || + (rdev->ddev->pci_device == 0x130E) || + (rdev->ddev->pci_device == 0x1315) || + (rdev->ddev->pci_device == 0x131B)) { + rdev->config.cik.max_cu_per_sh = 4; + rdev->config.cik.max_backends_per_se = 1; + } else { + rdev->config.cik.max_cu_per_sh = 3; + rdev->config.cik.max_backends_per_se = 1; + } + rdev->config.cik.max_sh_per_se = 1; + rdev->config.cik.max_texture_channel_caches = 4; + rdev->config.cik.max_gprs = 256; + rdev->config.cik.max_gs_threads = 16; + rdev->config.cik.max_hw_contexts = 8; + + rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; + rdev->config.cik.sc_prim_fifo_size_backend = 0x100; + rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; + rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_KABINI: default: @@ -1963,10 +2833,8 @@ static __unused void cik_gpu_init(struct radeon_device *rdev) rdev->config.cik.tile_config |= (3 << 0); break; } - if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) - rdev->config.cik.tile_config |= 1 << 4; - else - rdev->config.cik.tile_config |= 0 << 4; + rdev->config.cik.tile_config |= + ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; rdev->config.cik.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; rdev->config.cik.tile_config |= @@ -2062,7 +2930,7 @@ static __unused void cik_gpu_init(struct radeon_device *rdev) * is not used by default on newer asics (r6xx+). On newer asics, * memory buffers are used for fences rather than scratch regs. */ -static __unused void cik_scratch_init(struct radeon_device *rdev) +static void cik_scratch_init(struct radeon_device *rdev) { int i; @@ -2300,6 +3168,7 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); + radeon_scratch_free(rdev, scratch); return r; } ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); @@ -2316,6 +3185,8 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) r = radeon_fence_wait(ib.fence, false); if (r) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); return r; } for (i = 0; i < rdev->usec_timeout; i++) { @@ -2530,8 +3401,8 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev) /* ring 0 - compute and gfx */ /* Set ring buffer size */ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -2588,7 +3459,6 @@ u32 cik_compute_ring_get_rptr(struct radeon_device *rdev, cik_srbm_select(rdev, 0, 0, 0, 0); spin_unlock(&rdev->srbm_mutex); } - rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return rptr; } @@ -2607,7 +3477,6 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, cik_srbm_select(rdev, 0, 0, 0, 0); spin_unlock(&rdev->srbm_mutex); } - wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return wptr; } @@ -2615,10 +3484,8 @@ u32 cik_compute_ring_get_wptr(struct radeon_device *rdev, void cik_compute_ring_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { - u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask; - - rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr); - WDOORBELL32(ring->doorbell_offset, wptr); + rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); + WDOORBELL32(ring->doorbell_offset, ring->wptr); } /** @@ -2915,7 +3782,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32(CP_HPD_EOP_CONTROL); tmp &= ~EOP_SIZE_MASK; - tmp |= drm_order(MEC_HPD_SIZE / 8); + tmp |= order_base_2(MEC_HPD_SIZE / 8); WREG32(CP_HPD_EOP_CONTROL, tmp); } cik_srbm_select(rdev, 0, 0, 0, 0); @@ -3032,9 +3899,9 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK); mqd->queue_state.cp_hqd_pq_control |= - drm_order(rdev->ring[idx].ring_size / 8); + order_base_2(rdev->ring[idx].ring_size / 8); mqd->queue_state.cp_hqd_pq_control |= - (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8); + (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8); #ifdef __BIG_ENDIAN mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT; #endif @@ -3145,12 +4012,7 @@ static int cik_cp_resume(struct radeon_device *rdev) { int r; - /* Reset all cp blocks */ - WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); - RREG32(GRBM_SOFT_RESET); - mdelay(15); - WREG32(GRBM_SOFT_RESET, 0); - RREG32(GRBM_SOFT_RESET); + cik_enable_gui_idle_interrupt(rdev, false); r = cik_cp_load_microcode(rdev); if (r) @@ -3163,1714 +4025,1816 @@ static int cik_cp_resume(struct radeon_device *rdev) if (r) return r; + cik_enable_gui_idle_interrupt(rdev, true); + return 0; } -/* - * sDMA - System DMA - * Starting with CIK, the GPU has new asynchronous - * DMA engines. These engines are used for compute - * and gfx. There are two DMA engines (SDMA0, SDMA1) - * and each one supports 1 ring buffer used for gfx - * and 2 queues used for compute. - * - * The programming model is very similar to the CP - * (ring buffer, IBs, etc.), but sDMA has it's own - * packet format that is different from the PM4 format - * used by the CP. sDMA supports copying data, writing - * embedded data, solid fills, and a number of other - * things. It also has support for tiling/detiling of - * buffers. - */ +static void cik_print_gpu_status_regs(struct radeon_device *rdev) +{ + dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", + RREG32(GRBM_STATUS)); + dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", + RREG32(GRBM_STATUS2)); + dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", + RREG32(GRBM_STATUS_SE0)); + dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", + RREG32(GRBM_STATUS_SE1)); + dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", + RREG32(GRBM_STATUS_SE2)); + dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", + RREG32(GRBM_STATUS_SE3)); + dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", + RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n", + RREG32(SRBM_STATUS2)); + dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n", + RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); + dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", + RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); + dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT)); + dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n", + RREG32(CP_STALLED_STAT3)); + dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", + RREG32(CP_CPF_BUSY_STAT)); + dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", + RREG32(CP_CPF_STALLED_STAT1)); + dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS)); + dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT)); + dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", + RREG32(CP_CPC_STALLED_STAT1)); + dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS)); +} + /** - * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine + * cik_gpu_check_soft_reset - check which blocks are busy * * @rdev: radeon_device pointer - * @ib: IB object to schedule * - * Schedule an IB in the DMA ring (CIK). + * Check which blocks are busy and return the relevant reset + * mask to be used by cik_gpu_soft_reset(). + * Returns a mask of the blocks to be reset. */ -void cik_sdma_ring_ib_execute(struct radeon_device *rdev, - struct radeon_ib *ib) +u32 cik_gpu_check_soft_reset(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[ib->ring]; - u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + u32 reset_mask = 0; + u32 tmp; - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 5; - while ((next_rptr & 7) != 4) - next_rptr++; - next_rptr += 4; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); - radeon_ring_write(ring, 1); /* number of DWs to follow */ - radeon_ring_write(ring, next_rptr); - } + /* GRBM_STATUS */ + tmp = RREG32(GRBM_STATUS); + if (tmp & (PA_BUSY | SC_BUSY | + BCI_BUSY | SX_BUSY | + TA_BUSY | VGT_BUSY | + DB_BUSY | CB_BUSY | + GDS_BUSY | SPI_BUSY | + IA_BUSY | IA_BUSY_NO_DMA)) + reset_mask |= RADEON_RESET_GFX; - /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 4) - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); - radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ - radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); - radeon_ring_write(ring, ib->length_dw); + if (tmp & (CP_BUSY | CP_COHERENCY_BUSY)) + reset_mask |= RADEON_RESET_CP; -} + /* GRBM_STATUS2 */ + tmp = RREG32(GRBM_STATUS2); + if (tmp & RLC_BUSY) + reset_mask |= RADEON_RESET_RLC; -/** - * cik_sdma_fence_ring_emit - emit a fence on the DMA ring - * - * @rdev: radeon_device pointer - * @fence: radeon fence object - * - * Add a DMA fence packet to the ring to write - * the fence seq number and DMA trap packet to generate - * an interrupt if needed (CIK). - */ -void cik_sdma_fence_ring_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - u64 addr = rdev->fence_drv[fence->ring].gpu_addr; - u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | - SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ - u32 ref_and_mask; + /* SDMA0_STATUS_REG */ + tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); + if (!(tmp & SDMA_IDLE)) + reset_mask |= RADEON_RESET_DMA; - if (fence->ring == R600_RING_TYPE_DMA_INDEX) - ref_and_mask = SDMA0; - else - ref_and_mask = SDMA1; + /* SDMA1_STATUS_REG */ + tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); + if (!(tmp & SDMA_IDLE)) + reset_mask |= RADEON_RESET_DMA1; - /* write the fence */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); - radeon_ring_write(ring, addr & 0xffffffff); - radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); - radeon_ring_write(ring, fence->seq); - /* generate an interrupt */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); - /* flush HDP */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); - radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); - radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); - radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ - radeon_ring_write(ring, ref_and_mask); /* MASK */ - radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ -} + /* SRBM_STATUS2 */ + tmp = RREG32(SRBM_STATUS2); + if (tmp & SDMA_BUSY) + reset_mask |= RADEON_RESET_DMA; -/** - * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * @semaphore: radeon semaphore object - * @emit_wait: wait or signal semaphore - * - * Add a DMA semaphore packet to the ring wait on or signal - * other rings (CIK). - */ -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - u64 addr = semaphore->gpu_addr; - u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S; + if (tmp & SDMA1_BUSY) + reset_mask |= RADEON_RESET_DMA1; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); - radeon_ring_write(ring, addr & 0xfffffff8); - radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); -} + /* SRBM_STATUS */ + tmp = RREG32(SRBM_STATUS); -/** - * cik_sdma_gfx_stop - stop the gfx async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the gfx async dma ring buffers (CIK). - */ -static void cik_sdma_gfx_stop(struct radeon_device *rdev) -{ - u32 rb_cntl, reg_offset; - int i; + if (tmp & IH_BUSY) + reset_mask |= RADEON_RESET_IH; - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + if (tmp & SEM_BUSY) + reset_mask |= RADEON_RESET_SEM; - for (i = 0; i < 2; i++) { - if (i == 0) - reg_offset = SDMA0_REGISTER_OFFSET; - else - reg_offset = SDMA1_REGISTER_OFFSET; - rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset); - rb_cntl &= ~SDMA_RB_ENABLE; - WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); - WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); - } -} + if (tmp & GRBM_RQ_PENDING) + reset_mask |= RADEON_RESET_GRBM; -/** - * cik_sdma_rlc_stop - stop the compute async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the compute async dma queues (CIK). - */ -static void cik_sdma_rlc_stop(struct radeon_device *rdev) -{ - /* XXX todo */ -} + if (tmp & VMC_BUSY) + reset_mask |= RADEON_RESET_VMC; -/** - * cik_sdma_enable - stop the async dma engines - * - * @rdev: radeon_device pointer - * @enable: enable/disable the DMA MEs. - * - * Halt or unhalt the async dma engines (CIK). - */ -static void cik_sdma_enable(struct radeon_device *rdev, bool enable) -{ - u32 me_cntl, reg_offset; - int i; + if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY | + MCC_BUSY | MCD_BUSY)) + reset_mask |= RADEON_RESET_MC; - for (i = 0; i < 2; i++) { - if (i == 0) - reg_offset = SDMA0_REGISTER_OFFSET; - else - reg_offset = SDMA1_REGISTER_OFFSET; - me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset); - if (enable) - me_cntl &= ~SDMA_HALT; - else - me_cntl |= SDMA_HALT; - WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl); + if (evergreen_is_display_hung(rdev)) + reset_mask |= RADEON_RESET_DISPLAY; + + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; } + + return reset_mask; } /** - * cik_sdma_gfx_resume - setup and start the async dma engines + * cik_gpu_soft_reset - soft reset GPU * * @rdev: radeon_device pointer + * @reset_mask: mask of which blocks to reset * - * Set up the gfx DMA ring buffers and enable them (CIK). - * Returns 0 for success, error for failure. + * Soft reset the blocks specified in @reset_mask. */ -static int cik_sdma_gfx_resume(struct radeon_device *rdev) +static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) { - struct radeon_ring *ring; - u32 rb_cntl, ib_cntl; - u32 rb_bufsz; - u32 reg_offset, wb_offset; - int i, r; + struct evergreen_mc_save save; + u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 tmp; - for (i = 0; i < 2; i++) { - if (i == 0) { - ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; - reg_offset = SDMA0_REGISTER_OFFSET; - wb_offset = R600_WB_DMA_RPTR_OFFSET; - } else { - ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; - reg_offset = SDMA1_REGISTER_OFFSET; - wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; - } + if (reset_mask == 0) + return; - WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); - WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); + dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask); - /* Set ring buffer size in dwords */ - rb_bufsz = drm_order(ring->ring_size / 4); - rb_cntl = rb_bufsz << 1; -#ifdef __BIG_ENDIAN - rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE; -#endif - WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); + cik_print_gpu_status_regs(rdev); + dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", + RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); + dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", + RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); - /* Initialize the ring buffer's read and write pointers */ - WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0); - WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0); + /* disable CG/PG */ + cik_fini_pg(rdev); + cik_fini_cg(rdev); - /* set the wb address whether it's enabled or not */ - WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset, - upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset, - ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); + /* stop the rlc */ + cik_rlc_stop(rdev); - if (rdev->wb.enabled) - rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE; + /* Disable GFX parsing/prefetching */ + WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); - WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8); - WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40); + /* Disable MEC parsing/prefetching */ + WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT); - ring->wptr = 0; - WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); + if (reset_mask & RADEON_RESET_DMA) { + /* sdma0 */ + tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET); + tmp |= SDMA_HALT; + WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp); + } + if (reset_mask & RADEON_RESET_DMA1) { + /* sdma1 */ + tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET); + tmp |= SDMA_HALT; + WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp); + } - ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; + evergreen_mc_stop(rdev, &save); + if (evergreen_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); + } - /* enable DMA RB */ - WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); + if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) + grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX; - ib_cntl = SDMA_IB_ENABLE; -#ifdef __BIG_ENDIAN - ib_cntl |= SDMA_IB_SWAP_ENABLE; -#endif - /* enable DMA IBs */ - WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl); + if (reset_mask & RADEON_RESET_CP) { + grbm_soft_reset |= SOFT_RESET_CP; - ring->ready = true; + srbm_soft_reset |= SOFT_RESET_GRBM; + } - r = radeon_ring_test(rdev, ring->idx, ring); - if (r) { - ring->ready = false; - return r; - } + if (reset_mask & RADEON_RESET_DMA) + srbm_soft_reset |= SOFT_RESET_SDMA; + + if (reset_mask & RADEON_RESET_DMA1) + srbm_soft_reset |= SOFT_RESET_SDMA1; + + if (reset_mask & RADEON_RESET_DISPLAY) + srbm_soft_reset |= SOFT_RESET_DC; + + if (reset_mask & RADEON_RESET_RLC) + grbm_soft_reset |= SOFT_RESET_RLC; + + if (reset_mask & RADEON_RESET_SEM) + srbm_soft_reset |= SOFT_RESET_SEM; + + if (reset_mask & RADEON_RESET_IH) + srbm_soft_reset |= SOFT_RESET_IH; + + if (reset_mask & RADEON_RESET_GRBM) + srbm_soft_reset |= SOFT_RESET_GRBM; + + if (reset_mask & RADEON_RESET_VMC) + srbm_soft_reset |= SOFT_RESET_VMC; + + if (!(rdev->flags & RADEON_IS_IGP)) { + if (reset_mask & RADEON_RESET_MC) + srbm_soft_reset |= SOFT_RESET_MC; } - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + if (grbm_soft_reset) { + tmp = RREG32(GRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(GRBM_SOFT_RESET, tmp); + tmp = RREG32(GRBM_SOFT_RESET); - return 0; -} + udelay(50); -/** - * cik_sdma_rlc_resume - setup and start the async dma engines - * - * @rdev: radeon_device pointer - * - * Set up the compute DMA queues and enable them (CIK). - * Returns 0 for success, error for failure. - */ -static int cik_sdma_rlc_resume(struct radeon_device *rdev) -{ - /* XXX todo */ - return 0; -} + tmp &= ~grbm_soft_reset; + WREG32(GRBM_SOFT_RESET, tmp); + tmp = RREG32(GRBM_SOFT_RESET); + } -/** - * cik_sdma_load_microcode - load the sDMA ME ucode - * - * @rdev: radeon_device pointer - * - * Loads the sDMA0/1 ucode. - * Returns 0 for success, -EINVAL if the ucode is not available. - */ -static int cik_sdma_load_microcode(struct radeon_device *rdev) -{ - const __be32 *fw_data; - int i; + if (srbm_soft_reset) { + tmp = RREG32(SRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(SRBM_SOFT_RESET, tmp); + tmp = RREG32(SRBM_SOFT_RESET); - if (!rdev->sdma_fw) - return -EINVAL; + udelay(50); + + tmp &= ~srbm_soft_reset; + WREG32(SRBM_SOFT_RESET, tmp); + tmp = RREG32(SRBM_SOFT_RESET); + } - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); + /* Wait a little for things to settle down */ + udelay(50); - /* halt the MEs */ - cik_sdma_enable(rdev, false); + evergreen_mc_resume(rdev, &save); + udelay(50); - /* sdma0 */ - fw_data = (const __be32 *)rdev->sdma_fw->data; - WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); - for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) - WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++)); - WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); - - /* sdma1 */ - fw_data = (const __be32 *)rdev->sdma_fw->data; - WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); - for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) - WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++)); - WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); - - WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); - WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); - return 0; + cik_print_gpu_status_regs(rdev); } /** - * cik_sdma_resume - setup and start the async dma engines + * cik_asic_reset - soft reset GPU * * @rdev: radeon_device pointer * - * Set up the DMA engines and enable them (CIK). - * Returns 0 for success, error for failure. + * Look up which blocks are hung and attempt + * to reset them. + * Returns 0 for success. */ -static __unused int cik_sdma_resume(struct radeon_device *rdev) +int cik_asic_reset(struct radeon_device *rdev) { - int r; + u32 reset_mask; - /* Reset dma */ - WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - RREG32(SRBM_SOFT_RESET); + reset_mask = cik_gpu_check_soft_reset(rdev); - r = cik_sdma_load_microcode(rdev); - if (r) - return r; + if (reset_mask) + r600_set_bios_scratch_engine_hung(rdev, true); + + cik_gpu_soft_reset(rdev, reset_mask); - /* unhalt the MEs */ - cik_sdma_enable(rdev, true); + reset_mask = cik_gpu_check_soft_reset(rdev); - /* start the gfx rings and rlc compute queues */ - r = cik_sdma_gfx_resume(rdev); - if (r) - return r; - r = cik_sdma_rlc_resume(rdev); - if (r) - return r; + if (!reset_mask) + r600_set_bios_scratch_engine_hung(rdev, false); return 0; } /** - * cik_sdma_fini - tear down the async dma engines + * cik_gfx_is_lockup - check if the 3D engine is locked up * * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information * - * Stop the async dma engines and free the rings (CIK). + * Check if the 3D engine is locked up (CIK). + * Returns true if the engine is locked, false if not. */ -static __unused void cik_sdma_fini(struct radeon_device *rdev) +bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); - /* halt the MEs */ - cik_sdma_enable(rdev, false); - radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); - radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); - /* XXX - compute dma queue tear down */ -} - -/** - * cik_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (CIK). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int cik_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_bytes, cur_size_in_bytes; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); - num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); - r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } + u32 reset_mask = cik_gpu_check_soft_reset(rdev); - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_bytes = size_in_bytes; - if (cur_size_in_bytes > 0x1fffff) - cur_size_in_bytes = 0x1fffff; - size_in_bytes -= cur_size_in_bytes; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); - radeon_ring_write(ring, cur_size_in_bytes); - radeon_ring_write(ring, 0); /* src/dst endian swap */ - radeon_ring_write(ring, src_offset & 0xffffffff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff); - src_offset += cur_size_in_bytes; - dst_offset += cur_size_in_bytes; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; + if (!(reset_mask & (RADEON_RESET_GFX | + RADEON_RESET_COMPUTE | + RADEON_RESET_CP))) { + radeon_ring_lockup_update(ring); + return false; } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; + /* force CP activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); } +/* MC */ /** - * cik_sdma_ring_test - simple async dma engine test + * cik_mc_program - program the GPU memory controller * * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information * - * Test the DMA engine by writing using it to write an - * value to memory. (CIK). - * Returns 0 for success, error for failure. + * Set the location of vram, gart, and AGP in the GPU's + * physical address space (CIK). */ -int cik_sdma_ring_test(struct radeon_device *rdev, - struct radeon_ring *ring) +static void cik_mc_program(struct radeon_device *rdev) { - unsigned i; - int r; - volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr; + struct evergreen_mc_save save; u32 tmp; + int i, j; - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - writel(tmp, ptr); - - r = radeon_ring_lock(rdev, ring, 4); - if (r) { - DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); - return r; + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); } - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); - radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff); - radeon_ring_write(ring, 1); /* number of DWs to follow */ - radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = readl(ptr); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); + evergreen_mc_stop(rdev, &save); + if (radeon_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); } - - if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, + rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + rdev->mc.vram_end >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, + rdev->vram_scratch.gpu_addr >> 12); + tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + /* XXX double check these! */ + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); + WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + if (radeon_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); } - return r; + evergreen_mc_resume(rdev, &save); + /* we need to own VRAM, so turn off the VGA renderer here + * to stop it overwriting our objects */ + rv515_vga_render_disable(rdev); } /** - * cik_sdma_ib_test - test an IB on the DMA engine + * cik_mc_init - initialize the memory controller driver params * * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information * - * Test a simple IB in the DMA ring (CIK). - * Returns 0 on success, error on failure. + * Look up the amount of vram, vram width, and decide how to place + * vram and gart within the GPU's physical address space (CIK). + * Returns 0 for success. */ -int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +static int cik_mc_init(struct radeon_device *rdev) { - struct radeon_ib ib; - unsigned i; - int r; - volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr; - u32 tmp = 0; - - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - writel(tmp, ptr); - - r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); - if (r) { - DRM_ERROR("radeon: failed to get ib (%d).\n", r); - return r; - } - - ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); - ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; - ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff; - ib.ptr[3] = 1; - ib.ptr[4] = 0xDEADBEEF; - ib.length_dw = 5; + u32 tmp; + int chansize, numchan; - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); - return r; - } - r = radeon_fence_wait(ib.fence, false); - if (r) { - DRM_ERROR("radeon: fence wait failed (%d).\n", r); - return r; - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = readl(ptr); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); + /* Get VRAM informations */ + rdev->mc.vram_is_ddr = true; + tmp = RREG32(MC_ARB_RAMCFG); + if (tmp & CHANSIZE_MASK) { + chansize = 64; } else { - DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); - r = -EINVAL; + chansize = 32; } - radeon_ib_free(rdev, &ib); - return r; + tmp = RREG32(MC_SHARED_CHMAP); + switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 4; + break; + case 3: + numchan = 8; + break; + case 4: + numchan = 3; + break; + case 5: + numchan = 6; + break; + case 6: + numchan = 10; + break; + case 7: + numchan = 12; + break; + case 8: + numchan = 16; + break; + } + rdev->mc.vram_width = numchan * chansize; + /* Could aper size report 0 ? */ + rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); + rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* size in MB on si */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; + rdev->mc.visible_vram_size = rdev->mc.aper_size; + si_vram_gtt_location(rdev, &rdev->mc); + radeon_update_bandwidth_info(rdev); + + return 0; +} + +/* + * GART + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the radeon vm/hsa code. + */ +/** + * cik_pcie_gart_tlb_flush - gart tlb flush callback + * + * @rdev: radeon_device pointer + * + * Flush the TLB for the VMID 0 page table (CIK). + */ +void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) +{ + /* flush hdp cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0); + + /* bits 0-15 are the VM contexts0-15 */ + WREG32(VM_INVALIDATE_REQUEST, 0x1); +} + +/** + * cik_pcie_gart_enable - gart enable + * + * @rdev: radeon_device pointer + * + * This sets up the TLBs, programs the page tables for VMID0, + * sets up the hw for VMIDs 1-15 which are allocated on + * demand, and sets up the global locations for the LDS, GDS, + * and GPUVM for FSA64 clients (CIK). + * Returns 0 for success, errors for failure. + */ +static int cik_pcie_gart_enable(struct radeon_device *rdev) +{ + int r, i; + + if (rdev->gart.robj == NULL) { + dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + r = radeon_gart_table_vram_pin(rdev); + if (r) + return r; + radeon_gart_restore(rdev); + /* Setup TLB control */ + WREG32(MC_VM_MX_L1_TLB_CNTL, + (0xA << 7) | + ENABLE_L1_TLB | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + ENABLE_ADVANCED_DRIVER_MODEL | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | + ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7) | + CONTEXT1_IDENTITY_ACCESS_MODE(1)); + WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); + WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | + L2_CACHE_BIGK_FRAGMENT_SIZE(6)); + /* setup context0 */ + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + WREG32(VM_CONTEXT0_CNTL2, 0); + WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); + + WREG32(0x15D4, 0); + WREG32(0x15D8, 0); + WREG32(0x15DC, 0); + + /* empty context1-15 */ + /* FIXME start with 4G, once using 2 level pt switch to full + * vm size space + */ + /* set vm size, must be a multiple of 4 */ + WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); + WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); + for (i = 1; i < 16; i++) { + if (i < 8) + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), + rdev->gart.table_addr >> 12); + else + WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), + rdev->gart.table_addr >> 12); + } + + /* enable context1-15 */ + WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + WREG32(VM_CONTEXT1_CNTL2, 4); + WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | + RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | + PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | + VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | + VALID_PROTECTION_FAULT_ENABLE_DEFAULT | + READ_PROTECTION_FAULT_ENABLE_INTERRUPT | + READ_PROTECTION_FAULT_ENABLE_DEFAULT | + WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); + + /* TC cache setup ??? */ + WREG32(TC_CFG_L1_LOAD_POLICY0, 0); + WREG32(TC_CFG_L1_LOAD_POLICY1, 0); + WREG32(TC_CFG_L1_STORE_POLICY, 0); + + WREG32(TC_CFG_L2_LOAD_POLICY0, 0); + WREG32(TC_CFG_L2_LOAD_POLICY1, 0); + WREG32(TC_CFG_L2_STORE_POLICY0, 0); + WREG32(TC_CFG_L2_STORE_POLICY1, 0); + WREG32(TC_CFG_L2_ATOMIC_POLICY, 0); + + WREG32(TC_CFG_L1_VOLATILE, 0); + WREG32(TC_CFG_L2_VOLATILE, 0); + + if (rdev->family == CHIP_KAVERI) { + u32 tmp = RREG32(CHUB_CONTROL); + tmp &= ~BYPASS_VM; + WREG32(CHUB_CONTROL, tmp); + } + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + spin_lock(&rdev->srbm_mutex); + for (i = 0; i < 16; i++) { + cik_srbm_select(rdev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(SH_MEM_CONFIG, 0); + WREG32(SH_MEM_APE1_BASE, 1); + WREG32(SH_MEM_APE1_LIMIT, 0); + WREG32(SH_MEM_BASES, 0); + /* SDMA GFX */ + WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0); + WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0); + WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0); + WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0); + /* XXX SDMA RLC - todo */ + } + cik_srbm_select(rdev, 0, 0, 0, 0); + spin_unlock(&rdev->srbm_mutex); + + cik_pcie_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); + rdev->gart.ready = true; + return 0; +} + +/** + * cik_pcie_gart_disable - gart disable + * + * @rdev: radeon_device pointer + * + * This disables all VM page table (CIK). + */ +static void cik_pcie_gart_disable(struct radeon_device *rdev) +{ + /* Disable all tables */ + WREG32(VM_CONTEXT0_CNTL, 0); + WREG32(VM_CONTEXT1_CNTL, 0); + /* Setup TLB control */ + WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7) | + CONTEXT1_IDENTITY_ACCESS_MODE(1)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | + L2_CACHE_BIGK_FRAGMENT_SIZE(6)); + radeon_gart_table_vram_unpin(rdev); } +/** + * cik_pcie_gart_fini - vm fini callback + * + * @rdev: radeon_device pointer + * + * Tears down the driver GART/VM setup (CIK). + */ +static void cik_pcie_gart_fini(struct radeon_device *rdev) +{ + cik_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); +} + +/* vm parser */ +/** + * cik_ib_parse - vm ib_parse callback + * + * @rdev: radeon_device pointer + * @ib: indirect buffer pointer + * + * CIK uses hw IB checking so this is a nop (CIK). + */ +int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) +{ + return 0; +} + +/* + * vm + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the radeon vm/hsa code. + */ +/** + * cik_vm_init - cik vm init callback + * + * @rdev: radeon_device pointer + * + * Inits cik specific vm parameters (number of VMs, base of vram for + * VMIDs 1-15) (CIK). + * Returns 0 for success. + */ +int cik_vm_init(struct radeon_device *rdev) +{ + /* number of VMs */ + rdev->vm_manager.nvm = 16; + /* base offset of vram pages */ + if (rdev->flags & RADEON_IS_IGP) { + u64 tmp = RREG32(MC_VM_FB_OFFSET); + tmp <<= 22; + rdev->vm_manager.vram_base_offset = tmp; + } else + rdev->vm_manager.vram_base_offset = 0; + + return 0; +} + +/** + * cik_vm_fini - cik vm fini callback + * + * @rdev: radeon_device pointer + * + * Tear down any asic specific VM setup (CIK). + */ +void cik_vm_fini(struct radeon_device *rdev) +{ +} + +/** + * cik_vm_decode_fault - print human readable fault info + * + * @rdev: radeon_device pointer + * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value + * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value + * + * Print human readable fault information (CIK). + */ +static void cik_vm_decode_fault(struct radeon_device *rdev, + u32 status, u32 addr, u32 mc_client) +{ + u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; + u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; + u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; + char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, + (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; + + printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", + protections, vmid, addr, + (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", + block, mc_client, mc_id); +} + +/** + * cik_vm_flush - cik vm flush using the CP + * + * @rdev: radeon_device pointer + * + * Update the page table base and flush the VM TLB + * using the CP (CIK). + */ +void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + + if (vm == NULL) + return; + + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0))); + if (vm->id < 8) { + radeon_ring_write(ring, + (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + } else { + radeon_ring_write(ring, + (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + } + radeon_ring_write(ring, 0); + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* update SH_MEM_* regs */ + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0))); + radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, VMID(vm->id)); + + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0))); + radeon_ring_write(ring, SH_MEM_BASES >> 2); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, 0); /* SH_MEM_BASES */ + radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */ + radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */ + radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ + + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0))); + radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, VMID(0)); -static void cik_print_gpu_status_regs(struct radeon_device *rdev) -{ - dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(GRBM_STATUS)); - dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(GRBM_STATUS2)); - dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(GRBM_STATUS_SE0)); - dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(GRBM_STATUS_SE1)); - dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(GRBM_STATUS_SE2)); - dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(GRBM_STATUS_SE3)); - dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(SRBM_STATUS)); - dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(SRBM_STATUS2)); - dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n", - RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); - dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", - RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); - dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT)); - dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(CP_STALLED_STAT1)); - dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(CP_STALLED_STAT2)); - dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(CP_STALLED_STAT3)); - dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(CP_CPF_BUSY_STAT)); - dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(CP_CPF_STALLED_STAT1)); - dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS)); - dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT)); - dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(CP_CPC_STALLED_STAT1)); - dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS)); + /* HDP flush */ + /* We should be using the WAIT_REG_MEM packet here like in + * cik_fence_ring_emit(), but it causes the CP to hang in this + * context... + */ + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0))); + radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); + + /* bits 0-15 are the VM contexts0-15 */ + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0))); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 1 << vm->id); + + /* compute doesn't have PFP */ + if (ridx == RADEON_RING_TYPE_GFX_INDEX) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); + } } /** - * cik_gpu_check_soft_reset - check which blocks are busy + * cik_vm_set_page - update the page tables using sDMA * * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags * - * Check which blocks are busy and return the relevant reset - * mask to be used by cik_gpu_soft_reset(). - * Returns a mask of the blocks to be reset. + * Update the page tables using CP or sDMA (CIK). */ -static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev) +void cik_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) +{ + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + + if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { + /* CP */ + while (count) { + ndw = 2 + count * 2; + if (ndw > 0x3FFE) + ndw = 0x3FFE; + + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); + ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(1)); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + for (; ndw > 2; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + /* DMA */ + cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); + } +} + +/* + * RLC + * The RLC is a multi-purpose microengine that handles a + * variety of functions, the most important of which is + * the interrupt controller. + */ +static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable) +{ + u32 tmp = RREG32(CP_INT_CNTL_RING0); + + if (enable) + tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + else + tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + WREG32(CP_INT_CNTL_RING0, tmp); +} + +static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) { - u32 reset_mask = 0; u32 tmp; - /* GRBM_STATUS */ - tmp = RREG32(GRBM_STATUS); - if (tmp & (PA_BUSY | SC_BUSY | - BCI_BUSY | SX_BUSY | - TA_BUSY | VGT_BUSY | - DB_BUSY | CB_BUSY | - GDS_BUSY | SPI_BUSY | - IA_BUSY | IA_BUSY_NO_DMA)) - reset_mask |= RADEON_RESET_GFX; + tmp = RREG32(RLC_LB_CNTL); + if (enable) + tmp |= LOAD_BALANCE_ENABLE; + else + tmp &= ~LOAD_BALANCE_ENABLE; + WREG32(RLC_LB_CNTL, tmp); +} - if (tmp & (CP_BUSY | CP_COHERENCY_BUSY)) - reset_mask |= RADEON_RESET_CP; +static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) +{ + u32 i, j, k; + u32 mask; - /* GRBM_STATUS2 */ - tmp = RREG32(GRBM_STATUS2); - if (tmp & RLC_BUSY) - reset_mask |= RADEON_RESET_RLC; + for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { + for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { + cik_select_se_sh(rdev, i, j); + for (k = 0; k < rdev->usec_timeout; k++) { + if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) + break; + udelay(1); + } + } + } + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); - /* SDMA0_STATUS_REG */ - tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); - if (!(tmp & SDMA_IDLE)) - reset_mask |= RADEON_RESET_DMA; + mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; + for (k = 0; k < rdev->usec_timeout; k++) { + if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) + break; + udelay(1); + } +} - /* SDMA1_STATUS_REG */ - tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); - if (!(tmp & SDMA_IDLE)) - reset_mask |= RADEON_RESET_DMA1; +static void cik_update_rlc(struct radeon_device *rdev, u32 rlc) +{ + u32 tmp; - /* SRBM_STATUS2 */ - tmp = RREG32(SRBM_STATUS2); - if (tmp & SDMA_BUSY) - reset_mask |= RADEON_RESET_DMA; + tmp = RREG32(RLC_CNTL); + if (tmp != rlc) + WREG32(RLC_CNTL, rlc); +} - if (tmp & SDMA1_BUSY) - reset_mask |= RADEON_RESET_DMA1; +static u32 cik_halt_rlc(struct radeon_device *rdev) +{ + u32 data, orig; - /* SRBM_STATUS */ - tmp = RREG32(SRBM_STATUS); + orig = data = RREG32(RLC_CNTL); - if (tmp & IH_BUSY) - reset_mask |= RADEON_RESET_IH; + if (data & RLC_ENABLE) { + u32 i; - if (tmp & SEM_BUSY) - reset_mask |= RADEON_RESET_SEM; + data &= ~RLC_ENABLE; + WREG32(RLC_CNTL, data); - if (tmp & GRBM_RQ_PENDING) - reset_mask |= RADEON_RESET_GRBM; + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) + break; + udelay(1); + } - if (tmp & VMC_BUSY) - reset_mask |= RADEON_RESET_VMC; + cik_wait_for_rlc_serdes(rdev); + } - if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY | - MCC_BUSY | MCD_BUSY)) - reset_mask |= RADEON_RESET_MC; + return orig; +} - if (evergreen_is_display_hung(rdev)) - reset_mask |= RADEON_RESET_DISPLAY; +void cik_enter_rlc_safe_mode(struct radeon_device *rdev) +{ + u32 tmp, i, mask; + + tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE); + WREG32(RLC_GPR_REG2, tmp); + + mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(RLC_GPM_STAT) & mask) == mask) + break; + udelay(1); + } + + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(RLC_GPR_REG2) & REQ) == 0) + break; + udelay(1); + } +} + +void cik_exit_rlc_safe_mode(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); + WREG32(RLC_GPR_REG2, tmp); +} + +/** + * cik_rlc_stop - stop the RLC ME + * + * @rdev: radeon_device pointer + * + * Halt the RLC ME (MicroEngine) (CIK). + */ +static void cik_rlc_stop(struct radeon_device *rdev) +{ + WREG32(RLC_CNTL, 0); + + cik_enable_gui_idle_interrupt(rdev, false); + + cik_wait_for_rlc_serdes(rdev); +} + +/** + * cik_rlc_start - start the RLC ME + * + * @rdev: radeon_device pointer + * + * Unhalt the RLC ME (MicroEngine) (CIK). + */ +static void cik_rlc_start(struct radeon_device *rdev) +{ + WREG32(RLC_CNTL, RLC_ENABLE); - /* Skip MC reset as it's mostly likely not hung, just busy */ - if (reset_mask & RADEON_RESET_MC) { - DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); - reset_mask &= ~RADEON_RESET_MC; - } + cik_enable_gui_idle_interrupt(rdev, true); - return reset_mask; + udelay(50); } /** - * cik_gpu_soft_reset - soft reset GPU + * cik_rlc_resume - setup the RLC hw * * @rdev: radeon_device pointer - * @reset_mask: mask of which blocks to reset * - * Soft reset the blocks specified in @reset_mask. + * Initialize the RLC registers, load the ucode, + * and start the RLC (CIK). + * Returns 0 for success, -EINVAL if the ucode is not available. */ -static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) +static int cik_rlc_resume(struct radeon_device *rdev) { - struct evergreen_mc_save save; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; - u32 tmp; - - if (reset_mask == 0) - return; + u32 i, size, tmp; + const __be32 *fw_data; - dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask); + if (!rdev->rlc_fw) + return -EINVAL; - cik_print_gpu_status_regs(rdev); - dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); - dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", - RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); + switch (rdev->family) { + case CHIP_BONAIRE: + default: + size = BONAIRE_RLC_UCODE_SIZE; + break; + case CHIP_KAVERI: + size = KV_RLC_UCODE_SIZE; + break; + case CHIP_KABINI: + size = KB_RLC_UCODE_SIZE; + break; + } - /* stop the rlc */ cik_rlc_stop(rdev); - /* Disable GFX parsing/prefetching */ - WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); - - /* Disable MEC parsing/prefetching */ - WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT); + /* disable CG */ + tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; + WREG32(RLC_CGCG_CGLS_CTRL, tmp); - if (reset_mask & RADEON_RESET_DMA) { - /* sdma0 */ - tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA_HALT; - WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp); - } - if (reset_mask & RADEON_RESET_DMA1) { - /* sdma1 */ - tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA_HALT; - WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp); - } + si_rlc_reset(rdev); - evergreen_mc_stop(rdev, &save); - if (evergreen_mc_wait_for_idle(rdev)) { - dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); - } + cik_init_pg(rdev); - if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) - grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX; + cik_init_cg(rdev); - if (reset_mask & RADEON_RESET_CP) { - grbm_soft_reset |= SOFT_RESET_CP; + WREG32(RLC_LB_CNTR_INIT, 0); + WREG32(RLC_LB_CNTR_MAX, 0x00008000); - srbm_soft_reset |= SOFT_RESET_GRBM; - } + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); + WREG32(RLC_LB_PARAMS, 0x00600408); + WREG32(RLC_LB_CNTL, 0x80000004); - if (reset_mask & RADEON_RESET_DMA) - srbm_soft_reset |= SOFT_RESET_SDMA; + WREG32(RLC_MC_CNTL, 0); + WREG32(RLC_UCODE_CNTL, 0); - if (reset_mask & RADEON_RESET_DMA1) - srbm_soft_reset |= SOFT_RESET_SDMA1; + fw_data = (const __be32 *)rdev->rlc_fw->data; + WREG32(RLC_GPM_UCODE_ADDR, 0); + for (i = 0; i < size; i++) + WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(RLC_GPM_UCODE_ADDR, 0); - if (reset_mask & RADEON_RESET_DISPLAY) - srbm_soft_reset |= SOFT_RESET_DC; + /* XXX - find out what chips support lbpw */ + cik_enable_lbpw(rdev, false); - if (reset_mask & RADEON_RESET_RLC) - grbm_soft_reset |= SOFT_RESET_RLC; + if (rdev->family == CHIP_BONAIRE) + WREG32(RLC_DRIVER_DMA_STATUS, 0); - if (reset_mask & RADEON_RESET_SEM) - srbm_soft_reset |= SOFT_RESET_SEM; + cik_rlc_start(rdev); - if (reset_mask & RADEON_RESET_IH) - srbm_soft_reset |= SOFT_RESET_IH; + return 0; +} - if (reset_mask & RADEON_RESET_GRBM) - srbm_soft_reset |= SOFT_RESET_GRBM; +static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) +{ + u32 data, orig, tmp, tmp2; - if (reset_mask & RADEON_RESET_VMC) - srbm_soft_reset |= SOFT_RESET_VMC; + orig = data = RREG32(RLC_CGCG_CGLS_CTRL); - if (!(rdev->flags & RADEON_IS_IGP)) { - if (reset_mask & RADEON_RESET_MC) - srbm_soft_reset |= SOFT_RESET_MC; - } + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { + cik_enable_gui_idle_interrupt(rdev, true); - if (grbm_soft_reset) { - tmp = RREG32(GRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(GRBM_SOFT_RESET, tmp); - tmp = RREG32(GRBM_SOFT_RESET); + tmp = cik_halt_rlc(rdev); - udelay(50); + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); + WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); + tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; + WREG32(RLC_SERDES_WR_CTRL, tmp2); - tmp &= ~grbm_soft_reset; - WREG32(GRBM_SOFT_RESET, tmp); - tmp = RREG32(GRBM_SOFT_RESET); - } + cik_update_rlc(rdev, tmp); - if (srbm_soft_reset) { - tmp = RREG32(SRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + data |= CGCG_EN | CGLS_EN; + } else { + cik_enable_gui_idle_interrupt(rdev, false); - udelay(50); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); - tmp &= ~srbm_soft_reset; - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + data &= ~(CGCG_EN | CGLS_EN); } - /* Wait a little for things to settle down */ - udelay(50); - - evergreen_mc_resume(rdev, &save); - udelay(50); + if (orig != data) + WREG32(RLC_CGCG_CGLS_CTRL, data); - cik_print_gpu_status_regs(rdev); } -/** - * cik_asic_reset - soft reset GPU - * - * @rdev: radeon_device pointer - * - * Look up which blocks are hung and attempt - * to reset them. - * Returns 0 for success. - */ -int cik_asic_reset(struct radeon_device *rdev) +static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) { - u32 reset_mask; - - reset_mask = cik_gpu_check_soft_reset(rdev); - - if (reset_mask) - r600_set_bios_scratch_engine_hung(rdev, true); + u32 data, orig, tmp = 0; + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { + if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { + if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { + orig = data = RREG32(CP_MEM_SLP_CNTL); + data |= CP_MEM_LS_EN; + if (orig != data) + WREG32(CP_MEM_SLP_CNTL, data); + } + } - cik_gpu_soft_reset(rdev, reset_mask); + orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data &= 0xfffffffd; + if (orig != data) + WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + + tmp = cik_halt_rlc(rdev); + + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); + WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); + data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; + WREG32(RLC_SERDES_WR_CTRL, data); + + cik_update_rlc(rdev, tmp); + + if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { + orig = data = RREG32(CGTS_SM_CTRL_REG); + data &= ~SM_MODE_MASK; + data |= SM_MODE(0x2); + data |= SM_MODE_ENABLE; + data &= ~CGTS_OVERRIDE; + if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) && + (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS)) + data &= ~CGTS_LS_OVERRIDE; + data &= ~ON_MONITOR_ADD_MASK; + data |= ON_MONITOR_ADD_EN; + data |= ON_MONITOR_ADD(0x96); + if (orig != data) + WREG32(CGTS_SM_CTRL_REG, data); + } + } else { + orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data |= 0x00000002; + if (orig != data) + WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + + data = RREG32(RLC_MEM_SLP_CNTL); + if (data & RLC_MEM_LS_EN) { + data &= ~RLC_MEM_LS_EN; + WREG32(RLC_MEM_SLP_CNTL, data); + } - reset_mask = cik_gpu_check_soft_reset(rdev); + data = RREG32(CP_MEM_SLP_CNTL); + if (data & CP_MEM_LS_EN) { + data &= ~CP_MEM_LS_EN; + WREG32(CP_MEM_SLP_CNTL, data); + } - if (!reset_mask) - r600_set_bios_scratch_engine_hung(rdev, false); + orig = data = RREG32(CGTS_SM_CTRL_REG); + data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE; + if (orig != data) + WREG32(CGTS_SM_CTRL_REG, data); - return 0; -} + tmp = cik_halt_rlc(rdev); -/** - * cik_gfx_is_lockup - check if the 3D engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the 3D engine is locked up (CIK). - * Returns true if the engine is locked, false if not. - */ -bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = cik_gpu_check_soft_reset(rdev); + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); + WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); + data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; + WREG32(RLC_SERDES_WR_CTRL, data); - if (!(reset_mask & (RADEON_RESET_GFX | - RADEON_RESET_COMPUTE | - RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); - return false; + cik_update_rlc(rdev, tmp); } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); } -/** - * cik_sdma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up (CIK). - * Returns true if the engine appears to be locked up, false if not. - */ -bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +static const u32 mc_cg_registers[] = { - u32 reset_mask = cik_gpu_check_soft_reset(rdev); - u32 mask; + MC_HUB_MISC_HUB_CG, + MC_HUB_MISC_SIP_CG, + MC_HUB_MISC_VM_CG, + MC_XPB_CLK_GAT, + ATC_MISC_CG, + MC_CITF_MISC_WR_CG, + MC_CITF_MISC_RD_CG, + MC_CITF_MISC_VM_CG, + VM_L2_CG, +}; - if (ring->idx == R600_RING_TYPE_DMA_INDEX) - mask = RADEON_RESET_DMA; - else - mask = RADEON_RESET_DMA1; +static void cik_enable_mc_ls(struct radeon_device *rdev, + bool enable) +{ + int i; + u32 orig, data; - if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); - return false; + for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { + orig = data = RREG32(mc_cg_registers[i]); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) + data |= MC_LS_ENABLE; + else + data &= ~MC_LS_ENABLE; + if (data != orig) + WREG32(mc_cg_registers[i], data); } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); } -/* MC */ -/** - * cik_mc_program - program the GPU memory controller - * - * @rdev: radeon_device pointer - * - * Set the location of vram, gart, and AGP in the GPU's - * physical address space (CIK). - */ -static __unused void cik_mc_program(struct radeon_device *rdev) +static void cik_enable_mc_mgcg(struct radeon_device *rdev, + bool enable) { - struct evergreen_mc_save save; - u32 tmp; - int i, j; - - /* Initialize HDP */ - for (i = 0, j = 0; i < 32; i++, j += 0x18) { - WREG32((0x2c14 + j), 0x00000000); - WREG32((0x2c18 + j), 0x00000000); - WREG32((0x2c1c + j), 0x00000000); - WREG32((0x2c20 + j), 0x00000000); - WREG32((0x2c24 + j), 0x00000000); - } - WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); - - evergreen_mc_stop(rdev, &save); - if (radeon_mc_wait_for_idle(rdev)) { - dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); - } - /* Lockout access through VGA aperture*/ - WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); - /* Update configuration */ - WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, - rdev->mc.vram_start >> 12); - WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, - rdev->mc.vram_end >> 12); - WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, - rdev->vram_scratch.gpu_addr >> 12); - tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); - WREG32(MC_VM_FB_LOCATION, tmp); - /* XXX double check these! */ - WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); - WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); - WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); - WREG32(MC_VM_AGP_BASE, 0); - WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); - WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); - if (radeon_mc_wait_for_idle(rdev)) { - dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); + int i; + u32 orig, data; + + for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { + orig = data = RREG32(mc_cg_registers[i]); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) + data |= MC_CG_ENABLE; + else + data &= ~MC_CG_ENABLE; + if (data != orig) + WREG32(mc_cg_registers[i], data); } - evergreen_mc_resume(rdev, &save); - /* we need to own VRAM, so turn off the VGA renderer here - * to stop it overwriting our objects */ - rv515_vga_render_disable(rdev); } -/** - * cik_mc_init - initialize the memory controller driver params - * - * @rdev: radeon_device pointer - * - * Look up the amount of vram, vram width, and decide how to place - * vram and gart within the GPU's physical address space (CIK). - * Returns 0 for success. - */ -static __unused int cik_mc_init(struct radeon_device *rdev) +static void cik_enable_sdma_mgcg(struct radeon_device *rdev, + bool enable) { - u32 tmp; - int chansize, numchan; + u32 orig, data; - /* Get VRAM informations */ - rdev->mc.vram_is_ddr = true; - tmp = RREG32(MC_ARB_RAMCFG); - if (tmp & CHANSIZE_MASK) { - chansize = 64; + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { + WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); + WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); } else { - chansize = 32; - } - tmp = RREG32(MC_SHARED_CHMAP); - switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 4; - break; - case 3: - numchan = 8; - break; - case 4: - numchan = 3; - break; - case 5: - numchan = 6; - break; - case 6: - numchan = 10; - break; - case 7: - numchan = 12; - break; - case 8: - numchan = 16; - break; - } - rdev->mc.vram_width = numchan * chansize; - /* Could aper size report 0 ? */ - rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); - rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); - /* size in MB on si */ - rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; - rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; - rdev->mc.visible_vram_size = rdev->mc.aper_size; - si_vram_gtt_location(rdev, &rdev->mc); - radeon_update_bandwidth_info(rdev); + orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET); + data |= 0xff000000; + if (data != orig) + WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data); - return 0; + orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET); + data |= 0xff000000; + if (data != orig) + WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data); + } } -/* - * GART - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the radeon vm/hsa code. - */ -/** - * cik_pcie_gart_tlb_flush - gart tlb flush callback - * - * @rdev: radeon_device pointer - * - * Flush the TLB for the VMID 0 page table (CIK). - */ -void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) +static void cik_enable_sdma_mgls(struct radeon_device *rdev, + bool enable) { - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0); + u32 orig, data; + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) { + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); + data |= 0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); + + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); + data |= 0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); + } else { + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); + data &= ~0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); - /* bits 0-15 are the VM contexts0-15 */ - WREG32(VM_INVALIDATE_REQUEST, 0x1); + orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); + data &= ~0x100; + if (orig != data) + WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); + } } -/** - * cik_pcie_gart_enable - gart enable - * - * @rdev: radeon_device pointer - * - * This sets up the TLBs, programs the page tables for VMID0, - * sets up the hw for VMIDs 1-15 which are allocated on - * demand, and sets up the global locations for the LDS, GDS, - * and GPUVM for FSA64 clients (CIK). - * Returns 0 for success, errors for failure. - */ -static __unused int cik_pcie_gart_enable(struct radeon_device *rdev) +static void cik_enable_uvd_mgcg(struct radeon_device *rdev, + bool enable) { - int r, i; + u32 orig, data; - if (rdev->gart.robj == NULL) { - dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); - return -EINVAL; - } - r = radeon_gart_table_vram_pin(rdev); - if (r) - return r; - radeon_gart_restore(rdev); - /* Setup TLB control */ - WREG32(MC_VM_MX_L1_TLB_CNTL, - (0xA << 7) | - ENABLE_L1_TLB | - SYSTEM_ACCESS_MODE_NOT_IN_SYS | - ENABLE_ADVANCED_DRIVER_MODEL | - SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); - /* Setup L2 cache */ - WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | - ENABLE_L2_FRAGMENT_PROCESSING | - ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | - ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | - EFFECTIVE_L2_QUEUE_SIZE(7) | - CONTEXT1_IDENTITY_ACCESS_MODE(1)); - WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); - WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | - L2_CACHE_BIGK_FRAGMENT_SIZE(6)); - /* setup context0 */ - WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); - WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); - WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(rdev->dummy_page.addr >> 12)); - WREG32(VM_CONTEXT0_CNTL2, 0); - WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { + data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); + data = 0xfff; + WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); - WREG32(0x15D4, 0); - WREG32(0x15D8, 0); - WREG32(0x15DC, 0); + orig = data = RREG32(UVD_CGC_CTRL); + data |= DCM; + if (orig != data) + WREG32(UVD_CGC_CTRL, data); + } else { + data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); + data &= ~0xfff; + WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); - /* empty context1-15 */ - /* FIXME start with 4G, once using 2 level pt switch to full - * vm size space - */ - /* set vm size, must be a multiple of 4 */ - WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); - WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); - for (i = 1; i < 16; i++) { - if (i < 8) - WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), - rdev->gart.table_addr >> 12); - else - WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), - rdev->gart.table_addr >> 12); + orig = data = RREG32(UVD_CGC_CTRL); + data &= ~DCM; + if (orig != data) + WREG32(UVD_CGC_CTRL, data); } +} - /* enable context1-15 */ - WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(rdev->dummy_page.addr >> 12)); - WREG32(VM_CONTEXT1_CNTL2, 4); - WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | - RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | - PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | - VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | - VALID_PROTECTION_FAULT_ENABLE_DEFAULT | - READ_PROTECTION_FAULT_ENABLE_INTERRUPT | - READ_PROTECTION_FAULT_ENABLE_DEFAULT | - WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); +static void cik_enable_bif_mgls(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; - /* TC cache setup ??? */ - WREG32(TC_CFG_L1_LOAD_POLICY0, 0); - WREG32(TC_CFG_L1_LOAD_POLICY1, 0); - WREG32(TC_CFG_L1_STORE_POLICY, 0); + orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); - WREG32(TC_CFG_L2_LOAD_POLICY0, 0); - WREG32(TC_CFG_L2_LOAD_POLICY1, 0); - WREG32(TC_CFG_L2_STORE_POLICY0, 0); - WREG32(TC_CFG_L2_STORE_POLICY1, 0); - WREG32(TC_CFG_L2_ATOMIC_POLICY, 0); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) + data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | + REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; + else + data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | + REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); - WREG32(TC_CFG_L1_VOLATILE, 0); - WREG32(TC_CFG_L2_VOLATILE, 0); + if (orig != data) + WREG32_PCIE_PORT(PCIE_CNTL2, data); +} - if (rdev->family == CHIP_KAVERI) { - u32 tmp = RREG32(CHUB_CONTROL); - tmp &= ~BYPASS_VM; - WREG32(CHUB_CONTROL, tmp); - } +static void cik_enable_hdp_mgcg(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data; - /* XXX SH_MEM regs */ - /* where to put LDS, scratch, GPUVM in FSA64 space */ - spin_lock(&rdev->srbm_mutex); - for (i = 0; i < 16; i++) { - cik_srbm_select(rdev, 0, 0, 0, i); - /* CP and shaders */ - WREG32(SH_MEM_CONFIG, 0); - WREG32(SH_MEM_APE1_BASE, 1); - WREG32(SH_MEM_APE1_LIMIT, 0); - WREG32(SH_MEM_BASES, 0); - /* SDMA GFX */ - WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0); - WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0); - WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0); - WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0); - /* XXX SDMA RLC - todo */ - } - cik_srbm_select(rdev, 0, 0, 0, 0); - spin_unlock(&rdev->srbm_mutex); + orig = data = RREG32(HDP_HOST_PATH_CNTL); - cik_pcie_gart_tlb_flush(rdev); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(rdev->mc.gtt_size >> 20), - (unsigned long long)rdev->gart.table_addr); - rdev->gart.ready = true; - return 0; + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) + data &= ~CLOCK_GATING_DIS; + else + data |= CLOCK_GATING_DIS; + + if (orig != data) + WREG32(HDP_HOST_PATH_CNTL, data); } -/** - * cik_pcie_gart_disable - gart disable - * - * @rdev: radeon_device pointer - * - * This disables all VM page table (CIK). - */ -static void cik_pcie_gart_disable(struct radeon_device *rdev) +static void cik_enable_hdp_ls(struct radeon_device *rdev, + bool enable) { - /* Disable all tables */ - WREG32(VM_CONTEXT0_CNTL, 0); - WREG32(VM_CONTEXT1_CNTL, 0); - /* Setup TLB control */ - WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS | - SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); - /* Setup L2 cache */ - WREG32(VM_L2_CNTL, - ENABLE_L2_FRAGMENT_PROCESSING | - ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | - ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | - EFFECTIVE_L2_QUEUE_SIZE(7) | - CONTEXT1_IDENTITY_ACCESS_MODE(1)); - WREG32(VM_L2_CNTL2, 0); - WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | - L2_CACHE_BIGK_FRAGMENT_SIZE(6)); - radeon_gart_table_vram_unpin(rdev); + u32 orig, data; + + orig = data = RREG32(HDP_MEM_POWER_LS); + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) + data |= HDP_LS_ENABLE; + else + data &= ~HDP_LS_ENABLE; + + if (orig != data) + WREG32(HDP_MEM_POWER_LS, data); } -/** - * cik_pcie_gart_fini - vm fini callback - * - * @rdev: radeon_device pointer - * - * Tears down the driver GART/VM setup (CIK). - */ -static __unused void cik_pcie_gart_fini(struct radeon_device *rdev) -{ - cik_pcie_gart_disable(rdev); - radeon_gart_table_vram_free(rdev); - radeon_gart_fini(rdev); +void cik_update_cg(struct radeon_device *rdev, + u32 block, bool enable) +{ + + if (block & RADEON_CG_BLOCK_GFX) { + cik_enable_gui_idle_interrupt(rdev, false); + /* order matters! */ + if (enable) { + cik_enable_mgcg(rdev, true); + cik_enable_cgcg(rdev, true); + } else { + cik_enable_cgcg(rdev, false); + cik_enable_mgcg(rdev, false); + } + cik_enable_gui_idle_interrupt(rdev, true); + } + + if (block & RADEON_CG_BLOCK_MC) { + if (!(rdev->flags & RADEON_IS_IGP)) { + cik_enable_mc_mgcg(rdev, enable); + cik_enable_mc_ls(rdev, enable); + } + } + + if (block & RADEON_CG_BLOCK_SDMA) { + cik_enable_sdma_mgcg(rdev, enable); + cik_enable_sdma_mgls(rdev, enable); + } + + if (block & RADEON_CG_BLOCK_BIF) { + cik_enable_bif_mgls(rdev, enable); + } + + if (block & RADEON_CG_BLOCK_UVD) { + if (rdev->has_uvd) + cik_enable_uvd_mgcg(rdev, enable); + } + + if (block & RADEON_CG_BLOCK_HDP) { + cik_enable_hdp_mgcg(rdev, enable); + cik_enable_hdp_ls(rdev, enable); + } } -/* vm parser */ -/** - * cik_ib_parse - vm ib_parse callback - * - * @rdev: radeon_device pointer - * @ib: indirect buffer pointer - * - * CIK uses hw IB checking so this is a nop (CIK). - */ -int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) +static void cik_init_cg(struct radeon_device *rdev) { - return 0; + + cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true); + + if (rdev->has_uvd) + si_init_uvd_internal_cg(rdev); + + cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), true); } -/* - * vm - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the radeon vm/hsa code. - */ -/** - * cik_vm_init - cik vm init callback - * - * @rdev: radeon_device pointer - * - * Inits cik specific vm parameters (number of VMs, base of vram for - * VMIDs 1-15) (CIK). - * Returns 0 for success. - */ -int cik_vm_init(struct radeon_device *rdev) +static void cik_fini_cg(struct radeon_device *rdev) { - /* number of VMs */ - rdev->vm_manager.nvm = 16; - /* base offset of vram pages */ - if (rdev->flags & RADEON_IS_IGP) { - u64 tmp = RREG32(MC_VM_FB_OFFSET); - tmp <<= 22; - rdev->vm_manager.vram_base_offset = tmp; - } else - rdev->vm_manager.vram_base_offset = 0; + cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); - return 0; + cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); } -/** - * cik_vm_fini - cik vm fini callback - * - * @rdev: radeon_device pointer - * - * Tear down any asic specific VM setup (CIK). - */ -void cik_vm_fini(struct radeon_device *rdev) +static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, + bool enable) { + u32 data, orig; + + orig = data = RREG32(RLC_PG_CNTL); + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) + data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; + else + data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); } -/** - * cik_vm_decode_fault - print human readable fault info - * - * @rdev: radeon_device pointer - * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value - * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value - * - * Print human readable fault information (CIK). - */ -static void cik_vm_decode_fault(struct radeon_device *rdev, - u32 status, u32 addr, u32 mc_client) +static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, + bool enable) { - u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; - u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; - u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; - char *block = (char *)&mc_client; + u32 data, orig; - printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n", - protections, vmid, addr, - (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", - block, mc_id); + orig = data = RREG32(RLC_PG_CNTL); + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) + data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; + else + data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); } -/** - * cik_vm_flush - cik vm flush using the CP - * - * @rdev: radeon_device pointer - * - * Update the page table base and flush the VM TLB - * using the CP (CIK). - */ -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) { - struct radeon_ring *ring = &rdev->ring[ridx]; + u32 data, orig; - if (vm == NULL) - return; + orig = data = RREG32(RLC_PG_CNTL); + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) + data &= ~DISABLE_CP_PG; + else + data |= DISABLE_CP_PG; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - if (vm->id < 8) { - radeon_ring_write(ring, - (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); - } else { - radeon_ring_write(ring, - (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); - } - radeon_ring_write(ring, 0); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); +static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) +{ + u32 data, orig; - /* update SH_MEM_* regs */ - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, VMID(vm->id)); + orig = data = RREG32(RLC_PG_CNTL); + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS)) + data &= ~DISABLE_GDS_PG; + else + data |= DISABLE_GDS_PG; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - radeon_ring_write(ring, SH_MEM_BASES >> 2); - radeon_ring_write(ring, 0); +#define CP_ME_TABLE_SIZE 96 +#define CP_ME_TABLE_OFFSET 2048 +#define CP_MEC_TABLE_OFFSET 4096 - radeon_ring_write(ring, 0); /* SH_MEM_BASES */ - radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */ - radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */ - radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ +void cik_init_cp_pg_table(struct radeon_device *rdev) +{ + const __be32 *fw_data; + volatile u32 *dst_ptr; + int me, i, max_me = 4; + u32 bo_offset = 0; + u32 table_offset; - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, VMID(0)); + if (rdev->family == CHIP_KAVERI) + max_me = 5; - /* HDP flush */ - /* We should be using the WAIT_REG_MEM packet here like in - * cik_fence_ring_emit(), but it causes the CP to hang in this - * context... - */ - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); + if (rdev->rlc.cp_table_ptr == NULL) + return; - /* bits 0-15 are the VM contexts0-15 */ - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 1 << vm->id); + /* write the cp table buffer */ + dst_ptr = rdev->rlc.cp_table_ptr; + for (me = 0; me < max_me; me++) { + if (me == 0) { + fw_data = (const __be32 *)rdev->ce_fw->data; + table_offset = CP_ME_TABLE_OFFSET; + } else if (me == 1) { + fw_data = (const __be32 *)rdev->pfp_fw->data; + table_offset = CP_ME_TABLE_OFFSET; + } else if (me == 2) { + fw_data = (const __be32 *)rdev->me_fw->data; + table_offset = CP_ME_TABLE_OFFSET; + } else { + fw_data = (const __be32 *)rdev->mec_fw->data; + table_offset = CP_MEC_TABLE_OFFSET; + } - /* compute doesn't have PFP */ - if (ridx == RADEON_RING_TYPE_GFX_INDEX) { - /* sync PFP to ME, otherwise we might get invalid PFP reads */ - radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); - radeon_ring_write(ring, 0x0); + for (i = 0; i < CP_ME_TABLE_SIZE; i ++) { + dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]); + } + bo_offset += CP_ME_TABLE_SIZE; } } -/** - * cik_vm_set_page - update the page tables using sDMA - * - * @rdev: radeon_device pointer - * @ib: indirect buffer to fill with commands - * @pe: addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: access flags - * - * Update the page tables using CP or sDMA (CIK). - */ -void cik_vm_set_page(struct radeon_device *rdev, - struct radeon_ib *ib, - uint64_t pe, - uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) +static void cik_enable_gfx_cgpg(struct radeon_device *rdev, + bool enable) { - uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); - uint64_t value; - unsigned ndw; - - if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { - /* CP */ - while (count) { - ndw = 2 + count * 2; - if (ndw > 0x3FFE) - ndw = 0x3FFE; - - ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); - ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(1)); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - for (; ndw > 2; ndw -= 2, --count, pe += 8) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - } else { - value = 0; - } - addr += incr; - value |= r600_flags; - ib->ptr[ib->length_dw++] = value; - ib->ptr[ib->length_dw++] = upper_32_bits(value); - } - } + u32 data, orig; + + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { + orig = data = RREG32(RLC_PG_CNTL); + data |= GFX_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); + + orig = data = RREG32(RLC_AUTO_PG_CTRL); + data |= AUTO_PG_EN; + if (orig != data) + WREG32(RLC_AUTO_PG_CTRL, data); } else { - /* DMA */ - if (flags & RADEON_VM_PAGE_SYSTEM) { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - /* for non-physically contiguous pages (system) */ - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = ndw; - for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - } else { - value = 0; - } - addr += incr; - value |= r600_flags; - ib->ptr[ib->length_dw++] = value; - ib->ptr[ib->length_dw++] = upper_32_bits(value); - } - } - } else { - while (count) { - ndw = count; - if (ndw > 0x7FFFF) - ndw = 0x7FFFF; + orig = data = RREG32(RLC_PG_CNTL); + data &= ~GFX_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); - if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = r600_flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = ndw; /* number of entries */ - pe += ndw * 8; - addr += ndw * incr; - count -= ndw; - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); + orig = data = RREG32(RLC_AUTO_PG_CTRL); + data &= ~AUTO_PG_EN; + if (orig != data) + WREG32(RLC_AUTO_PG_CTRL, data); + + data = RREG32(DB_RENDER_CONTROL); } } -/** - * cik_dma_vm_flush - cik vm flush using sDMA - * - * @rdev: radeon_device pointer - * - * Update the page table base and flush the VM TLB - * using sDMA (CIK). - */ -void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) { - struct radeon_ring *ring = &rdev->ring[ridx]; - u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | - SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ - u32 ref_and_mask; + u32 mask = 0, tmp, tmp1; + int i; - if (vm == NULL) - return; + cik_select_se_sh(rdev, se, sh); + tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); + tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); - if (ridx == R600_RING_TYPE_DMA_INDEX) - ref_and_mask = SDMA0; - else - ref_and_mask = SDMA1; + tmp &= 0xffff0000; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vm->id < 8) { - radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); - } else { - radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + tmp |= tmp1; + tmp >>= 16; + + for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) { + mask <<= 1; + mask |= 1; } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); - /* update SH_MEM_* regs */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, VMID(vm->id)); + return (~tmp) & mask; +} - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_BASES >> 2); - radeon_ring_write(ring, 0); +static void cik_init_ao_cu_mask(struct radeon_device *rdev) +{ + u32 i, j, k, active_cu_number = 0; + u32 mask, counter, cu_bitmap; + u32 tmp = 0; + + for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { + for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { + mask = 1; + cu_bitmap = 0; + counter = 0; + for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) { + if (cik_get_cu_active_bitmap(rdev, i, j) & mask) { + if (counter < 2) + cu_bitmap |= mask; + counter ++; + } + mask <<= 1; + } + + active_cu_number += counter; + tmp |= (cu_bitmap << (i * 16 + j * 8)); + } + } - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_CONFIG >> 2); - radeon_ring_write(ring, 0); + WREG32(RLC_PG_AO_CU_MASK, tmp); - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2); - radeon_ring_write(ring, 1); + tmp = RREG32(RLC_MAX_PG_CU); + tmp &= ~MAX_PU_CU_MASK; + tmp |= MAX_PU_CU(active_cu_number); + WREG32(RLC_MAX_PG_CU, tmp); +} - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2); - radeon_ring_write(ring, 0); +static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, VMID(0)); + orig = data = RREG32(RLC_PG_CNTL); + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG)) + data |= STATIC_PER_CU_PG_ENABLE; + else + data &= ~STATIC_PER_CU_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); +} - /* flush HDP */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); - radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); - radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); - radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ - radeon_ring_write(ring, ref_and_mask); /* MASK */ - radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ +static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig; - /* flush TLB */ - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); - radeon_ring_write(ring, 1 << vm->id); + orig = data = RREG32(RLC_PG_CNTL); + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG)) + data |= DYN_PER_CU_PG_ENABLE; + else + data &= ~DYN_PER_CU_PG_ENABLE; + if (orig != data) + WREG32(RLC_PG_CNTL, data); } -/* - * RLC - * The RLC is a multi-purpose microengine that handles a - * variety of functions, the most important of which is - * the interrupt controller. - */ -/** - * cik_rlc_stop - stop the RLC ME - * - * @rdev: radeon_device pointer - * - * Halt the RLC ME (MicroEngine) (CIK). - */ -static void cik_rlc_stop(struct radeon_device *rdev) +#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 +#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D + +static void cik_init_gfx_cgpg(struct radeon_device *rdev) { - int i, j, k; - u32 mask, tmp; + u32 data, orig; + u32 i; + + if (rdev->rlc.cs_data) { + WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); + WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); + WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); + WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); + } else { + WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); + for (i = 0; i < 3; i++) + WREG32(RLC_GPM_SCRATCH_DATA, 0); + } + if (rdev->rlc.reg_list) { + WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); + for (i = 0; i < rdev->rlc.reg_list_size; i++) + WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); + } - tmp = RREG32(CP_INT_CNTL_RING0); - tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); - WREG32(CP_INT_CNTL_RING0, tmp); + orig = data = RREG32(RLC_PG_CNTL); + data |= GFX_PG_SRC; + if (orig != data) + WREG32(RLC_PG_CNTL, data); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); + WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); + WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); - tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; - WREG32(RLC_CGCG_CGLS_CTRL, tmp); + data = RREG32(CP_RB_WPTR_POLL_CNTL); + data &= ~IDLE_POLL_COUNT_MASK; + data |= IDLE_POLL_COUNT(0x60); + WREG32(CP_RB_WPTR_POLL_CNTL, data); - WREG32(RLC_CNTL, 0); + data = 0x10101010; + WREG32(RLC_PG_DELAY, data); - for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { - for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { - cik_select_se_sh(rdev, i, j); - for (k = 0; k < rdev->usec_timeout; k++) { - if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) - break; - udelay(1); - } - } - } - cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + data = RREG32(RLC_PG_DELAY_2); + data &= ~0xff; + data |= 0x3; + WREG32(RLC_PG_DELAY_2, data); + + data = RREG32(RLC_AUTO_PG_CTRL); + data &= ~GRBM_REG_SGIT_MASK; + data |= GRBM_REG_SGIT(0x700); + WREG32(RLC_AUTO_PG_CTRL, data); - mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; - for (k = 0; k < rdev->usec_timeout; k++) { - if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) - break; - udelay(1); - } } -/** - * cik_rlc_start - start the RLC ME - * - * @rdev: radeon_device pointer - * - * Unhalt the RLC ME (MicroEngine) (CIK). - */ -static void cik_rlc_start(struct radeon_device *rdev) +static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) { - u32 tmp; + cik_enable_gfx_cgpg(rdev, enable); + cik_enable_gfx_static_mgpg(rdev, enable); + cik_enable_gfx_dynamic_mgpg(rdev, enable); +} - WREG32(RLC_CNTL, RLC_ENABLE); +u32 cik_get_csb_size(struct radeon_device *rdev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; - tmp = RREG32(CP_INT_CNTL_RING0); - tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); - WREG32(CP_INT_CNTL_RING0, tmp); + if (rdev->rlc.cs_data == NULL) + return 0; - udelay(50); + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + /* pa_sc_raster_config/pa_sc_raster_config1 */ + count += 4; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; } -/** - * cik_rlc_resume - setup the RLC hw - * - * @rdev: radeon_device pointer - * - * Initialize the RLC registers, load the ucode, - * and start the RLC (CIK). - * Returns 0 for success, -EINVAL if the ucode is not available. - */ -static __unused int cik_rlc_resume(struct radeon_device *rdev) +void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) { - u32 i, size; - u32 clear_state_info[3]; - const __be32 *fw_data; + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; - if (!rdev->rlc_fw) - return -EINVAL; + if (rdev->rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); + buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE; + + buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1); + buffer[count++] = 0x80000000; + buffer[count++] = 0x80000000; + + for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count); + buffer[count++] = ext->reg_index - 0xa000; + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = ext->extent[i]; + } else { + return; + } + } + } + buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START; switch (rdev->family) { case CHIP_BONAIRE: - default: - size = BONAIRE_RLC_UCODE_SIZE; + buffer[count++] = 0x16000012; + buffer[count++] = 0x00000000; break; case CHIP_KAVERI: - size = KV_RLC_UCODE_SIZE; + buffer[count++] = 0x00000000; /* XXX */ + buffer[count++] = 0x00000000; break; case CHIP_KABINI: - size = KB_RLC_UCODE_SIZE; + buffer[count++] = 0x00000000; /* XXX */ + buffer[count++] = 0x00000000; + break; + default: + buffer[count++] = 0x00000000; + buffer[count++] = 0x00000000; break; } - cik_rlc_stop(rdev); - - WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC); - RREG32(GRBM_SOFT_RESET); - udelay(50); - WREG32(GRBM_SOFT_RESET, 0); - RREG32(GRBM_SOFT_RESET); - udelay(50); - - WREG32(RLC_LB_CNTR_INIT, 0); - WREG32(RLC_LB_CNTR_MAX, 0x00008000); - - cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); - WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); - WREG32(RLC_LB_PARAMS, 0x00600408); - WREG32(RLC_LB_CNTL, 0x80000004); - - WREG32(RLC_MC_CNTL, 0); - WREG32(RLC_UCODE_CNTL, 0); - - fw_data = (const __be32 *)rdev->rlc_fw->data; - WREG32(RLC_GPM_UCODE_ADDR, 0); - for (i = 0; i < size; i++) - WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); - WREG32(RLC_GPM_UCODE_ADDR, 0); + buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); + buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE; - /* XXX */ - clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr); - clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr; - clear_state_info[2] = 0;//cik_default_size; - WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d); - for (i = 0; i < 3; i++) - WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]); - WREG32(RLC_DRIVER_DMA_STATUS, 0); + buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0); + buffer[count++] = 0; +} - cik_rlc_start(rdev); +static void cik_init_pg(struct radeon_device *rdev) +{ + if (rdev->pg_flags) { + cik_enable_sck_slowdown_on_pu(rdev, true); + cik_enable_sck_slowdown_on_pd(rdev, true); + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { + cik_init_gfx_cgpg(rdev); + cik_enable_cp_pg(rdev, true); + cik_enable_gds_pg(rdev, true); + } + cik_init_ao_cu_mask(rdev); + cik_update_gfx_pg(rdev, true); + } +} - return 0; +static void cik_fini_pg(struct radeon_device *rdev) +{ + if (rdev->pg_flags) { + cik_update_gfx_pg(rdev, false); + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { + cik_enable_cp_pg(rdev, false); + cik_enable_gds_pg(rdev, false); + } + } } /* @@ -4942,7 +5906,9 @@ static void cik_disable_interrupt_state(struct radeon_device *rdev) u32 tmp; /* gfx ring */ - WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + tmp = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + WREG32(CP_INT_CNTL_RING0, tmp); /* sdma */ tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp); @@ -5001,7 +5967,7 @@ static void cik_disable_interrupt_state(struct radeon_device *rdev) * Called at device load and reume. * Returns 0 for success, errors for failure. */ -static __unused int cik_irq_init(struct radeon_device *rdev) +static int cik_irq_init(struct radeon_device *rdev) { int ret = 0; int rb_bufsz; @@ -5035,7 +6001,7 @@ static __unused int cik_irq_init(struct radeon_device *rdev) WREG32(INTERRUPT_CNTL, interrupt_cntl); WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); - rb_bufsz = drm_order(rdev->ih.ring_size / 4); + rb_bufsz = order_base_2(rdev->ih.ring_size / 4); ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | IH_WPTR_OVERFLOW_CLEAR | @@ -5083,14 +6049,14 @@ static __unused int cik_irq_init(struct radeon_device *rdev) */ int cik_irq_set(struct radeon_device *rdev) { - u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE | - PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE; + u32 cp_int_cntl; u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3; u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3; u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; u32 grbm_int_cntl = 0; u32 dma_cntl, dma_cntl1; + u32 thermal_int; if (!rdev->irq.installed) { WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -5104,6 +6070,10 @@ int cik_irq_set(struct radeon_device *rdev) return 0; } + cp_int_cntl = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE; + hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN; @@ -5123,6 +6093,13 @@ int cik_irq_set(struct radeon_device *rdev) cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; + if (rdev->flags & RADEON_IS_IGP) + thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) & + ~(THERM_INTH_MASK | THERM_INTL_MASK); + else + thermal_int = RREG32_SMC(CG_THERMAL_INT) & + ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW); + /* enable CP interrupts on all rings */ if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("cik_irq_set: sw int gfx\n"); @@ -5280,6 +6257,14 @@ int cik_irq_set(struct radeon_device *rdev) hpd6 |= DC_HPDx_INT_EN; } + if (rdev->irq.dpm_thermal) { + DRM_DEBUG("dpm thermal\n"); + if (rdev->flags & RADEON_IS_IGP) + thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK; + else + thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW; + } + WREG32(CP_INT_CNTL_RING0, cp_int_cntl); WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); @@ -5314,6 +6299,11 @@ int cik_irq_set(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, hpd5); WREG32(DC_HPD6_INT_CONTROL, hpd6); + if (rdev->flags & RADEON_IS_IGP) + WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int); + else + WREG32_SMC(CG_THERMAL_INT, thermal_int); + return 0; } @@ -5440,7 +6430,7 @@ static void cik_irq_suspend(struct radeon_device *rdev) * buffer (CIK). * Used for driver unload. */ -static __unused void cik_irq_fini(struct radeon_device *rdev) +static void cik_irq_fini(struct radeon_device *rdev) { cik_irq_suspend(rdev); r600_ih_ring_fini(rdev); @@ -5525,6 +6515,7 @@ irqreturn_t cik_irq_process(struct radeon_device *rdev) bool queue_hotplug = false; bool queue_reset = false; u32 addr, status, mc_client; + bool queue_thermal = false; if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE; @@ -5758,6 +6749,10 @@ restart_ih: break; } break; + case 124: /* UVD */ + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); + break; case 146: case 147: addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); @@ -5875,6 +6870,19 @@ restart_ih: break; } break; + case 230: /* thermal low to high */ + DRM_DEBUG("IH: thermal low to high\n"); + rdev->pm.dpm.thermal.high_to_low = false; + queue_thermal = true; + break; + case 231: /* thermal high to low */ + DRM_DEBUG("IH: thermal high to low\n"); + rdev->pm.dpm.thermal.high_to_low = true; + queue_thermal = true; + break; + case 233: /* GUI IDLE */ + DRM_DEBUG("IH: GUI idle\n"); + break; case 241: /* SDMA Privileged inst */ case 247: /* SDMA Privileged inst */ DRM_ERROR("Illegal instruction in SDMA command stream\n"); @@ -5914,9 +6922,6 @@ restart_ih: break; } break; - case 233: /* GUI IDLE */ - DRM_DEBUG("IH: GUI idle\n"); - break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); break; @@ -5930,6 +6935,8 @@ restart_ih: taskqueue_enqueue(rdev->tq, &rdev->hotplug_work); if (queue_reset) taskqueue_enqueue(rdev->tq, &rdev->reset_work); + if (queue_thermal) + taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); atomic_set(&rdev->ih.lock, 0); @@ -5959,6 +6966,16 @@ static int cik_startup(struct radeon_device *rdev) struct radeon_ring *ring; int r; + /* enable pcie gen2/3 link */ + cik_pcie_gen3_enable(rdev); + /* enable aspm */ + cik_program_aspm(rdev); + + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + cik_mc_program(rdev); if (rdev->flags & RADEON_IS_IGP) { @@ -5988,17 +7005,26 @@ static int cik_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - r = cik_pcie_gart_enable(rdev); if (r) return r; cik_gpu_init(rdev); /* allocate rlc buffers */ - r = si_rlc_init(rdev); + if (rdev->flags & RADEON_IS_IGP) { + if (rdev->family == CHIP_KAVERI) { + rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); + } else { + rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); + } + } + rdev->rlc.cs_data = ci_cs_data; + rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; + r = sumo_rlc_init(rdev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -6046,12 +7072,15 @@ static int cik_startup(struct radeon_device *rdev) return r; } - r = cik_uvd_resume(rdev); + r = radeon_uvd_resume(rdev); if (!r) { - r = radeon_fence_driver_start_ring(rdev, - R600_RING_TYPE_UVD_INDEX); - if (r) - dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + r = uvd_v4_2_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, + R600_RING_TYPE_UVD_INDEX); + if (r) + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + } } if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; @@ -6074,7 +7103,7 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; @@ -6083,7 +7112,7 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, - 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF)); + PACKET3(PACKET3_NOP, 0x3FFF)); if (r) return r; ring->me = 1; /* first MEC */ @@ -6095,7 +7124,7 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, - 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF)); + PACKET3(PACKET3_NOP, 0x3FFF)); if (r) return r; /* dGPU only have 1 MEC */ @@ -6108,7 +7137,7 @@ static int cik_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, - 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); if (r) return r; @@ -6116,7 +7145,7 @@ static int cik_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, - 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); if (r) return r; @@ -6130,12 +7159,11 @@ static int cik_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } @@ -6152,6 +7180,10 @@ static int cik_startup(struct radeon_device *rdev) return r; } + r = dce6_audio_init(rdev); + if (r) + return r; + return 0; } @@ -6197,11 +7229,14 @@ int cik_resume(struct radeon_device *rdev) */ int cik_suspend(struct radeon_device *rdev) { + dce6_audio_fini(rdev); radeon_vm_manager_fini(rdev); cik_cp_enable(rdev, false); cik_sdma_enable(rdev, false); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); + cik_fini_pg(rdev); + cik_fini_cg(rdev); cik_irq_suspend(rdev); radeon_wb_disable(rdev); cik_pcie_gart_disable(rdev); @@ -6322,7 +7357,7 @@ int cik_init(struct radeon_device *rdev) cik_cp_fini(rdev); cik_sdma_fini(rdev); cik_irq_fini(rdev); - si_rlc_fini(rdev); + sumo_rlc_fini(rdev); cik_mec_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); @@ -6357,14 +7392,16 @@ void cik_fini(struct radeon_device *rdev) { cik_cp_fini(rdev); cik_sdma_fini(rdev); + cik_fini_pg(rdev); + cik_fini_cg(rdev); cik_irq_fini(rdev); - si_rlc_fini(rdev); + sumo_rlc_fini(rdev); cik_mec_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); cik_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -6393,8 +7430,8 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev, struct radeon_crtc *radeon_crtc, struct drm_display_mode *mode) { - u32 tmp; - + u32 tmp, buffer_alloc, i; + u32 pipe_offset = radeon_crtc->crtc_id * 0x20; /* * Line Buffer Setup * There are 6 line buffers, one for each display controllers. @@ -6404,22 +7441,37 @@ static u32 dce8_line_buffer_adjust(struct radeon_device *rdev, * them using the stereo blender. */ if (radeon_crtc->base.enabled && mode) { - if (mode->crtc_hdisplay < 1920) + if (mode->crtc_hdisplay < 1920) { tmp = 1; - else if (mode->crtc_hdisplay < 2560) + buffer_alloc = 2; + } else if (mode->crtc_hdisplay < 2560) { tmp = 2; - else if (mode->crtc_hdisplay < 4096) + buffer_alloc = 2; + } else if (mode->crtc_hdisplay < 4096) { tmp = 0; - else { + buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; + } else { DRM_DEBUG_KMS("Mode too big for LB!\n"); tmp = 0; + buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; } - } else + } else { tmp = 1; + buffer_alloc = 0; + } WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset, LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0)); + WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset, + DMIF_BUFFERS_ALLOCATED(buffer_alloc)); + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) & + DMIF_BUFFERS_ALLOCATED_COMPLETED) + break; + udelay(1); + } + if (radeon_crtc->base.enabled && mode) { switch (tmp) { case 0: @@ -6821,7 +7873,7 @@ static void dce8_program_watermarks(struct radeon_device *rdev, u32 lb_size, u32 num_heads) { struct drm_display_mode *mode = &radeon_crtc->base.mode; - struct dce8_wm_params wm; + struct dce8_wm_params wm_low, wm_high; u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; @@ -6831,35 +7883,82 @@ static void dce8_program_watermarks(struct radeon_device *rdev, pixel_period = 1000000 / (u32)mode->clock; line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); - wm.yclk = rdev->pm.current_mclk * 10; - wm.sclk = rdev->pm.current_sclk * 10; - wm.disp_clk = mode->clock; - wm.src_width = mode->crtc_hdisplay; - wm.active_time = mode->crtc_hdisplay * pixel_period; - wm.blank_time = line_time - wm.active_time; - wm.interlaced = false; + /* watermark for high clocks */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && + rdev->pm.dpm_enabled) { + wm_high.yclk = + radeon_dpm_get_mclk(rdev, false) * 10; + wm_high.sclk = + radeon_dpm_get_sclk(rdev, false) * 10; + } else { + wm_high.yclk = rdev->pm.current_mclk * 10; + wm_high.sclk = rdev->pm.current_sclk * 10; + } + + wm_high.disp_clk = mode->clock; + wm_high.src_width = mode->crtc_hdisplay; + wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.blank_time = line_time - wm_high.active_time; + wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) - wm.interlaced = true; - wm.vsc = radeon_crtc->vsc; - wm.vtaps = 1; + wm_high.interlaced = true; + wm_high.vsc = radeon_crtc->vsc; + wm_high.vtaps = 1; if (radeon_crtc->rmx_type != RMX_OFF) - wm.vtaps = 2; - wm.bytes_per_pixel = 4; /* XXX: get this from fb config */ - wm.lb_size = lb_size; - wm.dram_channels = cik_get_number_of_dram_channels(rdev); - wm.num_heads = num_heads; + wm_high.vtaps = 2; + wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm_high.lb_size = lb_size; + wm_high.dram_channels = cik_get_number_of_dram_channels(rdev); + wm_high.num_heads = num_heads; /* set for high clocks */ - latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535); + latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535); + + /* possibly force display priority to high */ + /* should really do this at mode validation time... */ + if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) || + !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) || + !dce8_check_latency_hiding(&wm_high) || + (rdev->disp_priority == 2)) { + DRM_DEBUG_KMS("force priority to high\n"); + } + + /* watermark for low clocks */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && + rdev->pm.dpm_enabled) { + wm_low.yclk = + radeon_dpm_get_mclk(rdev, true) * 10; + wm_low.sclk = + radeon_dpm_get_sclk(rdev, true) * 10; + } else { + wm_low.yclk = rdev->pm.current_mclk * 10; + wm_low.sclk = rdev->pm.current_sclk * 10; + } + + wm_low.disp_clk = mode->clock; + wm_low.src_width = mode->crtc_hdisplay; + wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.blank_time = line_time - wm_low.active_time; + wm_low.interlaced = false; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + wm_low.interlaced = true; + wm_low.vsc = radeon_crtc->vsc; + wm_low.vtaps = 1; + if (radeon_crtc->rmx_type != RMX_OFF) + wm_low.vtaps = 2; + wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm_low.lb_size = lb_size; + wm_low.dram_channels = cik_get_number_of_dram_channels(rdev); + wm_low.num_heads = num_heads; + /* set for low clocks */ - /* wm.yclk = low clk; wm.sclk = low clk */ - latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535); + latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ - if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) || - !dce8_average_bandwidth_vs_available_bandwidth(&wm) || - !dce8_check_latency_hiding(&wm) || + if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) || + !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) || + !dce8_check_latency_hiding(&wm_low) || (rdev->disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } @@ -6884,6 +7983,11 @@ static void dce8_program_watermarks(struct radeon_device *rdev, LATENCY_HIGH_WATERMARK(line_time))); /* restore original selection */ WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask); + + /* save values for DPM */ + radeon_crtc->line_time = line_time; + radeon_crtc->wm_high = latency_watermark_a; + radeon_crtc->wm_low = latency_watermark_b; } /** @@ -6973,39 +8077,324 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) return r; } -int cik_uvd_resume(struct radeon_device *rdev) +static struct pci_dev dev_to_pcidev(device_t dev) { - uint64_t addr; - uint32_t size; - int r; + struct pci_dev pdev; + pdev.dev = dev; + return pdev; +} - r = radeon_uvd_resume(rdev); - if (r) - return r; +static void cik_pcie_gen3_enable(struct radeon_device *rdev) +{ +#if 0 + struct pci_dev *root = rdev->pdev->bus->self; +#else + device_t root = device_get_parent(rdev->dev); +#endif + int bridge_pos, gpu_pos; + u32 speed_cntl, mask, current_data_rate; + int ret, i; + u16 tmp16; + struct pci_dev root_pdev = dev_to_pcidev(root); + struct pci_dev pdev = dev_to_pcidev(rdev->dev); + + if (radeon_pcie_gen2 == 0) + return; - /* programm the VCPU memory controller bits 0-27 */ - addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET0, addr); - WREG32(UVD_VCPU_CACHE_SIZE0, size); + if (rdev->flags & RADEON_IS_IGP) + return; - addr += size; - size = RADEON_UVD_STACK_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET1, addr); - WREG32(UVD_VCPU_CACHE_SIZE1, size); + if (!(rdev->flags & RADEON_IS_PCIE)) + return; - addr += size; - size = RADEON_UVD_HEAP_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET2, addr); - WREG32(UVD_VCPU_CACHE_SIZE2, size); + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret != 0) + return; - /* bits 28-31 */ - addr = (rdev->uvd.gpu_addr >> 28) & 0xF; - WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80))) + return; - /* bits 32-39 */ - addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; - WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); + current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> + LC_CURRENT_DATA_RATE_SHIFT; + if (mask & DRM_PCIE_SPEED_80) { + if (current_data_rate == 2) { + DRM_INFO("PCIE gen 3 link speeds already enabled\n"); + return; + } + DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n"); + } else if (mask & DRM_PCIE_SPEED_50) { + if (current_data_rate == 1) { + DRM_INFO("PCIE gen 2 link speeds already enabled\n"); + return; + } + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); + } - return 0; + bridge_pos = pci_get_pciecap_ptr(root); + if (!bridge_pos) + return; + + gpu_pos = pci_get_pciecap_ptr(rdev->dev); + if (!gpu_pos) + return; + + if (mask & DRM_PCIE_SPEED_80) { + /* re-try equalization if gen3 is not already enabled */ + if (current_data_rate != 2) { + u16 bridge_cfg, gpu_cfg; + u16 bridge_cfg2, gpu_cfg2; + u32 max_lw, current_lw, tmp; + + pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); + pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + + tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; + pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16); + + tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; + pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + + tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1); + max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; + current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT; + + if (current_lw < max_lw) { + tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + if (tmp & LC_RENEGOTIATION_SUPPORT) { + tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS); + tmp |= (max_lw << LC_LINK_WIDTH_SHIFT); + tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW; + WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp); + } + } + + for (i = 0; i < 10; i++) { + /* check status */ + pci_read_config_word(&pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); + if (tmp16 & PCI_EXP_DEVSTA_TRPND) + break; + + pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); + pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + + pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); + pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); + + tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); + tmp |= LC_SET_QUIESCE; + WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + + tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); + tmp |= LC_REDO_EQ; + WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + + mdelay(100); + + /* linkctl */ + pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL_HAWD; + tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); + pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16); + + pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16); + tmp16 &= ~PCI_EXP_LNKCTL_HAWD; + tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); + pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + + /* linkctl2 */ + pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~((1 << 4) | (7 << 9)); + tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9))); + pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, tmp16); + + pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~((1 << 4) | (7 << 9)); + tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9))); + pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + + tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); + tmp &= ~LC_SET_QUIESCE; + WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + } + } + } + + /* set the link speed */ + speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE; + speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; + WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + + pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); + tmp16 &= ~0xf; + if (mask & DRM_PCIE_SPEED_80) + tmp16 |= 3; /* gen3 */ + else if (mask & DRM_PCIE_SPEED_50) + tmp16 |= 2; /* gen2 */ + else + tmp16 |= 1; /* gen1 */ + pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); + + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); + speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; + WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + + for (i = 0; i < rdev->usec_timeout; i++) { + speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); + if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0) + break; + udelay(1); + } +} + +static void cik_program_aspm(struct radeon_device *rdev) +{ + u32 data, orig; + bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; + bool disable_clkreq = false; + + if (radeon_aspm == 0) + return; + + /* XXX double check IGPs */ + if (rdev->flags & RADEON_IS_IGP) + return; + + if (!(rdev->flags & RADEON_IS_PCIE)) + return; + + orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); + data &= ~LC_XMIT_N_FTS_MASK; + data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data); + + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3); + data |= LC_GO_TO_RECOVERY; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL3, data); + + orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL); + data |= P_IGNORE_EDB_ERR; + if (orig != data) + WREG32_PCIE_PORT(PCIE_P_CNTL, data); + + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); + data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK); + data |= LC_PMI_TO_L1_DIS; + if (!disable_l0s) + data |= LC_L0S_INACTIVITY(7); + + if (!disable_l1) { + data |= LC_L1_INACTIVITY(7); + data &= ~LC_PMI_TO_L1_DIS; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + + if (!disable_plloff_in_l1) { + bool clk_req_support; + + orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0); + data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + if (orig != data) + WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data); + + orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1); + data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + if (orig != data) + WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data); + + orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0); + data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + if (orig != data) + WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data); + + orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1); + data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + if (orig != data) + WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data); + + orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + data &= ~LC_DYN_LANES_PWR_STATE_MASK; + data |= LC_DYN_LANES_PWR_STATE(3); + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); + + if (!disable_clkreq) { +#ifdef zMN_TODO + struct pci_dev *root = rdev->pdev->bus->self; + u32 lnkcap; + + clk_req_support = false; + pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap); + if (lnkcap & PCI_EXP_LNKCAP_CLKPM) + clk_req_support = true; +#else + clk_req_support = false; +#endif + } else { + clk_req_support = false; + } + + if (clk_req_support) { + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2); + data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL2, data); + + orig = data = RREG32_SMC(THM_CLK_CNTL); + data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK); + data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1); + if (orig != data) + WREG32_SMC(THM_CLK_CNTL, data); + + orig = data = RREG32_SMC(MISC_CLK_CTRL); + data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK); + data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1); + if (orig != data) + WREG32_SMC(MISC_CLK_CTRL, data); + + orig = data = RREG32_SMC(CG_CLKPIN_CNTL); + data &= ~BCLK_AS_XCLK; + if (orig != data) + WREG32_SMC(CG_CLKPIN_CNTL, data); + + orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2); + data &= ~FORCE_BIF_REFCLK_EN; + if (orig != data) + WREG32_SMC(CG_CLKPIN_CNTL_2, data); + + orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL); + data &= ~MPLL_CLKOUT_SEL_MASK; + data |= MPLL_CLKOUT_SEL(4); + if (orig != data) + WREG32_SMC(MPLL_BYPASSCLK_SEL, data); + } + } + } else { + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + } + + orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); + data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN; + if (orig != data) + WREG32_PCIE_PORT(PCIE_CNTL2, data); + + if (!disable_l0s) { + data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); + if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) { + data = RREG32_PCIE_PORT(PCIE_LC_STATUS1); + if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) { + orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); + data &= ~LC_L0S_INACTIVITY_MASK; + if (orig != data) + WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + } + } + } } diff --git a/sys/dev/drm/radeon/cik_reg.h b/sys/dev/drm/radeon/cik_reg.h index d71e46d571..ca1bb61335 100644 --- a/sys/dev/drm/radeon/cik_reg.h +++ b/sys/dev/drm/radeon/cik_reg.h @@ -24,6 +24,9 @@ #ifndef __CIK_REG_H__ #define __CIK_REG_H__ +#define CIK_DIDT_IND_INDEX 0xca00 +#define CIK_DIDT_IND_DATA 0xca04 + #define CIK_DC_GPIO_HPD_MASK 0x65b0 #define CIK_DC_GPIO_HPD_A 0x65b4 #define CIK_DC_GPIO_HPD_EN 0x65b8 diff --git a/sys/dev/drm/radeon/cik_sdma.c b/sys/dev/drm/radeon/cik_sdma.c new file mode 100644 index 0000000000..4be726e014 --- /dev/null +++ b/sys/dev/drm/radeon/cik_sdma.c @@ -0,0 +1,782 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" + +/* sdma */ +#define CIK_SDMA_UCODE_SIZE 1050 +#define CIK_SDMA_UCODE_VERSION 64 + +/* + * sDMA - System DMA + * Starting with CIK, the GPU has new asynchronous + * DMA engines. These engines are used for compute + * and gfx. There are two DMA engines (SDMA0, SDMA1) + * and each one supports 1 ring buffer used for gfx + * and 2 queues used for compute. + * + * The programming model is very similar to the CP + * (ring buffer, IBs, etc.), but sDMA has it's own + * packet format that is different from the PM4 format + * used by the CP. sDMA supports copying data, writing + * embedded data, solid fills, and a number of other + * things. It also has support for tiling/detiling of + * buffers. + */ + +/** + * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (CIK). + */ +void cik_sdma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 5; + while ((next_rptr & 7) != 4) + next_rptr++; + next_rptr += 4; + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); + radeon_ring_write(ring, 1); /* number of DWs to follow */ + radeon_ring_write(ring, next_rptr); + } + + /* IB packet must end on a 8 DW boundary */ + while ((ring->wptr & 7) != 4) + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); + radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); + radeon_ring_write(ring, ib->length_dw); + +} + +/** + * cik_sdma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (CIK). + */ +void cik_sdma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | + SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ + u32 ref_and_mask; + + if (fence->ring == R600_RING_TYPE_DMA_INDEX) + ref_and_mask = SDMA0; + else + ref_and_mask = SDMA1; + + /* write the fence */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + radeon_ring_write(ring, fence->seq); + /* generate an interrupt */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); + /* flush HDP */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); + radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); + radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); + radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ + radeon_ring_write(ring, ref_and_mask); /* MASK */ + radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ +} + +/** + * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @semaphore: radeon semaphore object + * @emit_wait: wait or signal semaphore + * + * Add a DMA semaphore packet to the ring wait on or signal + * other rings (CIK). + */ +void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + u64 addr = semaphore->gpu_addr; + u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S; + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); + radeon_ring_write(ring, addr & 0xfffffff8); + radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); +} + +/** + * cik_sdma_gfx_stop - stop the gfx async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the gfx async dma ring buffers (CIK). + */ +static void cik_sdma_gfx_stop(struct radeon_device *rdev) +{ + u32 rb_cntl, reg_offset; + int i; + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + for (i = 0; i < 2; i++) { + if (i == 0) + reg_offset = SDMA0_REGISTER_OFFSET; + else + reg_offset = SDMA1_REGISTER_OFFSET; + rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset); + rb_cntl &= ~SDMA_RB_ENABLE; + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); + WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); + } +} + +/** + * cik_sdma_rlc_stop - stop the compute async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the compute async dma queues (CIK). + */ +static void cik_sdma_rlc_stop(struct radeon_device *rdev) +{ + /* XXX todo */ +} + +/** + * cik_sdma_enable - stop the async dma engines + * + * @rdev: radeon_device pointer + * @enable: enable/disable the DMA MEs. + * + * Halt or unhalt the async dma engines (CIK). + */ +void cik_sdma_enable(struct radeon_device *rdev, bool enable) +{ + u32 me_cntl, reg_offset; + int i; + + for (i = 0; i < 2; i++) { + if (i == 0) + reg_offset = SDMA0_REGISTER_OFFSET; + else + reg_offset = SDMA1_REGISTER_OFFSET; + me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset); + if (enable) + me_cntl &= ~SDMA_HALT; + else + me_cntl |= SDMA_HALT; + WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl); + } +} + +/** + * cik_sdma_gfx_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the gfx DMA ring buffers and enable them (CIK). + * Returns 0 for success, error for failure. + */ +static int cik_sdma_gfx_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + u32 rb_cntl, ib_cntl; + u32 rb_bufsz; + u32 reg_offset, wb_offset; + int i, r; + + for (i = 0; i < 2; i++) { + if (i == 0) { + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + reg_offset = SDMA0_REGISTER_OFFSET; + wb_offset = R600_WB_DMA_RPTR_OFFSET; + } else { + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + reg_offset = SDMA1_REGISTER_OFFSET; + wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; + } + + WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); + WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0); + WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset, + upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset, + ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE; + + WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8); + WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40); + + ring->wptr = 0; + WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); + + ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; + + /* enable DMA RB */ + WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); + + ib_cntl = SDMA_IB_ENABLE; +#ifdef __BIG_ENDIAN + ib_cntl |= SDMA_IB_SWAP_ENABLE; +#endif + /* enable DMA IBs */ + WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl); + + ring->ready = true; + + r = radeon_ring_test(rdev, ring->idx, ring); + if (r) { + ring->ready = false; + return r; + } + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * cik_sdma_rlc_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the compute DMA queues and enable them (CIK). + * Returns 0 for success, error for failure. + */ +static int cik_sdma_rlc_resume(struct radeon_device *rdev) +{ + /* XXX todo */ + return 0; +} + +/** + * cik_sdma_load_microcode - load the sDMA ME ucode + * + * @rdev: radeon_device pointer + * + * Loads the sDMA0/1 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int cik_sdma_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->sdma_fw) + return -EINVAL; + + /* stop the gfx rings and rlc compute queues */ + cik_sdma_gfx_stop(rdev); + cik_sdma_rlc_stop(rdev); + + /* halt the MEs */ + cik_sdma_enable(rdev, false); + + /* sdma0 */ + fw_data = (const __be32 *)rdev->sdma_fw->data; + WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); + for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) + WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++)); + WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); + + /* sdma1 */ + fw_data = (const __be32 *)rdev->sdma_fw->data; + WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); + for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) + WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++)); + WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); + + WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); + WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); + return 0; +} + +/** + * cik_sdma_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the DMA engines and enable them (CIK). + * Returns 0 for success, error for failure. + */ +int cik_sdma_resume(struct radeon_device *rdev) +{ + int r; + + /* Reset dma */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + RREG32(SRBM_SOFT_RESET); + + r = cik_sdma_load_microcode(rdev); + if (r) + return r; + + /* unhalt the MEs */ + cik_sdma_enable(rdev, true); + + /* start the gfx rings and rlc compute queues */ + r = cik_sdma_gfx_resume(rdev); + if (r) + return r; + r = cik_sdma_rlc_resume(rdev); + if (r) + return r; + + return 0; +} + +/** + * cik_sdma_fini - tear down the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines and free the rings (CIK). + */ +void cik_sdma_fini(struct radeon_device *rdev) +{ + /* stop the gfx rings and rlc compute queues */ + cik_sdma_gfx_stop(rdev); + cik_sdma_rlc_stop(rdev); + /* halt the MEs */ + cik_sdma_enable(rdev, false); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); + /* XXX - compute dma queue tear down */ +} + +/** + * cik_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (CIK). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int cik_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_bytes, cur_size_in_bytes; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); + num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); + r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_bytes = size_in_bytes; + if (cur_size_in_bytes > 0x1fffff) + cur_size_in_bytes = 0x1fffff; + size_in_bytes -= cur_size_in_bytes; + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); + radeon_ring_write(ring, cur_size_in_bytes); + radeon_ring_write(ring, 0); /* src/dst endian swap */ + radeon_ring_write(ring, src_offset & 0xffffffff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff); + src_offset += cur_size_in_bytes; + dst_offset += cur_size_in_bytes; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} + +/** + * cik_sdma_ring_test - simple async dma engine test + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. (CIK). + * Returns 0 for success, error for failure. + */ +int cik_sdma_ring_test(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + unsigned i; + int r; + volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr; + u32 tmp; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ring_lock(rdev, ring, 4); + if (r) { + DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); + return r; + } + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); + radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff); + radeon_ring_write(ring, 1); /* number of DWs to follow */ + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/** + * cik_sdma_ib_test - test an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test a simple IB in the DMA ring (CIK). + * Returns 0 on success, error on failure. + */ +int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_ib ib; + unsigned i; + int r; + volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr; + u32 tmp = 0; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; + ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff; + ib.ptr[3] = 1; + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + return r; + } + r = radeon_fence_wait(ib.fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); + } else { + DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + radeon_ib_free(rdev, &ib); + return r; +} + +/** + * cik_sdma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up (CIK). + * Returns true if the engine appears to be locked up, false if not. + */ +bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = cik_gpu_check_soft_reset(rdev); + u32 mask; + + if (ring->idx == R600_RING_TYPE_DMA_INDEX) + mask = RADEON_RESET_DMA; + else + mask = RADEON_RESET_DMA1; + + if (!(reset_mask & mask)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + +/** + * cik_sdma_vm_set_page - update the page tables using sDMA + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA (CIK). + */ +void cik_sdma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) +{ + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + + if (flags & RADEON_VM_PAGE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + while (count) { + ndw = count; + if (ndw > 0x7FFFF) + ndw = 0x7FFFF; + + if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = ndw; /* number of entries */ + pe += ndw * 8; + addr += ndw * incr; + count -= ndw; + } + } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); +} + +/** + * cik_dma_vm_flush - cik vm flush using sDMA + * + * @rdev: radeon_device pointer + * + * Update the page table base and flush the VM TLB + * using sDMA (CIK). + */ +void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) | + SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ + u32 ref_and_mask; + + if (vm == NULL) + return; + + if (ridx == R600_RING_TYPE_DMA_INDEX) + ref_and_mask = SDMA0; + else + ref_and_mask = SDMA1; + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + if (vm->id < 8) { + radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + } else { + radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + } + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* update SH_MEM_* regs */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); + radeon_ring_write(ring, VMID(vm->id)); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_BASES >> 2); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_CONFIG >> 2); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2); + radeon_ring_write(ring, 1); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); + radeon_ring_write(ring, VMID(0)); + + /* flush HDP */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); + radeon_ring_write(ring, GPU_HDP_FLUSH_DONE); + radeon_ring_write(ring, GPU_HDP_FLUSH_REQ); + radeon_ring_write(ring, ref_and_mask); /* REFERENCE */ + radeon_ring_write(ring, ref_and_mask); /* MASK */ + radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */ + + /* flush TLB */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 1 << vm->id); +} diff --git a/sys/dev/drm/radeon/cikd.h b/sys/dev/drm/radeon/cikd.h index 7e9275eaef..203d2a09a1 100644 --- a/sys/dev/drm/radeon/cikd.h +++ b/sys/dev/drm/radeon/cikd.h @@ -28,21 +28,375 @@ #define CIK_RB_BITMAP_WIDTH_PER_SH 2 +/* DIDT IND registers */ +#define DIDT_SQ_CTRL0 0x0 +# define DIDT_CTRL_EN (1 << 0) +#define DIDT_DB_CTRL0 0x20 +#define DIDT_TD_CTRL0 0x40 +#define DIDT_TCP_CTRL0 0x60 + /* SMC IND registers */ +#define DPM_TABLE_475 0x3F768 +# define SamuBootLevel(x) ((x) << 0) +# define SamuBootLevel_MASK 0x000000ff +# define SamuBootLevel_SHIFT 0 +# define AcpBootLevel(x) ((x) << 8) +# define AcpBootLevel_MASK 0x0000ff00 +# define AcpBootLevel_SHIFT 8 +# define VceBootLevel(x) ((x) << 16) +# define VceBootLevel_MASK 0x00ff0000 +# define VceBootLevel_SHIFT 16 +# define UvdBootLevel(x) ((x) << 24) +# define UvdBootLevel_MASK 0xff000000 +# define UvdBootLevel_SHIFT 24 + +#define FIRMWARE_FLAGS 0x3F800 +# define INTERRUPTS_ENABLED (1 << 0) + +#define NB_DPM_CONFIG_1 0x3F9E8 +# define Dpm0PgNbPsLo(x) ((x) << 0) +# define Dpm0PgNbPsLo_MASK 0x000000ff +# define Dpm0PgNbPsLo_SHIFT 0 +# define Dpm0PgNbPsHi(x) ((x) << 8) +# define Dpm0PgNbPsHi_MASK 0x0000ff00 +# define Dpm0PgNbPsHi_SHIFT 8 +# define DpmXNbPsLo(x) ((x) << 16) +# define DpmXNbPsLo_MASK 0x00ff0000 +# define DpmXNbPsLo_SHIFT 16 +# define DpmXNbPsHi(x) ((x) << 24) +# define DpmXNbPsHi_MASK 0xff000000 +# define DpmXNbPsHi_SHIFT 24 + +#define SMC_SYSCON_RESET_CNTL 0x80000000 +# define RST_REG (1 << 0) +#define SMC_SYSCON_CLOCK_CNTL_0 0x80000004 +# define CK_DISABLE (1 << 0) +# define CKEN (1 << 24) + +#define SMC_SYSCON_MISC_CNTL 0x80000010 + +#define SMC_SYSCON_MSG_ARG_0 0x80000068 + +#define SMC_PC_C 0x80000370 + +#define SMC_SCRATCH9 0x80000424 + +#define RCU_UC_EVENTS 0xC0000004 +# define BOOT_SEQ_DONE (1 << 7) + #define GENERAL_PWRMGT 0xC0200000 +# define GLOBAL_PWRMGT_EN (1 << 0) +# define STATIC_PM_EN (1 << 1) +# define THERMAL_PROTECTION_DIS (1 << 2) +# define THERMAL_PROTECTION_TYPE (1 << 3) +# define SW_SMIO_INDEX(x) ((x) << 6) +# define SW_SMIO_INDEX_MASK (1 << 6) +# define SW_SMIO_INDEX_SHIFT 6 +# define VOLT_PWRMGT_EN (1 << 10) # define GPU_COUNTER_CLK (1 << 15) - +# define DYN_SPREAD_SPECTRUM_EN (1 << 23) + +#define CNB_PWRMGT_CNTL 0xC0200004 +# define GNB_SLOW_MODE(x) ((x) << 0) +# define GNB_SLOW_MODE_MASK (3 << 0) +# define GNB_SLOW_MODE_SHIFT 0 +# define GNB_SLOW (1 << 2) +# define FORCE_NB_PS1 (1 << 3) +# define DPM_ENABLED (1 << 4) + +#define SCLK_PWRMGT_CNTL 0xC0200008 +# define SCLK_PWRMGT_OFF (1 << 0) +# define RESET_BUSY_CNT (1 << 4) +# define RESET_SCLK_CNT (1 << 5) +# define DYNAMIC_PM_EN (1 << 21) + +#define TARGET_AND_CURRENT_PROFILE_INDEX 0xC0200014 +# define CURRENT_STATE_MASK (0xf << 4) +# define CURRENT_STATE_SHIFT 4 +# define CURR_MCLK_INDEX_MASK (0xf << 8) +# define CURR_MCLK_INDEX_SHIFT 8 +# define CURR_SCLK_INDEX_MASK (0x1f << 16) +# define CURR_SCLK_INDEX_SHIFT 16 + +#define CG_SSP 0xC0200044 +# define SST(x) ((x) << 0) +# define SST_MASK (0xffff << 0) +# define SSTU(x) ((x) << 16) +# define SSTU_MASK (0xf << 16) + +#define CG_DISPLAY_GAP_CNTL 0xC0200060 +# define DISP_GAP(x) ((x) << 0) +# define DISP_GAP_MASK (3 << 0) +# define VBI_TIMER_COUNT(x) ((x) << 4) +# define VBI_TIMER_COUNT_MASK (0x3fff << 4) +# define VBI_TIMER_UNIT(x) ((x) << 20) +# define VBI_TIMER_UNIT_MASK (7 << 20) +# define DISP_GAP_MCHG(x) ((x) << 24) +# define DISP_GAP_MCHG_MASK (3 << 24) + +#define SMU_VOLTAGE_STATUS 0xC0200094 +# define SMU_VOLTAGE_CURRENT_LEVEL_MASK (0xff << 1) +# define SMU_VOLTAGE_CURRENT_LEVEL_SHIFT 1 + +#define TARGET_AND_CURRENT_PROFILE_INDEX_1 0xC02000F0 +# define CURR_PCIE_INDEX_MASK (0xf << 24) +# define CURR_PCIE_INDEX_SHIFT 24 + +#define CG_ULV_PARAMETER 0xC0200158 + +#define CG_FTV_0 0xC02001A8 +#define CG_FTV_1 0xC02001AC +#define CG_FTV_2 0xC02001B0 +#define CG_FTV_3 0xC02001B4 +#define CG_FTV_4 0xC02001B8 +#define CG_FTV_5 0xC02001BC +#define CG_FTV_6 0xC02001C0 +#define CG_FTV_7 0xC02001C4 + +#define CG_DISPLAY_GAP_CNTL2 0xC0200230 + +#define LCAC_SX0_OVR_SEL 0xC0400D04 +#define LCAC_SX0_OVR_VAL 0xC0400D08 + +#define LCAC_MC0_CNTL 0xC0400D30 +#define LCAC_MC0_OVR_SEL 0xC0400D34 +#define LCAC_MC0_OVR_VAL 0xC0400D38 +#define LCAC_MC1_CNTL 0xC0400D3C +#define LCAC_MC1_OVR_SEL 0xC0400D40 +#define LCAC_MC1_OVR_VAL 0xC0400D44 + +#define LCAC_MC2_OVR_SEL 0xC0400D4C +#define LCAC_MC2_OVR_VAL 0xC0400D50 + +#define LCAC_MC3_OVR_SEL 0xC0400D58 +#define LCAC_MC3_OVR_VAL 0xC0400D5C + +#define LCAC_CPL_CNTL 0xC0400D80 +#define LCAC_CPL_OVR_SEL 0xC0400D84 +#define LCAC_CPL_OVR_VAL 0xC0400D88 + +/* dGPU */ +#define CG_THERMAL_CTRL 0xC0300004 +#define DPM_EVENT_SRC(x) ((x) << 0) +#define DPM_EVENT_SRC_MASK (7 << 0) +#define DIG_THERM_DPM(x) ((x) << 14) +#define DIG_THERM_DPM_MASK 0x003FC000 +#define DIG_THERM_DPM_SHIFT 14 + +#define CG_THERMAL_INT 0xC030000C +#define CI_DIG_THERM_INTH(x) ((x) << 8) +#define CI_DIG_THERM_INTH_MASK 0x0000FF00 +#define CI_DIG_THERM_INTH_SHIFT 8 +#define CI_DIG_THERM_INTL(x) ((x) << 16) +#define CI_DIG_THERM_INTL_MASK 0x00FF0000 +#define CI_DIG_THERM_INTL_SHIFT 16 +#define THERM_INT_MASK_HIGH (1 << 24) +#define THERM_INT_MASK_LOW (1 << 25) + +#define CG_MULT_THERMAL_STATUS 0xC0300014 +#define ASIC_MAX_TEMP(x) ((x) << 0) +#define ASIC_MAX_TEMP_MASK 0x000001ff +#define ASIC_MAX_TEMP_SHIFT 0 +#define CTF_TEMP(x) ((x) << 9) +#define CTF_TEMP_MASK 0x0003fe00 +#define CTF_TEMP_SHIFT 9 + +#define CG_SPLL_FUNC_CNTL 0xC0500140 +#define SPLL_RESET (1 << 0) +#define SPLL_PWRON (1 << 1) +#define SPLL_BYPASS_EN (1 << 3) +#define SPLL_REF_DIV(x) ((x) << 5) +#define SPLL_REF_DIV_MASK (0x3f << 5) +#define SPLL_PDIV_A(x) ((x) << 20) +#define SPLL_PDIV_A_MASK (0x7f << 20) +#define SPLL_PDIV_A_SHIFT 20 +#define CG_SPLL_FUNC_CNTL_2 0xC0500144 +#define SCLK_MUX_SEL(x) ((x) << 0) +#define SCLK_MUX_SEL_MASK (0x1ff << 0) +#define CG_SPLL_FUNC_CNTL_3 0xC0500148 +#define SPLL_FB_DIV(x) ((x) << 0) +#define SPLL_FB_DIV_MASK (0x3ffffff << 0) +#define SPLL_FB_DIV_SHIFT 0 +#define SPLL_DITHEN (1 << 28) +#define CG_SPLL_FUNC_CNTL_4 0xC050014C + +#define CG_SPLL_SPREAD_SPECTRUM 0xC0500164 +#define SSEN (1 << 0) +#define CLK_S(x) ((x) << 4) +#define CLK_S_MASK (0xfff << 4) +#define CLK_S_SHIFT 4 +#define CG_SPLL_SPREAD_SPECTRUM_2 0xC0500168 +#define CLK_V(x) ((x) << 0) +#define CLK_V_MASK (0x3ffffff << 0) +#define CLK_V_SHIFT 0 + +#define MPLL_BYPASSCLK_SEL 0xC050019C +# define MPLL_CLKOUT_SEL(x) ((x) << 8) +# define MPLL_CLKOUT_SEL_MASK 0xFF00 #define CG_CLKPIN_CNTL 0xC05001A0 # define XTALIN_DIVIDE (1 << 1) - +# define BCLK_AS_XCLK (1 << 2) +#define CG_CLKPIN_CNTL_2 0xC05001A4 +# define FORCE_BIF_REFCLK_EN (1 << 3) +# define MUX_TCLK_TO_XCLK (1 << 8) +#define THM_CLK_CNTL 0xC05001A8 +# define CMON_CLK_SEL(x) ((x) << 0) +# define CMON_CLK_SEL_MASK 0xFF +# define TMON_CLK_SEL(x) ((x) << 8) +# define TMON_CLK_SEL_MASK 0xFF00 +#define MISC_CLK_CTRL 0xC05001AC +# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0) +# define DEEP_SLEEP_CLK_SEL_MASK 0xFF +# define ZCLK_SEL(x) ((x) << 8) +# define ZCLK_SEL_MASK 0xFF00 + +/* KV/KB */ +#define CG_THERMAL_INT_CTRL 0xC2100028 +#define DIG_THERM_INTH(x) ((x) << 0) +#define DIG_THERM_INTH_MASK 0x000000FF +#define DIG_THERM_INTH_SHIFT 0 +#define DIG_THERM_INTL(x) ((x) << 8) +#define DIG_THERM_INTL_MASK 0x0000FF00 +#define DIG_THERM_INTL_SHIFT 8 +#define THERM_INTH_MASK (1 << 24) +#define THERM_INTL_MASK (1 << 25) + +/* PCIE registers idx/data 0x38/0x3c */ +#define PB0_PIF_PWRDOWN_0 0x1100012 /* PCIE */ +# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7) +# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7) +# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7 +# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10) +# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10) +# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10 +# define PLL_RAMP_UP_TIME_0(x) ((x) << 24) +# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24) +# define PLL_RAMP_UP_TIME_0_SHIFT 24 +#define PB0_PIF_PWRDOWN_1 0x1100013 /* PCIE */ +# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7) +# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7) +# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7 +# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10) +# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10) +# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10 +# define PLL_RAMP_UP_TIME_1(x) ((x) << 24) +# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24) +# define PLL_RAMP_UP_TIME_1_SHIFT 24 + +#define PCIE_CNTL2 0x1001001c /* PCIE */ +# define SLV_MEM_LS_EN (1 << 16) +# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17) +# define MST_MEM_LS_EN (1 << 18) +# define REPLAY_MEM_LS_EN (1 << 19) + +#define PCIE_LC_STATUS1 0x1400028 /* PCIE */ +# define LC_REVERSE_RCVR (1 << 0) +# define LC_REVERSE_XMIT (1 << 1) +# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2) +# define LC_OPERATING_LINK_WIDTH_SHIFT 2 +# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5) +# define LC_DETECTED_LINK_WIDTH_SHIFT 5 + +#define PCIE_P_CNTL 0x1400040 /* PCIE */ +# define P_IGNORE_EDB_ERR (1 << 6) + +#define PB1_PIF_PWRDOWN_0 0x2100012 /* PCIE */ +#define PB1_PIF_PWRDOWN_1 0x2100013 /* PCIE */ + +#define PCIE_LC_CNTL 0x100100A0 /* PCIE */ +# define LC_L0S_INACTIVITY(x) ((x) << 8) +# define LC_L0S_INACTIVITY_MASK (0xf << 8) +# define LC_L0S_INACTIVITY_SHIFT 8 +# define LC_L1_INACTIVITY(x) ((x) << 12) +# define LC_L1_INACTIVITY_MASK (0xf << 12) +# define LC_L1_INACTIVITY_SHIFT 12 +# define LC_PMI_TO_L1_DIS (1 << 16) +# define LC_ASPM_TO_L1_DIS (1 << 24) + +#define PCIE_LC_LINK_WIDTH_CNTL 0x100100A2 /* PCIE */ +# define LC_LINK_WIDTH_SHIFT 0 +# define LC_LINK_WIDTH_MASK 0x7 +# define LC_LINK_WIDTH_X0 0 +# define LC_LINK_WIDTH_X1 1 +# define LC_LINK_WIDTH_X2 2 +# define LC_LINK_WIDTH_X4 3 +# define LC_LINK_WIDTH_X8 4 +# define LC_LINK_WIDTH_X16 6 +# define LC_LINK_WIDTH_RD_SHIFT 4 +# define LC_LINK_WIDTH_RD_MASK 0x70 +# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7) +# define LC_RECONFIG_NOW (1 << 8) +# define LC_RENEGOTIATION_SUPPORT (1 << 9) +# define LC_RENEGOTIATE_EN (1 << 10) +# define LC_SHORT_RECONFIG_EN (1 << 11) +# define LC_UPCONFIGURE_SUPPORT (1 << 12) +# define LC_UPCONFIGURE_DIS (1 << 13) +# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21) +# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21) +# define LC_DYN_LANES_PWR_STATE_SHIFT 21 +#define PCIE_LC_N_FTS_CNTL 0x100100a3 /* PCIE */ +# define LC_XMIT_N_FTS(x) ((x) << 0) +# define LC_XMIT_N_FTS_MASK (0xff << 0) +# define LC_XMIT_N_FTS_SHIFT 0 +# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8) +# define LC_N_FTS_MASK (0xff << 24) +#define PCIE_LC_SPEED_CNTL 0x100100A4 /* PCIE */ +# define LC_GEN2_EN_STRAP (1 << 0) +# define LC_GEN3_EN_STRAP (1 << 1) +# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2) +# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3) +# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3 +# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5) +# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6) +# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7) +# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8) +# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9) +# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10) +# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10 +# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */ +# define LC_CURRENT_DATA_RATE_SHIFT 13 +# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16) +# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18) +# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19) +# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20) +# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21) + +#define PCIE_LC_CNTL2 0x100100B1 /* PCIE */ +# define LC_ALLOW_PDWN_IN_L1 (1 << 17) +# define LC_ALLOW_PDWN_IN_L23 (1 << 18) + +#define PCIE_LC_CNTL3 0x100100B5 /* PCIE */ +# define LC_GO_TO_RECOVERY (1 << 30) +#define PCIE_LC_CNTL4 0x100100B6 /* PCIE */ +# define LC_REDO_EQ (1 << 5) +# define LC_SET_QUIESCE (1 << 13) + +/* direct registers */ #define PCIE_INDEX 0x38 #define PCIE_DATA 0x3C +#define SMC_IND_INDEX_0 0x200 +#define SMC_IND_DATA_0 0x204 + +#define SMC_IND_ACCESS_CNTL 0x240 +#define AUTO_INCREMENT_IND_0 (1 << 0) + +#define SMC_MESSAGE_0 0x250 +#define SMC_MSG_MASK 0xffff +#define SMC_RESP_0 0x254 +#define SMC_RESP_MASK 0xffff + +#define SMC_MSG_ARG_0 0x290 + #define VGA_HDP_CONTROL 0x328 #define VGA_MEMORY_DISABLE (1 << 4) #define DMIF_ADDR_CALC 0xC00 +#define PIPE0_DMIF_BUFFER_CONTROL 0x0ca0 +# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) +# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) + #define SRBM_GFX_CNTL 0xE44 #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) @@ -172,6 +526,10 @@ #define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C #define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x1580 +#define VM_L2_CG 0x15c0 +#define MC_CG_ENABLE (1 << 18) +#define MC_LS_ENABLE (1 << 19) + #define MC_SHARED_CHMAP 0x2004 #define NOOFCHAN_SHIFT 12 #define NOOFCHAN_MASK 0x0000f000 @@ -201,6 +559,17 @@ #define MC_SHARED_BLACKOUT_CNTL 0x20ac +#define MC_HUB_MISC_HUB_CG 0x20b8 +#define MC_HUB_MISC_VM_CG 0x20bc + +#define MC_HUB_MISC_SIP_CG 0x20c0 + +#define MC_XPB_CLK_GAT 0x2478 + +#define MC_CITF_MISC_RD_CG 0x2648 +#define MC_CITF_MISC_WR_CG 0x264c +#define MC_CITF_MISC_VM_CG 0x2650 + #define MC_ARB_RAMCFG 0x2760 #define NOOFBANK_SHIFT 0 #define NOOFBANK_MASK 0x00000003 @@ -215,9 +584,37 @@ #define NOOFGROUPS_SHIFT 12 #define NOOFGROUPS_MASK 0x00001000 +#define MC_ARB_DRAM_TIMING 0x2774 +#define MC_ARB_DRAM_TIMING2 0x2778 + +#define MC_ARB_BURST_TIME 0x2808 +#define STATE0(x) ((x) << 0) +#define STATE0_MASK (0x1f << 0) +#define STATE0_SHIFT 0 +#define STATE1(x) ((x) << 5) +#define STATE1_MASK (0x1f << 5) +#define STATE1_SHIFT 5 +#define STATE2(x) ((x) << 10) +#define STATE2_MASK (0x1f << 10) +#define STATE2_SHIFT 10 +#define STATE3(x) ((x) << 15) +#define STATE3_MASK (0x1f << 15) +#define STATE3_SHIFT 15 + +#define MC_SEQ_RAS_TIMING 0x28a0 +#define MC_SEQ_CAS_TIMING 0x28a4 +#define MC_SEQ_MISC_TIMING 0x28a8 +#define MC_SEQ_MISC_TIMING2 0x28ac +#define MC_SEQ_PMG_TIMING 0x28b0 +#define MC_SEQ_RD_CTL_D0 0x28b4 +#define MC_SEQ_RD_CTL_D1 0x28b8 +#define MC_SEQ_WR_CTL_D0 0x28bc +#define MC_SEQ_WR_CTL_D1 0x28c0 + #define MC_SEQ_SUP_CNTL 0x28c8 #define RUN_MASK (1 << 0) #define MC_SEQ_SUP_PGM 0x28cc +#define MC_PMG_AUTO_CMD 0x28d0 #define MC_SEQ_TRAIN_WAKEUP_CNTL 0x28e8 #define TRAIN_DONE_D0 (1 << 30) @@ -226,10 +623,92 @@ #define MC_IO_PAD_CNTL_D0 0x29d0 #define MEM_FALL_OUT_CMD (1 << 8) +#define MC_SEQ_MISC0 0x2a00 +#define MC_SEQ_MISC0_VEN_ID_SHIFT 8 +#define MC_SEQ_MISC0_VEN_ID_MASK 0x00000f00 +#define MC_SEQ_MISC0_VEN_ID_VALUE 3 +#define MC_SEQ_MISC0_REV_ID_SHIFT 12 +#define MC_SEQ_MISC0_REV_ID_MASK 0x0000f000 +#define MC_SEQ_MISC0_REV_ID_VALUE 1 +#define MC_SEQ_MISC0_GDDR5_SHIFT 28 +#define MC_SEQ_MISC0_GDDR5_MASK 0xf0000000 +#define MC_SEQ_MISC0_GDDR5_VALUE 5 +#define MC_SEQ_MISC1 0x2a04 +#define MC_SEQ_RESERVE_M 0x2a08 +#define MC_PMG_CMD_EMRS 0x2a0c + #define MC_SEQ_IO_DEBUG_INDEX 0x2a44 #define MC_SEQ_IO_DEBUG_DATA 0x2a48 +#define MC_SEQ_MISC5 0x2a54 +#define MC_SEQ_MISC6 0x2a58 + +#define MC_SEQ_MISC7 0x2a64 + +#define MC_SEQ_RAS_TIMING_LP 0x2a6c +#define MC_SEQ_CAS_TIMING_LP 0x2a70 +#define MC_SEQ_MISC_TIMING_LP 0x2a74 +#define MC_SEQ_MISC_TIMING2_LP 0x2a78 +#define MC_SEQ_WR_CTL_D0_LP 0x2a7c +#define MC_SEQ_WR_CTL_D1_LP 0x2a80 +#define MC_SEQ_PMG_CMD_EMRS_LP 0x2a84 +#define MC_SEQ_PMG_CMD_MRS_LP 0x2a88 + +#define MC_PMG_CMD_MRS 0x2aac + +#define MC_SEQ_RD_CTL_D0_LP 0x2b1c +#define MC_SEQ_RD_CTL_D1_LP 0x2b20 + +#define MC_PMG_CMD_MRS1 0x2b44 +#define MC_SEQ_PMG_CMD_MRS1_LP 0x2b48 +#define MC_SEQ_PMG_TIMING_LP 0x2b4c + +#define MC_SEQ_WR_CTL_2 0x2b54 +#define MC_SEQ_WR_CTL_2_LP 0x2b58 +#define MC_PMG_CMD_MRS2 0x2b5c +#define MC_SEQ_PMG_CMD_MRS2_LP 0x2b60 + +#define MCLK_PWRMGT_CNTL 0x2ba0 +# define DLL_SPEED(x) ((x) << 0) +# define DLL_SPEED_MASK (0x1f << 0) +# define DLL_READY (1 << 6) +# define MC_INT_CNTL (1 << 7) +# define MRDCK0_PDNB (1 << 8) +# define MRDCK1_PDNB (1 << 9) +# define MRDCK0_RESET (1 << 16) +# define MRDCK1_RESET (1 << 17) +# define DLL_READY_READ (1 << 24) +#define DLL_CNTL 0x2ba4 +# define MRDCK0_BYPASS (1 << 24) +# define MRDCK1_BYPASS (1 << 25) + +#define MPLL_FUNC_CNTL 0x2bb4 +#define BWCTRL(x) ((x) << 20) +#define BWCTRL_MASK (0xff << 20) +#define MPLL_FUNC_CNTL_1 0x2bb8 +#define VCO_MODE(x) ((x) << 0) +#define VCO_MODE_MASK (3 << 0) +#define CLKFRAC(x) ((x) << 4) +#define CLKFRAC_MASK (0xfff << 4) +#define CLKF(x) ((x) << 16) +#define CLKF_MASK (0xfff << 16) +#define MPLL_FUNC_CNTL_2 0x2bbc +#define MPLL_AD_FUNC_CNTL 0x2bc0 +#define YCLK_POST_DIV(x) ((x) << 0) +#define YCLK_POST_DIV_MASK (7 << 0) +#define MPLL_DQ_FUNC_CNTL 0x2bc4 +#define YCLK_SEL(x) ((x) << 4) +#define YCLK_SEL_MASK (1 << 4) + +#define MPLL_SS1 0x2bcc +#define CLKV(x) ((x) << 0) +#define CLKV_MASK (0x3ffffff << 0) +#define MPLL_SS2 0x2bd0 +#define CLKS(x) ((x) << 0) +#define CLKS_MASK (0xfff << 0) + #define HDP_HOST_PATH_CNTL 0x2C00 +#define CLOCK_GATING_DIS (1 << 23) #define HDP_NONSURFACE_BASE 0x2C04 #define HDP_NONSURFACE_INFO 0x2C08 #define HDP_NONSURFACE_SIZE 0x2C0C @@ -237,6 +716,26 @@ #define HDP_ADDR_CONFIG 0x2F48 #define HDP_MISC_CNTL 0x2F4C #define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) +#define HDP_MEM_POWER_LS 0x2F50 +#define HDP_LS_ENABLE (1 << 0) + +#define ATC_MISC_CG 0x3350 + +#define MC_SEQ_CNTL_3 0x3600 +# define CAC_EN (1 << 31) +#define MC_SEQ_G5PDX_CTRL 0x3604 +#define MC_SEQ_G5PDX_CTRL_LP 0x3608 +#define MC_SEQ_G5PDX_CMD0 0x360c +#define MC_SEQ_G5PDX_CMD0_LP 0x3610 +#define MC_SEQ_G5PDX_CMD1 0x3614 +#define MC_SEQ_G5PDX_CMD1_LP 0x3618 + +#define MC_SEQ_PMG_DVS_CTL 0x3628 +#define MC_SEQ_PMG_DVS_CTL_LP 0x362c +#define MC_SEQ_PMG_DVS_CMD 0x3630 +#define MC_SEQ_PMG_DVS_CMD_LP 0x3634 +#define MC_SEQ_DLL_STBY 0x3638 +#define MC_SEQ_DLL_STBY_LP 0x363c #define IH_RB_CNTL 0x3e00 # define IH_RB_ENABLE (1 << 0) @@ -265,6 +764,9 @@ # define MC_WR_CLEAN_CNT(x) ((x) << 20) # define MC_VMID(x) ((x) << 25) +#define BIF_LNCNT_RESET 0x5220 +# define RESET_LNCNT_EN (1 << 0) + #define CONFIG_MEMSIZE 0x5428 #define INTERRUPT_CNTL 0x5468 @@ -401,6 +903,9 @@ # define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) # define DC_HPDx_EN (1 << 28) +#define DPG_PIPE_STUTTER_CONTROL 0x6cd4 +# define STUTTER_ENABLE (1 << 0) + #define GRBM_CNTL 0x8000 #define GRBM_READ_TIMEOUT(x) ((x) << 0) @@ -504,6 +1009,9 @@ #define CP_RB0_RPTR 0x8700 #define CP_RB_WPTR_DELAY 0x8704 +#define CP_RB_WPTR_POLL_CNTL 0x8708 +#define IDLE_POLL_COUNT(x) ((x) << 16) +#define IDLE_POLL_COUNT_MASK (0xffff << 16) #define CP_MEQ_THRESHOLDS 0x8764 #define MEQ1_START(x) ((x) << 0) @@ -730,6 +1238,9 @@ # define CP_RINGID1_INT_STAT (1 << 30) # define CP_RINGID0_INT_STAT (1 << 31) +#define CP_MEM_SLP_CNTL 0xC1E4 +# define CP_MEM_LS_EN (1 << 0) + #define CP_CPF_DEBUG 0xC200 #define CP_PQ_WPTR_POLL_CNTL 0xC20C @@ -775,14 +1286,20 @@ #define RLC_MC_CNTL 0xC30C +#define RLC_MEM_SLP_CNTL 0xC318 +# define RLC_MEM_LS_EN (1 << 0) + #define RLC_LB_CNTR_MAX 0xC348 #define RLC_LB_CNTL 0xC364 +# define LOAD_BALANCE_ENABLE (1 << 0) #define RLC_LB_CNTR_INIT 0xC36C #define RLC_SAVE_AND_RESTORE_BASE 0xC374 -#define RLC_DRIVER_DMA_STATUS 0xC378 +#define RLC_DRIVER_DMA_STATUS 0xC378 /* dGPU */ +#define RLC_CP_TABLE_RESTORE 0xC378 /* APU */ +#define RLC_PG_DELAY_2 0xC37C #define RLC_GPM_UCODE_ADDR 0xC388 #define RLC_GPM_UCODE_DATA 0xC38C @@ -791,12 +1308,52 @@ #define RLC_CAPTURE_GPU_CLOCK_COUNT 0xC398 #define RLC_UCODE_CNTL 0xC39C +#define RLC_GPM_STAT 0xC400 +# define RLC_GPM_BUSY (1 << 0) +# define GFX_POWER_STATUS (1 << 1) +# define GFX_CLOCK_STATUS (1 << 2) + +#define RLC_PG_CNTL 0xC40C +# define GFX_PG_ENABLE (1 << 0) +# define GFX_PG_SRC (1 << 1) +# define DYN_PER_CU_PG_ENABLE (1 << 2) +# define STATIC_PER_CU_PG_ENABLE (1 << 3) +# define DISABLE_GDS_PG (1 << 13) +# define DISABLE_CP_PG (1 << 15) +# define SMU_CLK_SLOWDOWN_ON_PU_ENABLE (1 << 17) +# define SMU_CLK_SLOWDOWN_ON_PD_ENABLE (1 << 18) + +#define RLC_CGTT_MGCG_OVERRIDE 0xC420 #define RLC_CGCG_CGLS_CTRL 0xC424 +# define CGCG_EN (1 << 0) +# define CGLS_EN (1 << 1) + +#define RLC_PG_DELAY 0xC434 #define RLC_LB_INIT_CU_MASK 0xC43C #define RLC_LB_PARAMS 0xC444 +#define RLC_PG_AO_CU_MASK 0xC44C + +#define RLC_MAX_PG_CU 0xC450 +# define MAX_PU_CU(x) ((x) << 0) +# define MAX_PU_CU_MASK (0xff << 0) +#define RLC_AUTO_PG_CTRL 0xC454 +# define AUTO_PG_EN (1 << 0) +# define GRBM_REG_SGIT(x) ((x) << 3) +# define GRBM_REG_SGIT_MASK (0xffff << 3) + +#define RLC_SERDES_WR_CU_MASTER_MASK 0xC474 +#define RLC_SERDES_WR_NONCU_MASTER_MASK 0xC478 +#define RLC_SERDES_WR_CTRL 0xC47C +#define BPM_ADDR(x) ((x) << 0) +#define BPM_ADDR_MASK (0xff << 0) +#define CGLS_ENABLE (1 << 16) +#define CGCG_OVERRIDE_0 (1 << 20) +#define MGCG_OVERRIDE_0 (1 << 22) +#define MGCG_OVERRIDE_1 (1 << 23) + #define RLC_SERDES_CU_MASTER_BUSY 0xC484 #define RLC_SERDES_NONCU_MASTER_BUSY 0xC488 # define SE_MASTER_BUSY_MASK 0x0000ffff @@ -807,6 +1364,13 @@ #define RLC_GPM_SCRATCH_ADDR 0xC4B0 #define RLC_GPM_SCRATCH_DATA 0xC4B4 +#define RLC_GPR_REG2 0xC4E8 +#define REQ 0x00000001 +#define MESSAGE(x) ((x) << 1) +#define MESSAGE_MASK 0x0000001e +#define MSG_ENTER_RLC_SAFE_MODE 1 +#define MSG_EXIT_RLC_SAFE_MODE 0 + #define CP_HPD_EOP_BASE_ADDR 0xC904 #define CP_HPD_EOP_BASE_ADDR_HI 0xC908 #define CP_HPD_EOP_VMID 0xC90C @@ -851,6 +1415,8 @@ #define MQD_VMID(x) ((x) << 0) #define MQD_VMID_MASK (0xf << 0) +#define DB_RENDER_CONTROL 0x28000 + #define PA_SC_RASTER_CONFIG 0x28350 # define RASTER_CONFIG_RB_MAP_0 0 # define RASTER_CONFIG_RB_MAP_1 1 @@ -944,6 +1510,16 @@ #define CP_PERFMON_CNTL 0x36020 +#define CGTS_SM_CTRL_REG 0x3c000 +#define SM_MODE(x) ((x) << 17) +#define SM_MODE_MASK (0x7 << 17) +#define SM_MODE_ENABLE (1 << 20) +#define CGTS_OVERRIDE (1 << 21) +#define CGTS_LS_OVERRIDE (1 << 22) +#define ON_MONITOR_ADD_EN (1 << 23) +#define ON_MONITOR_ADD(x) ((x) << 24) +#define ON_MONITOR_ADD_MASK (0xff << 24) + #define CGTS_TCC_DISABLE 0x3c00c #define CGTS_USER_TCC_DISABLE 0x3c010 #define TCC_DISABLE_MASK 0xFFFF0000 @@ -1176,6 +1752,8 @@ #define SDMA0_UCODE_ADDR 0xD000 #define SDMA0_UCODE_DATA 0xD004 +#define SDMA0_POWER_CNTL 0xD008 +#define SDMA0_CLK_CTRL 0xD00C #define SDMA0_CNTL 0xD010 # define TRAP_ENABLE (1 << 0) @@ -1300,6 +1878,13 @@ #define UVD_RBC_RB_RPTR 0xf690 #define UVD_RBC_RB_WPTR 0xf694 +#define UVD_CGC_CTRL 0xF4B0 +# define DCM (1 << 0) +# define CG_DT(x) ((x) << 2) +# define CG_DT_MASK (0xf << 2) +# define CLK_OD(x) ((x) << 6) +# define CLK_OD_MASK (0x1f << 6) + /* UVD clocks */ #define CG_DCLK_CNTL 0xC050009C @@ -1310,4 +1895,7 @@ #define CG_VCLK_CNTL 0xC05000A4 #define CG_VCLK_STATUS 0xC05000A8 +/* UVD CTX indirect */ +#define UVD_CGC_MEM_CTRL 0xC0 + #endif diff --git a/sys/dev/drm/radeon/clearstate_cayman.h b/sys/dev/drm/radeon/clearstate_cayman.h index c00339440c..aa908c55a5 100644 --- a/sys/dev/drm/radeon/clearstate_cayman.h +++ b/sys/dev/drm/radeon/clearstate_cayman.h @@ -1073,7 +1073,7 @@ static const struct cs_extent_def SECT_CTRLCONST_defs[] = {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 }, { 0, 0, 0 } }; -struct cs_section_def cayman_cs_data[] = { +static const struct cs_section_def cayman_cs_data[] = { { SECT_CONTEXT_defs, SECT_CONTEXT }, { SECT_CLEAR_defs, SECT_CLEAR }, { SECT_CTRLCONST_defs, SECT_CTRLCONST }, diff --git a/sys/dev/drm/radeon/clearstate_cayman.h b/sys/dev/drm/radeon/clearstate_ci.h similarity index 57% copy from sys/dev/drm/radeon/clearstate_cayman.h copy to sys/dev/drm/radeon/clearstate_ci.h index c00339440c..c3982f9475 100644 --- a/sys/dev/drm/radeon/clearstate_cayman.h +++ b/sys/dev/drm/radeon/clearstate_ci.h @@ -1,5 +1,5 @@ /* - * Copyright 2012 Advanced Micro Devices, Inc. + * Copyright 2013 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,7 +21,7 @@ * */ -static const u32 SECT_CONTEXT_def_1[] = +static const unsigned int ci_SECT_CONTEXT_def_1[] = { 0x00000000, // DB_RENDER_CONTROL 0x00000000, // DB_COUNT_CONTROL @@ -31,8 +31,8 @@ static const u32 SECT_CONTEXT_def_1[] = 0x00000000, // DB_HTILE_DATA_BASE 0, // HOLE 0, // HOLE - 0, // HOLE - 0, // HOLE + 0x00000000, // DB_DEPTH_BOUNDS_MIN + 0x00000000, // DB_DEPTH_BOUNDS_MAX 0x00000000, // DB_STENCIL_CLEAR 0x00000000, // DB_DEPTH_CLEAR 0x00000000, // PA_SC_SCREEN_SCISSOR_TL @@ -55,6 +55,42 @@ static const u32 SECT_CONTEXT_def_1[] = 0, // HOLE 0, // HOLE 0, // HOLE + 0x00000000, // TA_BC_BASE_ADDR + 0x00000000, // TA_BC_BASE_ADDR_HI + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE @@ -103,54 +139,18 @@ static const u32 SECT_CONTEXT_def_1[] = 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_0 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_1 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_2 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_3 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_4 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_5 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_6 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_7 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_8 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_9 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_10 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_11 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_12 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_13 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_14 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_PS_15 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_0 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_1 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_2 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_3 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_4 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_5 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_6 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_7 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_8 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_9 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_10 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_11 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_12 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_13 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_14 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_VS_15 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_0 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_1 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_2 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_3 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_4 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_5 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_6 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_7 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_8 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_9 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_10 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_11 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_12 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_13 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_14 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_GS_15 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // COHER_DEST_BASE_HI_0 + 0x00000000, // COHER_DEST_BASE_HI_1 + 0x00000000, // COHER_DEST_BASE_HI_2 + 0x00000000, // COHER_DEST_BASE_HI_3 + 0x00000000, // COHER_DEST_BASE_2 + 0x00000000, // COHER_DEST_BASE_3 0x00000000, // PA_SC_WINDOW_OFFSET 0x80000000, // PA_SC_WINDOW_SCISSOR_TL 0x40004000, // PA_SC_WINDOW_SCISSOR_BR @@ -235,10 +235,11 @@ static const u32 SECT_CONTEXT_def_1[] = 0x3f800000, // PA_SC_VPORT_ZMAX_14 0x00000000, // PA_SC_VPORT_ZMIN_15 0x3f800000, // PA_SC_VPORT_ZMAX_15 - 0x00000000, // SX_MISC - 0x00000000, // SX_SURFACE_SYNC - 0x00000000, // SX_SCATTER_EXPORT_BASE - 0x00000000, // SX_SCATTER_EXPORT_SIZE +}; +static const unsigned int ci_SECT_CONTEXT_def_2[] = +{ + 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL + 0, // HOLE 0x00000000, // CP_PERFMON_CNTX_CNTL 0x00000000, // CP_RINGID 0x00000000, // CP_VMID @@ -247,53 +248,53 @@ static const u32 SECT_CONTEXT_def_1[] = 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_VTX_SEMANTIC_0 - 0x00000000, // SQ_VTX_SEMANTIC_1 - 0x00000000, // SQ_VTX_SEMANTIC_2 - 0x00000000, // SQ_VTX_SEMANTIC_3 - 0x00000000, // SQ_VTX_SEMANTIC_4 - 0x00000000, // SQ_VTX_SEMANTIC_5 - 0x00000000, // SQ_VTX_SEMANTIC_6 - 0x00000000, // SQ_VTX_SEMANTIC_7 - 0x00000000, // SQ_VTX_SEMANTIC_8 - 0x00000000, // SQ_VTX_SEMANTIC_9 - 0x00000000, // SQ_VTX_SEMANTIC_10 - 0x00000000, // SQ_VTX_SEMANTIC_11 - 0x00000000, // SQ_VTX_SEMANTIC_12 - 0x00000000, // SQ_VTX_SEMANTIC_13 - 0x00000000, // SQ_VTX_SEMANTIC_14 - 0x00000000, // SQ_VTX_SEMANTIC_15 - 0x00000000, // SQ_VTX_SEMANTIC_16 - 0x00000000, // SQ_VTX_SEMANTIC_17 - 0x00000000, // SQ_VTX_SEMANTIC_18 - 0x00000000, // SQ_VTX_SEMANTIC_19 - 0x00000000, // SQ_VTX_SEMANTIC_20 - 0x00000000, // SQ_VTX_SEMANTIC_21 - 0x00000000, // SQ_VTX_SEMANTIC_22 - 0x00000000, // SQ_VTX_SEMANTIC_23 - 0x00000000, // SQ_VTX_SEMANTIC_24 - 0x00000000, // SQ_VTX_SEMANTIC_25 - 0x00000000, // SQ_VTX_SEMANTIC_26 - 0x00000000, // SQ_VTX_SEMANTIC_27 - 0x00000000, // SQ_VTX_SEMANTIC_28 - 0x00000000, // SQ_VTX_SEMANTIC_29 - 0x00000000, // SQ_VTX_SEMANTIC_30 - 0x00000000, // SQ_VTX_SEMANTIC_31 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0xffffffff, // VGT_MAX_VTX_INDX 0x00000000, // VGT_MIN_VTX_INDX 0x00000000, // VGT_INDX_OFFSET 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX - 0x00000000, // SX_ALPHA_TEST_CONTROL + 0, // HOLE 0x00000000, // CB_BLEND_RED 0x00000000, // CB_BLEND_GREEN 0x00000000, // CB_BLEND_BLUE 0x00000000, // CB_BLEND_ALPHA 0, // HOLE 0, // HOLE - 0, // HOLE + 0x00000000, // DB_STENCIL_CONTROL 0x00000000, // DB_STENCILREFMASK 0x00000000, // DB_STENCILREFMASK_BF - 0x00000000, // SX_ALPHA_REF + 0, // HOLE 0x00000000, // PA_CL_VPORT_XSCALE 0x00000000, // PA_CL_VPORT_XOFFSET 0x00000000, // PA_CL_VPORT_YSCALE @@ -414,16 +415,16 @@ static const u32 SECT_CONTEXT_def_1[] = 0x00000000, // PA_CL_UCP_5_Y 0x00000000, // PA_CL_UCP_5_Z 0x00000000, // PA_CL_UCP_5_W - 0x00000000, // SPI_VS_OUT_ID_0 - 0x00000000, // SPI_VS_OUT_ID_1 - 0x00000000, // SPI_VS_OUT_ID_2 - 0x00000000, // SPI_VS_OUT_ID_3 - 0x00000000, // SPI_VS_OUT_ID_4 - 0x00000000, // SPI_VS_OUT_ID_5 - 0x00000000, // SPI_VS_OUT_ID_6 - 0x00000000, // SPI_VS_OUT_ID_7 - 0x00000000, // SPI_VS_OUT_ID_8 - 0x00000000, // SPI_VS_OUT_ID_9 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // SPI_PS_INPUT_CNTL_0 0x00000000, // SPI_PS_INPUT_CNTL_1 0x00000000, // SPI_PS_INPUT_CNTL_2 @@ -457,48 +458,48 @@ static const u32 SECT_CONTEXT_def_1[] = 0x00000000, // SPI_PS_INPUT_CNTL_30 0x00000000, // SPI_PS_INPUT_CNTL_31 0x00000000, // SPI_VS_OUT_CONFIG - 0x00000001, // SPI_THREAD_GROUPING - 0x00000002, // SPI_PS_IN_CONTROL_0 - 0x00000000, // SPI_PS_IN_CONTROL_1 + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_ENA + 0x00000000, // SPI_PS_INPUT_ADDR 0x00000000, // SPI_INTERP_CONTROL_0 - 0x00000000, // SPI_INPUT_Z - 0x00000000, // SPI_FOG_CNTL + 0x00000002, // SPI_PS_IN_CONTROL + 0, // HOLE 0x00000000, // SPI_BARYC_CNTL - 0x00000000, // SPI_PS_IN_CONTROL_2 - 0x00000000, // SPI_COMPUTE_INPUT_CNTL - 0x00000000, // SPI_COMPUTE_NUM_THREAD_X - 0x00000000, // SPI_COMPUTE_NUM_THREAD_Y - 0x00000000, // SPI_COMPUTE_NUM_THREAD_Z - 0x00000000, // SPI_GPR_MGMT - 0x00000000, // SPI_LDS_MGMT - 0x00000000, // SPI_STACK_MGMT - 0x00000000, // SPI_WAVE_MGMT_1 - 0x00000000, // SPI_WAVE_MGMT_2 + 0, // HOLE + 0x00000000, // SPI_TMPRING_SIZE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_SHADER_POS_FORMAT + 0x00000000, // SPI_SHADER_Z_FORMAT + 0x00000000, // SPI_SHADER_COL_FORMAT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // GDS_ADDR_BASE - 0x00003fff, // GDS_ADDR_SIZE 0, // HOLE 0, // HOLE - 0x00000000, // GDS_ORDERED_COUNT 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // GDS_APPEND_CONSUME_UAV0 - 0x00000000, // GDS_APPEND_CONSUME_UAV1 - 0x00000000, // GDS_APPEND_CONSUME_UAV2 - 0x00000000, // GDS_APPEND_CONSUME_UAV3 - 0x00000000, // GDS_APPEND_CONSUME_UAV4 - 0x00000000, // GDS_APPEND_CONSUME_UAV5 - 0x00000000, // GDS_APPEND_CONSUME_UAV6 - 0x00000000, // GDS_APPEND_CONSUME_UAV7 - 0x00000000, // GDS_APPEND_CONSUME_UAV8 - 0x00000000, // GDS_APPEND_CONSUME_UAV9 - 0x00000000, // GDS_APPEND_CONSUME_UAV10 - 0x00000000, // GDS_APPEND_CONSUME_UAV11 0, // HOLE 0, // HOLE 0, // HOLE @@ -512,7 +513,7 @@ static const u32 SECT_CONTEXT_def_1[] = 0x00000000, // CB_BLEND6_CONTROL 0x00000000, // CB_BLEND7_CONTROL }; -static const u32 SECT_CONTEXT_def_2[] = +static const unsigned int ci_SECT_CONTEXT_def_3[] = { 0x00000000, // PA_CL_POINT_X_RAD 0x00000000, // PA_CL_POINT_Y_RAD @@ -521,136 +522,136 @@ static const u32 SECT_CONTEXT_def_2[] = 0x00000000, // VGT_DMA_BASE_HI 0x00000000, // VGT_DMA_BASE }; -static const u32 SECT_CONTEXT_def_3[] = +static const unsigned int ci_SECT_CONTEXT_def_4[] = { 0x00000000, // DB_DEPTH_CONTROL 0x00000000, // DB_EQAA 0x00000000, // CB_COLOR_CONTROL - 0x00000200, // DB_SHADER_CONTROL - 0x00000000, // PA_CL_CLIP_CNTL - 0x00000000, // PA_SU_SC_MODE_CNTL + 0x00000000, // DB_SHADER_CONTROL + 0x00090000, // PA_CL_CLIP_CNTL + 0x00000004, // PA_SU_SC_MODE_CNTL 0x00000000, // PA_CL_VTE_CNTL 0x00000000, // PA_CL_VS_OUT_CNTL 0x00000000, // PA_CL_NANINF_CNTL 0x00000000, // PA_SU_LINE_STIPPLE_CNTL 0x00000000, // PA_SU_LINE_STIPPLE_SCALE 0x00000000, // PA_SU_PRIM_FILTER_CNTL - 0x00000000, // SQ_LSTMP_RING_ITEMSIZE - 0x00000000, // SQ_HSTMP_RING_ITEMSIZE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_PGM_START_PS - 0x00000000, // SQ_PGM_RESOURCES_PS - 0x00000000, // SQ_PGM_RESOURCES_2_PS - 0x00000000, // SQ_PGM_EXPORTS_PS 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_PGM_START_VS - 0x00000000, // SQ_PGM_RESOURCES_VS - 0x00000000, // SQ_PGM_RESOURCES_2_VS 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_PGM_START_GS - 0x00000000, // SQ_PGM_RESOURCES_GS - 0x00000000, // SQ_PGM_RESOURCES_2_GS 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_PGM_START_ES - 0x00000000, // SQ_PGM_RESOURCES_ES - 0x00000000, // SQ_PGM_RESOURCES_2_ES 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_PGM_START_FS - 0x00000000, // SQ_PGM_RESOURCES_FS 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_PGM_START_HS - 0x00000000, // SQ_PGM_RESOURCES_HS - 0x00000000, // SQ_PGM_RESOURCES_2_HS 0, // HOLE 0, // HOLE 0, // HOLE - 0x00000000, // SQ_PGM_START_LS - 0x00000000, // SQ_PGM_RESOURCES_LS - 0x00000000, // SQ_PGM_RESOURCES_2_LS -}; -static const u32 SECT_CONTEXT_def_4[] = -{ - 0x00000000, // SQ_LDS_ALLOC - 0x00000000, // SQ_LDS_ALLOC_PS - 0x00000000, // SQ_VTX_SEMANTIC_CLEAR - 0, // HOLE - 0x00000000, // SQ_THREAD_TRACE_CTRL - 0, // HOLE - 0x00000000, // SQ_ESGS_RING_ITEMSIZE - 0x00000000, // SQ_GSVS_RING_ITEMSIZE - 0x00000000, // SQ_ESTMP_RING_ITEMSIZE - 0x00000000, // SQ_GSTMP_RING_ITEMSIZE - 0x00000000, // SQ_VSTMP_RING_ITEMSIZE - 0x00000000, // SQ_PSTMP_RING_ITEMSIZE - 0, // HOLE - 0x00000000, // SQ_GS_VERT_ITEMSIZE - 0x00000000, // SQ_GS_VERT_ITEMSIZE_1 - 0x00000000, // SQ_GS_VERT_ITEMSIZE_2 - 0x00000000, // SQ_GS_VERT_ITEMSIZE_3 - 0x00000000, // SQ_GSVS_RING_OFFSET_1 - 0x00000000, // SQ_GSVS_RING_OFFSET_2 - 0x00000000, // SQ_GSVS_RING_OFFSET_3 - 0x00000000, // SQ_GWS_RING_OFFSET - 0, // HOLE - 0x00000000, // SQ_ALU_CONST_CACHE_PS_0 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_1 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_2 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_3 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_4 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_5 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_6 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_7 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_8 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_9 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_10 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_11 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_12 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_13 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_14 - 0x00000000, // SQ_ALU_CONST_CACHE_PS_15 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_0 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_1 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_2 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_3 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_4 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_5 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_6 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_7 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_8 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_9 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_10 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_11 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_12 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_13 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_14 - 0x00000000, // SQ_ALU_CONST_CACHE_VS_15 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_0 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_1 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_2 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_3 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_4 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_5 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_6 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_7 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_8 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_9 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_10 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_11 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_12 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_13 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_14 - 0x00000000, // SQ_ALU_CONST_CACHE_GS_15 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // PA_SU_POINT_SIZE 0x00000000, // PA_SU_POINT_MINMAX 0x00000000, // PA_SU_LINE_CNTL @@ -668,28 +669,29 @@ static const u32 SECT_CONTEXT_def_4[] = 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL 0x00000000, // VGT_GS_MODE - 0, // HOLE + 0x00000000, // VGT_GS_ONCHIP_CNTL 0x00000000, // PA_SC_MODE_CNTL_0 0x00000000, // PA_SC_MODE_CNTL_1 0x00000000, // VGT_ENHANCE 0x00000100, // VGT_GS_PER_ES 0x00000080, // VGT_ES_PER_GS 0x00000002, // VGT_GS_PER_VS - 0, // HOLE - 0, // HOLE - 0, // HOLE + 0x00000000, // VGT_GSVS_RING_OFFSET_1 + 0x00000000, // VGT_GSVS_RING_OFFSET_2 + 0x00000000, // VGT_GSVS_RING_OFFSET_3 0x00000000, // VGT_GS_OUT_PRIM_TYPE 0x00000000, // IA_ENHANCE }; -static const u32 SECT_CONTEXT_def_5[] = +static const unsigned int ci_SECT_CONTEXT_def_5[] = { - 0x00000000, // VGT_DMA_MAX_SIZE - 0x00000000, // VGT_DMA_INDEX_TYPE - 0, // HOLE + 0x00000000, // WD_ENHANCE 0x00000000, // VGT_PRIMITIVEID_EN - 0x00000000, // VGT_DMA_NUM_INSTANCES }; -static const u32 SECT_CONTEXT_def_6[] = +static const unsigned int ci_SECT_CONTEXT_def_6[] = +{ + 0x00000000, // VGT_PRIMITIVEID_RESET +}; +static const unsigned int ci_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN 0, // HOLE @@ -697,8 +699,8 @@ static const u32 SECT_CONTEXT_def_6[] = 0x00000000, // VGT_INSTANCE_STEP_RATE_0 0x00000000, // VGT_INSTANCE_STEP_RATE_1 0x000000ff, // IA_MULTI_VGT_PARAM - 0, // HOLE - 0, // HOLE + 0x00000000, // VGT_ESGS_RING_ITEMSIZE + 0x00000000, // VGT_GSVS_RING_ITEMSIZE 0x00000000, // VGT_REUSE_OFF 0x00000000, // VGT_VTX_CNT_EN 0x00000000, // DB_HTILE_SURFACE @@ -708,24 +710,24 @@ static const u32 SECT_CONTEXT_def_6[] = 0, // HOLE 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0 - 0x00000000, // VGT_STRMOUT_BUFFER_BASE_0 + 0, // HOLE 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1 - 0x00000000, // VGT_STRMOUT_BUFFER_BASE_1 + 0, // HOLE 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2 - 0x00000000, // VGT_STRMOUT_BUFFER_BASE_2 + 0, // HOLE 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3 - 0x00000000, // VGT_STRMOUT_BUFFER_BASE_3 + 0, // HOLE 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3 - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_0 - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_1 - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_2 - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET @@ -735,21 +737,19 @@ static const u32 SECT_CONTEXT_def_6[] = 0x00000000, // VGT_GS_MAX_VERT_OUT 0, // HOLE 0, // HOLE - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_HI_0 - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_HI_1 - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_HI_2 - 0x00000000, // VGT_STRMOUT_BASE_OFFSET_HI_3 - 0x00000000, // VGT_SHADER_STAGES_EN - 0x00000000, // VGT_LS_HS_CONFIG 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE + 0x00000000, // VGT_SHADER_STAGES_EN + 0x00000000, // VGT_LS_HS_CONFIG + 0x00000000, // VGT_GS_VERT_ITEMSIZE + 0x00000000, // VGT_GS_VERT_ITEMSIZE_1 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_2 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_3 0x00000000, // VGT_TF_PARAM 0x00000000, // DB_ALPHA_TO_MASK -}; -static const u32 SECT_CONTEXT_def_7[] = -{ + 0, // HOLE 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x00000000, // PA_SU_POLY_OFFSET_CLAMP 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE @@ -759,18 +759,18 @@ static const u32 SECT_CONTEXT_def_7[] = 0x00000000, // VGT_GS_INSTANCE_CNT 0x00000000, // VGT_STRMOUT_CONFIG 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG - 0x00000000, // CB_IMMED0_BASE - 0x00000000, // CB_IMMED1_BASE - 0x00000000, // CB_IMMED2_BASE - 0x00000000, // CB_IMMED3_BASE - 0x00000000, // CB_IMMED4_BASE - 0x00000000, // CB_IMMED5_BASE - 0x00000000, // CB_IMMED6_BASE - 0x00000000, // CB_IMMED7_BASE - 0x00000000, // CB_IMMED8_BASE - 0x00000000, // CB_IMMED9_BASE - 0x00000000, // CB_IMMED10_BASE - 0x00000000, // CB_IMMED11_BASE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0x00000000, // PA_SC_CENTROID_PRIORITY_0 @@ -800,10 +800,10 @@ static const u32 SECT_CONTEXT_def_7[] = 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1 - 0x00000000, // CB_CLRCMP_CONTROL - 0x00000000, // CB_CLRCMP_SRC - 0x00000000, // CB_CLRCMP_DST - 0x00000000, // CB_CLRCMP_MSK + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0x0000000e, // VGT_VERTEX_REUSE_BLOCK_CNTL @@ -814,268 +814,131 @@ static const u32 SECT_CONTEXT_def_7[] = 0x00000000, // CB_COLOR0_VIEW 0x00000000, // CB_COLOR0_INFO 0x00000000, // CB_COLOR0_ATTRIB - 0x00000000, // CB_COLOR0_DIM + 0, // HOLE 0x00000000, // CB_COLOR0_CMASK 0x00000000, // CB_COLOR0_CMASK_SLICE 0x00000000, // CB_COLOR0_FMASK 0x00000000, // CB_COLOR0_FMASK_SLICE 0x00000000, // CB_COLOR0_CLEAR_WORD0 0x00000000, // CB_COLOR0_CLEAR_WORD1 - 0x00000000, // CB_COLOR0_CLEAR_WORD2 - 0x00000000, // CB_COLOR0_CLEAR_WORD3 + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR1_BASE 0x00000000, // CB_COLOR1_PITCH 0x00000000, // CB_COLOR1_SLICE 0x00000000, // CB_COLOR1_VIEW 0x00000000, // CB_COLOR1_INFO 0x00000000, // CB_COLOR1_ATTRIB - 0x00000000, // CB_COLOR1_DIM + 0, // HOLE 0x00000000, // CB_COLOR1_CMASK 0x00000000, // CB_COLOR1_CMASK_SLICE 0x00000000, // CB_COLOR1_FMASK 0x00000000, // CB_COLOR1_FMASK_SLICE 0x00000000, // CB_COLOR1_CLEAR_WORD0 0x00000000, // CB_COLOR1_CLEAR_WORD1 - 0x00000000, // CB_COLOR1_CLEAR_WORD2 - 0x00000000, // CB_COLOR1_CLEAR_WORD3 + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR2_BASE 0x00000000, // CB_COLOR2_PITCH 0x00000000, // CB_COLOR2_SLICE 0x00000000, // CB_COLOR2_VIEW 0x00000000, // CB_COLOR2_INFO 0x00000000, // CB_COLOR2_ATTRIB - 0x00000000, // CB_COLOR2_DIM + 0, // HOLE 0x00000000, // CB_COLOR2_CMASK 0x00000000, // CB_COLOR2_CMASK_SLICE 0x00000000, // CB_COLOR2_FMASK 0x00000000, // CB_COLOR2_FMASK_SLICE 0x00000000, // CB_COLOR2_CLEAR_WORD0 0x00000000, // CB_COLOR2_CLEAR_WORD1 - 0x00000000, // CB_COLOR2_CLEAR_WORD2 - 0x00000000, // CB_COLOR2_CLEAR_WORD3 + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR3_BASE 0x00000000, // CB_COLOR3_PITCH 0x00000000, // CB_COLOR3_SLICE 0x00000000, // CB_COLOR3_VIEW 0x00000000, // CB_COLOR3_INFO 0x00000000, // CB_COLOR3_ATTRIB - 0x00000000, // CB_COLOR3_DIM + 0, // HOLE 0x00000000, // CB_COLOR3_CMASK 0x00000000, // CB_COLOR3_CMASK_SLICE 0x00000000, // CB_COLOR3_FMASK 0x00000000, // CB_COLOR3_FMASK_SLICE 0x00000000, // CB_COLOR3_CLEAR_WORD0 0x00000000, // CB_COLOR3_CLEAR_WORD1 - 0x00000000, // CB_COLOR3_CLEAR_WORD2 - 0x00000000, // CB_COLOR3_CLEAR_WORD3 + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR4_BASE 0x00000000, // CB_COLOR4_PITCH 0x00000000, // CB_COLOR4_SLICE 0x00000000, // CB_COLOR4_VIEW 0x00000000, // CB_COLOR4_INFO 0x00000000, // CB_COLOR4_ATTRIB - 0x00000000, // CB_COLOR4_DIM + 0, // HOLE 0x00000000, // CB_COLOR4_CMASK 0x00000000, // CB_COLOR4_CMASK_SLICE 0x00000000, // CB_COLOR4_FMASK 0x00000000, // CB_COLOR4_FMASK_SLICE 0x00000000, // CB_COLOR4_CLEAR_WORD0 0x00000000, // CB_COLOR4_CLEAR_WORD1 - 0x00000000, // CB_COLOR4_CLEAR_WORD2 - 0x00000000, // CB_COLOR4_CLEAR_WORD3 + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR5_BASE 0x00000000, // CB_COLOR5_PITCH 0x00000000, // CB_COLOR5_SLICE 0x00000000, // CB_COLOR5_VIEW 0x00000000, // CB_COLOR5_INFO 0x00000000, // CB_COLOR5_ATTRIB - 0x00000000, // CB_COLOR5_DIM + 0, // HOLE 0x00000000, // CB_COLOR5_CMASK 0x00000000, // CB_COLOR5_CMASK_SLICE 0x00000000, // CB_COLOR5_FMASK 0x00000000, // CB_COLOR5_FMASK_SLICE 0x00000000, // CB_COLOR5_CLEAR_WORD0 0x00000000, // CB_COLOR5_CLEAR_WORD1 - 0x00000000, // CB_COLOR5_CLEAR_WORD2 - 0x00000000, // CB_COLOR5_CLEAR_WORD3 + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR6_BASE 0x00000000, // CB_COLOR6_PITCH 0x00000000, // CB_COLOR6_SLICE 0x00000000, // CB_COLOR6_VIEW 0x00000000, // CB_COLOR6_INFO 0x00000000, // CB_COLOR6_ATTRIB - 0x00000000, // CB_COLOR6_DIM + 0, // HOLE 0x00000000, // CB_COLOR6_CMASK 0x00000000, // CB_COLOR6_CMASK_SLICE 0x00000000, // CB_COLOR6_FMASK 0x00000000, // CB_COLOR6_FMASK_SLICE 0x00000000, // CB_COLOR6_CLEAR_WORD0 0x00000000, // CB_COLOR6_CLEAR_WORD1 - 0x00000000, // CB_COLOR6_CLEAR_WORD2 - 0x00000000, // CB_COLOR6_CLEAR_WORD3 + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR7_BASE 0x00000000, // CB_COLOR7_PITCH 0x00000000, // CB_COLOR7_SLICE 0x00000000, // CB_COLOR7_VIEW 0x00000000, // CB_COLOR7_INFO 0x00000000, // CB_COLOR7_ATTRIB - 0x00000000, // CB_COLOR7_DIM + 0, // HOLE 0x00000000, // CB_COLOR7_CMASK 0x00000000, // CB_COLOR7_CMASK_SLICE 0x00000000, // CB_COLOR7_FMASK 0x00000000, // CB_COLOR7_FMASK_SLICE 0x00000000, // CB_COLOR7_CLEAR_WORD0 0x00000000, // CB_COLOR7_CLEAR_WORD1 - 0x00000000, // CB_COLOR7_CLEAR_WORD2 - 0x00000000, // CB_COLOR7_CLEAR_WORD3 - 0x00000000, // CB_COLOR8_BASE - 0x00000000, // CB_COLOR8_PITCH - 0x00000000, // CB_COLOR8_SLICE - 0x00000000, // CB_COLOR8_VIEW - 0x00000000, // CB_COLOR8_INFO - 0x00000000, // CB_COLOR8_ATTRIB - 0x00000000, // CB_COLOR8_DIM - 0x00000000, // CB_COLOR9_BASE - 0x00000000, // CB_COLOR9_PITCH - 0x00000000, // CB_COLOR9_SLICE - 0x00000000, // CB_COLOR9_VIEW - 0x00000000, // CB_COLOR9_INFO - 0x00000000, // CB_COLOR9_ATTRIB - 0x00000000, // CB_COLOR9_DIM - 0x00000000, // CB_COLOR10_BASE - 0x00000000, // CB_COLOR10_PITCH - 0x00000000, // CB_COLOR10_SLICE - 0x00000000, // CB_COLOR10_VIEW - 0x00000000, // CB_COLOR10_INFO - 0x00000000, // CB_COLOR10_ATTRIB - 0x00000000, // CB_COLOR10_DIM - 0x00000000, // CB_COLOR11_BASE - 0x00000000, // CB_COLOR11_PITCH - 0x00000000, // CB_COLOR11_SLICE - 0x00000000, // CB_COLOR11_VIEW - 0x00000000, // CB_COLOR11_INFO - 0x00000000, // CB_COLOR11_ATTRIB - 0x00000000, // CB_COLOR11_DIM - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0, // HOLE - 0x00000000, // SQ_ALU_CONST_CACHE_HS_0 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_1 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_2 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_3 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_4 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_5 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_6 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_7 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_8 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_9 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_10 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_11 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_12 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_13 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_14 - 0x00000000, // SQ_ALU_CONST_CACHE_HS_15 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_0 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_1 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_2 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_3 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_4 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_5 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_6 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_7 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_8 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_9 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_10 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_11 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_12 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_13 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_14 - 0x00000000, // SQ_ALU_CONST_CACHE_LS_15 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_0 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_1 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_2 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_3 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_4 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_5 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_6 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_7 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_8 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_9 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_10 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_11 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_12 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_13 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_14 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_HS_15 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_0 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_1 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_2 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_3 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_4 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_5 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_6 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_7 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_8 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_9 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_10 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_11 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_12 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_13 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_14 - 0x00000000, // SQ_ALU_CONST_BUFFER_SIZE_LS_15 -}; -static const struct cs_extent_def SECT_CONTEXT_defs[] = -{ - {SECT_CONTEXT_def_1, 0x0000a000, 488 }, - {SECT_CONTEXT_def_2, 0x0000a1f5, 6 }, - {SECT_CONTEXT_def_3, 0x0000a200, 55 }, - {SECT_CONTEXT_def_4, 0x0000a23a, 99 }, - {SECT_CONTEXT_def_5, 0x0000a29e, 5 }, - {SECT_CONTEXT_def_6, 0x0000a2a5, 56 }, - {SECT_CONTEXT_def_7, 0x0000a2de, 290 }, - { 0, 0, 0 } -}; -static const u32 SECT_CLEAR_def_1[] = -{ - 0xffffffff, // SQ_TEX_SAMPLER_CLEAR - 0xffffffff, // SQ_TEX_RESOURCE_CLEAR - 0xffffffff, // SQ_LOOP_BOOL_CLEAR -}; -static const struct cs_extent_def SECT_CLEAR_defs[] = -{ - {SECT_CLEAR_def_1, 0x0000ffc0, 3 }, - { 0, 0, 0 } -}; -static const u32 SECT_CTRLCONST_def_1[] = -{ - 0x00000000, // SQ_VTX_BASE_VTX_LOC - 0x00000000, // SQ_VTX_START_INST_LOC }; -static const struct cs_extent_def SECT_CTRLCONST_defs[] = +static const struct cs_extent_def ci_SECT_CONTEXT_defs[] = { - {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 }, + {ci_SECT_CONTEXT_def_1, 0x0000a000, 212 }, + {ci_SECT_CONTEXT_def_2, 0x0000a0d6, 274 }, + {ci_SECT_CONTEXT_def_3, 0x0000a1f5, 6 }, + {ci_SECT_CONTEXT_def_4, 0x0000a200, 157 }, + {ci_SECT_CONTEXT_def_5, 0x0000a2a0, 2 }, + {ci_SECT_CONTEXT_def_6, 0x0000a2a3, 1 }, + {ci_SECT_CONTEXT_def_7, 0x0000a2a5, 233 }, { 0, 0, 0 } }; -struct cs_section_def cayman_cs_data[] = { - { SECT_CONTEXT_defs, SECT_CONTEXT }, - { SECT_CLEAR_defs, SECT_CLEAR }, - { SECT_CTRLCONST_defs, SECT_CTRLCONST }, +static const struct cs_section_def ci_cs_data[] = { + { ci_SECT_CONTEXT_defs, SECT_CONTEXT }, { 0, SECT_NONE } }; diff --git a/sys/dev/drm/radeon/clearstate_evergreen.h b/sys/dev/drm/radeon/clearstate_evergreen.h index 4791d856b7..63a1ffbb3c 100644 --- a/sys/dev/drm/radeon/clearstate_evergreen.h +++ b/sys/dev/drm/radeon/clearstate_evergreen.h @@ -1072,7 +1072,7 @@ static const struct cs_extent_def SECT_CTRLCONST_defs[] = {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 }, { 0, 0, 0 } }; -struct cs_section_def evergreen_cs_data[] = { +static const struct cs_section_def evergreen_cs_data[] = { { SECT_CONTEXT_defs, SECT_CONTEXT }, { SECT_CLEAR_defs, SECT_CLEAR }, { SECT_CTRLCONST_defs, SECT_CTRLCONST }, diff --git a/sys/dev/drm/radeon/cypress_dpm.c b/sys/dev/drm/radeon/cypress_dpm.c index 3f87c61210..252a2f4269 100644 --- a/sys/dev/drm/radeon/cypress_dpm.c +++ b/sys/dev/drm/radeon/cypress_dpm.c @@ -2016,12 +2016,6 @@ int cypress_dpm_set_power_state(struct radeon_device *rdev) if (eg_pi->pcie_performance_request) cypress_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps); - ret = rv770_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("rv770_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } @@ -2168,7 +2162,8 @@ bool cypress_dpm_vblank_too_short(struct radeon_device *rdev) { struct rv7xx_power_info *pi = rv770_get_pi(rdev); u32 vblank_time = r600_dpm_get_vblank_time(rdev); - u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + /* we never hit the non-gddr5 limit so disable it */ + u32 switch_limit = pi->mem_gddr5 ? 450 : 0; if (vblank_time < switch_limit) return true; diff --git a/sys/dev/drm/radeon/dce6_afmt.c b/sys/dev/drm/radeon/dce6_afmt.c new file mode 100644 index 0000000000..5035a1ede1 --- /dev/null +++ b/sys/dev/drm/radeon/dce6_afmt.c @@ -0,0 +1,282 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +static u32 dce6_endpoint_rreg(struct radeon_device *rdev, + u32 block_offset, u32 reg) +{ + u32 r; + + WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); + r = RREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset); + return r; +} + +static void dce6_endpoint_wreg(struct radeon_device *rdev, + u32 block_offset, u32 reg, u32 v) +{ + if (ASIC_IS_DCE8(rdev)) + WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); + else + WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, + AZ_ENDPOINT_REG_WRITE_EN | AZ_ENDPOINT_REG_INDEX(reg)); + WREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset, v); +} + +#define RREG32_ENDPOINT(block, reg) dce6_endpoint_rreg(rdev, (block), (reg)) +#define WREG32_ENDPOINT(block, reg, v) dce6_endpoint_wreg(rdev, (block), (reg), (v)) + + +static void dce6_afmt_get_connected_pins(struct radeon_device *rdev) +{ + int i; + u32 offset, tmp; + + for (i = 0; i < rdev->audio.num_pins; i++) { + offset = rdev->audio.pin[i].offset; + tmp = RREG32_ENDPOINT(offset, + AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); + if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1) + rdev->audio.pin[i].connected = false; + else + rdev->audio.pin[i].connected = true; + } +} + +struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev) +{ + int i; + + dce6_afmt_get_connected_pins(rdev); + + for (i = 0; i < rdev->audio.num_pins; i++) { + if (rdev->audio.pin[i].connected) + return &rdev->audio.pin[i]; + } + DRM_ERROR("No connected audio pins found!\n"); + return NULL; +} + +void dce6_afmt_select_pin(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + u32 offset = dig->afmt->offset; + + if (!dig->afmt->pin) + return; + + WREG32(AFMT_AUDIO_SRC_CONTROL + offset, + AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); +} + +void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct drm_connector *connector; + struct radeon_connector *radeon_connector = NULL; + u32 offset, tmp; + u8 *sadb; + int sad_count; + + /* XXX: setting this register causes hangs on some asics */ + return; + + if (!dig->afmt->pin) + return; + + offset = dig->afmt->pin->offset; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) + radeon_connector = to_radeon_connector(connector); + } + + if (!radeon_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb); + if (sad_count < 0) { + DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); + return; + } + + /* program the speaker allocation */ + tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK); + /* set HDMI mode */ + tmp |= HDMI_CONNECTION; + if (sad_count) + tmp |= SPEAKER_ALLOCATION(sadb[0]); + else + tmp |= SPEAKER_ALLOCATION(5); /* stereo */ + WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + + kfree(sadb); +} + +void dce6_afmt_write_sad_regs(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + u32 offset; + struct drm_connector *connector; + struct radeon_connector *radeon_connector = NULL; + struct cea_sad *sads; + int i, sad_count; + + static const u16 eld_reg_to_type[][2] = { + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, + { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, + }; + + if (!dig->afmt->pin) + return; + + offset = dig->afmt->pin->offset; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) + radeon_connector = to_radeon_connector(connector); + } + + if (!radeon_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); + if (sad_count < 0) { + DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + return; + } + BUG_ON(!sads); + + for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { + u32 value = 0; + int j; + + for (j = 0; j < sad_count; j++) { + struct cea_sad *sad = &sads[j]; + + if (sad->format == eld_reg_to_type[i][1]) { + value = MAX_CHANNELS(sad->channels) | + DESCRIPTOR_BYTE_2(sad->byte2) | + SUPPORTED_FREQUENCIES(sad->freq); + if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) + value |= SUPPORTED_FREQUENCIES_STEREO(sad->freq); + break; + } + } + WREG32_ENDPOINT(offset, eld_reg_to_type[i][0], value); + } + + kfree(sads); +} + +static int dce6_audio_chipset_supported(struct radeon_device *rdev) +{ + return !ASIC_IS_NODCE(rdev); +} + +static void dce6_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable) +{ + WREG32_ENDPOINT(pin->offset, AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL, + AUDIO_ENABLED); + DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id); +} + +static const u32 pin_offsets[7] = +{ + (0x5e00 - 0x5e00), + (0x5e18 - 0x5e00), + (0x5e30 - 0x5e00), + (0x5e48 - 0x5e00), + (0x5e60 - 0x5e00), + (0x5e78 - 0x5e00), + (0x5e90 - 0x5e00), +}; + +int dce6_audio_init(struct radeon_device *rdev) +{ + int i; + + if (!radeon_audio || !dce6_audio_chipset_supported(rdev)) + return 0; + + rdev->audio.enabled = true; + + if (ASIC_IS_DCE8(rdev)) + rdev->audio.num_pins = 7; + else + rdev->audio.num_pins = 6; + + for (i = 0; i < rdev->audio.num_pins; i++) { + rdev->audio.pin[i].channels = -1; + rdev->audio.pin[i].rate = -1; + rdev->audio.pin[i].bits_per_sample = -1; + rdev->audio.pin[i].status_bits = 0; + rdev->audio.pin[i].category_code = 0; + rdev->audio.pin[i].connected = false; + rdev->audio.pin[i].offset = pin_offsets[i]; + rdev->audio.pin[i].id = i; + dce6_audio_enable(rdev, &rdev->audio.pin[i], true); + } + + return 0; +} + +void dce6_audio_fini(struct radeon_device *rdev) +{ + int i; + + if (!rdev->audio.enabled) + return; + + for (i = 0; i < rdev->audio.num_pins; i++) + dce6_audio_enable(rdev, &rdev->audio.pin[i], false); + + rdev->audio.enabled = false; +} diff --git a/sys/dev/drm/radeon/evergreen.c b/sys/dev/drm/radeon/evergreen.c index 2081483522..47f278fd2b 100644 --- a/sys/dev/drm/radeon/evergreen.c +++ b/sys/dev/drm/radeon/evergreen.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * * Authors: Alex Deucher - * - * $FreeBSD: head/sys/dev/drm2/radeon/evergreen.c 254885 2013-08-25 19:37:15Z dumbbell $ */ #include #include @@ -47,7 +45,7 @@ static const u32 crtc_offsets[6] = #include "clearstate_evergreen.h" -static u32 sumo_rlc_save_restore_register_list[] = +static const u32 sumo_rlc_save_restore_register_list[] = { 0x98fc, 0x9830, @@ -131,7 +129,6 @@ static u32 sumo_rlc_save_restore_register_list[] = 0x9150, 0x802c, }; -static u32 sumo_rlc_save_restore_register_list_size = ARRAY_SIZE(sumo_rlc_save_restore_register_list); static void evergreen_gpu_init(struct radeon_device *rdev); @@ -1804,7 +1801,8 @@ static u32 evergreen_line_buffer_adjust(struct radeon_device *rdev, struct drm_display_mode *mode, struct drm_display_mode *other_mode) { - u32 tmp; + u32 tmp, buffer_alloc, i; + u32 pipe_offset = radeon_crtc->crtc_id * 0x20; /* * Line Buffer Setup * There are 3 line buffers, each one shared by 2 display controllers. @@ -1827,18 +1825,34 @@ static u32 evergreen_line_buffer_adjust(struct radeon_device *rdev, * non-linked crtcs for maximum line buffer allocation. */ if (radeon_crtc->base.enabled && mode) { - if (other_mode) + if (other_mode) { tmp = 0; /* 1/2 */ - else + buffer_alloc = 1; + } else { tmp = 2; /* whole */ - } else + buffer_alloc = 2; + } + } else { tmp = 0; + buffer_alloc = 0; + } /* second controller of the pair uses second half of the lb */ if (radeon_crtc->crtc_id % 2) tmp += 4; WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset, tmp); + if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) { + WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset, + DMIF_BUFFERS_ALLOCATED(buffer_alloc)); + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) & + DMIF_BUFFERS_ALLOCATED_COMPLETED) + break; + udelay(1); + } + } + if (radeon_crtc->base.enabled && mode) { switch (tmp) { case 0: @@ -2878,8 +2892,8 @@ static int evergreen_cp_resume(struct radeon_device *rdev) RREG32(GRBM_SOFT_RESET); /* Set ring buffer size */ - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -3106,7 +3120,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) rdev->config.evergreen.sx_max_export_size = 256; rdev->config.evergreen.sx_max_export_pos_size = 64; rdev->config.evergreen.sx_max_export_smx_size = 192; - rdev->config.evergreen.max_hw_contexts = 8; + rdev->config.evergreen.max_hw_contexts = 4; rdev->config.evergreen.sq_num_cf_insts = 2; rdev->config.evergreen.sc_prim_fifo_size = 0x40; @@ -3610,7 +3624,7 @@ bool evergreen_is_display_hung(struct radeon_device *rdev) return true; } -static u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev) +u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -3836,28 +3850,6 @@ bool evergreen_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin return radeon_ring_test_lockup(rdev, ring); } -/** - * evergreen_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = evergreen_gpu_check_soft_reset(rdev); - - if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - /* * RLC */ @@ -3891,151 +3883,236 @@ void sumo_rlc_fini(struct radeon_device *rdev) radeon_bo_unref(&rdev->rlc.clear_state_obj); rdev->rlc.clear_state_obj = NULL; } + + /* clear state block */ + if (rdev->rlc.cp_table_obj) { + r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false); + if (unlikely(r != 0)) + dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r); + radeon_bo_unpin(rdev->rlc.cp_table_obj); + radeon_bo_unreserve(rdev->rlc.cp_table_obj); + + radeon_bo_unref(&rdev->rlc.cp_table_obj); + rdev->rlc.cp_table_obj = NULL; + } } +#define CP_ME_TABLE_SIZE 96 + int sumo_rlc_init(struct radeon_device *rdev) { - u32 *src_ptr; + const u32 *src_ptr; volatile u32 *dst_ptr; u32 dws, data, i, j, k, reg_num; - u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index; + u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index = 0; u64 reg_list_mc_addr; - struct cs_section_def *cs_data; + const struct cs_section_def *cs_data; int r; void *vptr; vptr = NULL; src_ptr = rdev->rlc.reg_list; dws = rdev->rlc.reg_list_size; + if (rdev->family >= CHIP_BONAIRE) { + dws += (5 * 16) + 48 + 48 + 64; + } cs_data = rdev->rlc.cs_data; - /* save restore block */ - if (rdev->rlc.save_restore_obj == NULL) { - r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj); + if (src_ptr) { + /* save restore block */ + if (rdev->rlc.save_restore_obj == NULL) { + r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj); + if (r) { + dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); + return r; + } + } + + r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); + if (unlikely(r != 0)) { + sumo_rlc_fini(rdev); + return r; + } + r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->rlc.save_restore_gpu_addr); if (r) { - dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); + radeon_bo_unreserve(rdev->rlc.save_restore_obj); + dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r); + sumo_rlc_fini(rdev); return r; } - } - r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); - if (unlikely(r != 0)) { - sumo_rlc_fini(rdev); - return r; - } - r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->rlc.save_restore_gpu_addr); - if (r) { + r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void**)&vptr); + if (r) { + dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + rdev->rlc.sr_ptr = vptr; + /* write the sr buffer */ + dst_ptr = rdev->rlc.sr_ptr; + if (rdev->family >= CHIP_TAHITI) { + /* SI */ + for (i = 0; i < rdev->rlc.reg_list_size; i++) + dst_ptr[i] = src_ptr[i]; + } else { + /* ON/LN/TN */ + /* format: + * dw0: (reg2 << 16) | reg1 + * dw1: reg1 save space + * dw2: reg2 save space + */ + for (i = 0; i < dws; i++) { + data = src_ptr[i] >> 2; + i++; + if (i < dws) + data |= (src_ptr[i] >> 2) << 16; + j = (((i - 1) * 3) / 2); + dst_ptr[j] = data; + } + j = ((i * 3) / 2); + dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER; + } + radeon_bo_kunmap(rdev->rlc.save_restore_obj); radeon_bo_unreserve(rdev->rlc.save_restore_obj); - dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r); - sumo_rlc_fini(rdev); - return r; - } - r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void**)&vptr); - if (r) { - dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r); - sumo_rlc_fini(rdev); - return r; - } - rdev->rlc.sr_ptr = vptr; - /* write the sr buffer */ - dst_ptr = rdev->rlc.sr_ptr; - /* format: - * dw0: (reg2 << 16) | reg1 - * dw1: reg1 save space - * dw2: reg2 save space - */ - for (i = 0; i < dws; i++) { - data = src_ptr[i] >> 2; - i++; - if (i < dws) - data |= (src_ptr[i] >> 2) << 16; - j = (((i - 1) * 3) / 2); - dst_ptr[j] = data; } - j = ((i * 3) / 2); - dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER; - - radeon_bo_kunmap(rdev->rlc.save_restore_obj); - radeon_bo_unreserve(rdev->rlc.save_restore_obj); - /* clear state block */ - reg_list_num = 0; - dws = 0; - for (i = 0; cs_data[i].section != NULL; i++) { - for (j = 0; cs_data[i].section[j].extent != NULL; j++) { - reg_list_num++; - dws += cs_data[i].section[j].reg_count; + if (cs_data) { + /* clear state block */ + if (rdev->family >= CHIP_BONAIRE) { + rdev->rlc.clear_state_size = dws = cik_get_csb_size(rdev); + } else if (rdev->family >= CHIP_TAHITI) { + rdev->rlc.clear_state_size = si_get_csb_size(rdev); + dws = rdev->rlc.clear_state_size + (256 / 4); + } else { + reg_list_num = 0; + dws = 0; + for (i = 0; cs_data[i].section != NULL; i++) { + for (j = 0; cs_data[i].section[j].extent != NULL; j++) { + reg_list_num++; + dws += cs_data[i].section[j].reg_count; + } + } + reg_list_blk_index = (3 * reg_list_num + 2); + dws += reg_list_blk_index; + rdev->rlc.clear_state_size = dws; } - } - reg_list_blk_index = (3 * reg_list_num + 2); - dws += reg_list_blk_index; - if (rdev->rlc.clear_state_obj == NULL) { - r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); + if (rdev->rlc.clear_state_obj == NULL) { + r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); + if (r) { + dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + } + r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); + if (unlikely(r != 0)) { + sumo_rlc_fini(rdev); + return r; + } + r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->rlc.clear_state_gpu_addr); if (r) { - dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); + radeon_bo_unreserve(rdev->rlc.clear_state_obj); + dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r); sumo_rlc_fini(rdev); return r; } - } - r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - sumo_rlc_fini(rdev); - return r; - } - r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->rlc.clear_state_gpu_addr); - if (r) { - - radeon_bo_unreserve(rdev->rlc.clear_state_obj); - dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r); - sumo_rlc_fini(rdev); - return r; - } - r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&vptr); - if (r) { - dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r); - sumo_rlc_fini(rdev); - return r; - } - rdev->rlc.cs_ptr = vptr; - /* set up the cs buffer */ - dst_ptr = rdev->rlc.cs_ptr; - reg_list_hdr_blk_index = 0; - reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4); - data = upper_32_bits(reg_list_mc_addr); - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - for (i = 0; cs_data[i].section != NULL; i++) { - for (j = 0; cs_data[i].section[j].extent != NULL; j++) { - reg_num = cs_data[i].section[j].reg_count; - data = reg_list_mc_addr & 0xffffffff; - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - - data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff; - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - data = 0x08000000 | (reg_num * 4); + r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void**)&vptr); + if (r) { + dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + rdev->rlc.cs_ptr = vptr; + /* set up the cs buffer */ + dst_ptr = rdev->rlc.cs_ptr; + if (rdev->family >= CHIP_BONAIRE) { + cik_get_csb_buffer(rdev, dst_ptr); + } else if (rdev->family >= CHIP_TAHITI) { + reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + 256; + dst_ptr[0] = upper_32_bits(reg_list_mc_addr); + dst_ptr[1] = lower_32_bits(reg_list_mc_addr); + dst_ptr[2] = rdev->rlc.clear_state_size; + si_get_csb_buffer(rdev, &dst_ptr[(256/4)]); + } else { + reg_list_hdr_blk_index = 0; + reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4); + data = upper_32_bits(reg_list_mc_addr); dst_ptr[reg_list_hdr_blk_index] = data; reg_list_hdr_blk_index++; - - for (k = 0; k < reg_num; k++) { - data = cs_data[i].section[j].extent[k]; - dst_ptr[reg_list_blk_index + k] = data; + for (i = 0; cs_data[i].section != NULL; i++) { + for (j = 0; cs_data[i].section[j].extent != NULL; j++) { + reg_num = cs_data[i].section[j].reg_count; + data = reg_list_mc_addr & 0xffffffff; + dst_ptr[reg_list_hdr_blk_index] = data; + reg_list_hdr_blk_index++; + + data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff; + dst_ptr[reg_list_hdr_blk_index] = data; + reg_list_hdr_blk_index++; + + data = 0x08000000 | (reg_num * 4); + dst_ptr[reg_list_hdr_blk_index] = data; + reg_list_hdr_blk_index++; + + for (k = 0; k < reg_num; k++) { + data = cs_data[i].section[j].extent[k]; + dst_ptr[reg_list_blk_index + k] = data; + } + reg_list_mc_addr += reg_num * 4; + reg_list_blk_index += reg_num; + } } - reg_list_mc_addr += reg_num * 4; - reg_list_blk_index += reg_num; + dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER; } + radeon_bo_kunmap(rdev->rlc.clear_state_obj); + radeon_bo_unreserve(rdev->rlc.clear_state_obj); } - dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER; - radeon_bo_kunmap(rdev->rlc.clear_state_obj); - radeon_bo_unreserve(rdev->rlc.clear_state_obj); + if (rdev->rlc.cp_table_size) { + if (rdev->rlc.cp_table_obj == NULL) { + r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.cp_table_obj); + if (r) { + dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + } + + r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false); + if (unlikely(r != 0)) { + dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + r = radeon_bo_pin(rdev->rlc.cp_table_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->rlc.cp_table_gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->rlc.cp_table_obj); + dev_warn(rdev->dev, "(%d) pin RLC cp_table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + r = radeon_bo_kmap(rdev->rlc.cp_table_obj, (void**)&vptr); + if (r) { + dev_warn(rdev->dev, "(%d) map RLC cp table bo failed\n", r); + sumo_rlc_fini(rdev); + return r; + } + rdev->rlc.cp_table_ptr = vptr; + + cik_init_cp_pg_table(rdev); + + radeon_bo_kunmap(rdev->rlc.cp_table_obj); + radeon_bo_unreserve(rdev->rlc.cp_table_obj); + + } return 0; } @@ -4199,7 +4276,7 @@ int evergreen_irq_set(struct radeon_device *rdev) u32 thermal_int = 0; if (!rdev->irq.installed) { - dev_warn(rdev->dev, "Can't enable IRQ/MSI because no handler is installed\n"); + WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); return -EINVAL; } /* don't enable anything if the ih is disabled */ @@ -4960,143 +5037,6 @@ restart_ih: return IRQ_HANDLED; } -/** - * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring - * - * @rdev: radeon_device pointer - * @fence: radeon fence object - * - * Add a DMA fence packet to the ring to write - * the fence seq number and DMA trap packet to generate - * an interrupt if needed (evergreen-SI). - */ -void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - u64 addr = rdev->fence_drv[fence->ring].gpu_addr; - /* write the fence */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0)); - radeon_ring_write(ring, addr & 0xfffffffc); - radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); - radeon_ring_write(ring, fence->seq); - /* generate an interrupt */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0)); - /* flush HDP */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); - radeon_ring_write(ring, 1); -} - -/** - * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ib: IB object to schedule - * - * Schedule an IB in the DMA ring (evergreen). - */ -void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, - struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 4; - while ((next_rptr & 7) != 5) - next_rptr++; - next_rptr += 3; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); - radeon_ring_write(ring, next_rptr); - } - - /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. - * Pad as necessary with NOPs. - */ - while ((ring->wptr & 7) != 5) - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0)); - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0)); - radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); - radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); - -} - -/** - * evergreen_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (evergreen-cayman). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int evergreen_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_dw, cur_size_in_dw; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; - num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); - r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_dw = size_in_dw; - if (cur_size_in_dw > 0xFFFFF) - cur_size_in_dw = 0xFFFFF; - size_in_dw -= cur_size_in_dw; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw)); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, src_offset & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); - src_offset += cur_size_in_dw * 4; - dst_offset += cur_size_in_dw * 4; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - static int evergreen_startup(struct radeon_device *rdev) { struct radeon_ring *ring; @@ -5107,6 +5047,11 @@ static int evergreen_startup(struct radeon_device *rdev) /* enable aspm */ evergreen_program_aspm(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + evergreen_mc_program(rdev); if (ASIC_IS_DCE5(rdev)) { @@ -5132,10 +5077,6 @@ static int evergreen_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - if (rdev->flags & RADEON_IS_AGP) { evergreen_agp_enable(rdev); } else { @@ -5145,17 +5086,11 @@ static int evergreen_startup(struct radeon_device *rdev) } evergreen_gpu_init(rdev); - r = evergreen_blit_init(rdev); - if (r) { - r600_blit_fini(rdev); - rdev->asic->copy.copy = NULL; - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); - } - /* allocate rlc buffers */ if (rdev->flags & RADEON_IS_IGP) { rdev->rlc.reg_list = sumo_rlc_save_restore_register_list; - rdev->rlc.reg_list_size = sumo_rlc_save_restore_register_list_size; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(sumo_rlc_save_restore_register_list); rdev->rlc.cs_data = evergreen_cs_data; r = sumo_rlc_init(rdev); if (r) { @@ -5181,7 +5116,7 @@ static int evergreen_startup(struct radeon_device *rdev) return r; } - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -5210,14 +5145,14 @@ static int evergreen_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR, DMA_RB_WPTR, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0)); if (r) return r; @@ -5233,12 +5168,11 @@ static int evergreen_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: error initializing UVD (%d).\n", r); @@ -5293,7 +5227,7 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); @@ -5421,7 +5355,6 @@ int evergreen_init(struct radeon_device *rdev) void evergreen_fini(struct radeon_device *rdev) { r600_audio_fini(rdev); - r600_blit_fini(rdev); r700_cp_fini(rdev); r600_dma_fini(rdev); r600_irq_fini(rdev); @@ -5431,7 +5364,7 @@ void evergreen_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/sys/dev/drm/radeon/evergreen_blit_kms.c b/sys/dev/drm/radeon/evergreen_blit_kms.c deleted file mode 100644 index 9297b81e0f..0000000000 --- a/sys/dev/drm/radeon/evergreen_blit_kms.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright 2010 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher - */ - -#include -#include -#include "radeon.h" -#include "radeon_asic.h" - -#include "evergreend.h" -#include "evergreen_blit_shaders.h" -#include "cayman_blit_shaders.h" -#include "radeon_blit_common.h" - -/* emits 17 */ -static void -set_render_target(struct radeon_device *rdev, int format, - int w, int h, u64 gpu_addr) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 cb_color_info; - int pitch, slice; - - h = ALIGN(h, 8); - if (h < 8) - h = 8; - - cb_color_info = CB_FORMAT(format) | - CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | - CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); - pitch = (w / 8) - 1; - slice = ((w * h) / 64) - 1; - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15)); - radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(ring, pitch); - radeon_ring_write(ring, slice); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, cb_color_info); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, (w - 1) | ((h - 1) << 16)); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); -} - -/* emits 5dw */ -static void -cp_set_surface_sync(struct radeon_device *rdev, - u32 sync_type, u32 size, - u64 mc_addr) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 cp_coher_size; - - if (size == 0xffffffff) - cp_coher_size = 0xffffffff; - else - cp_coher_size = ((size + 255) >> 8); - - if (rdev->family >= CHIP_CAYMAN) { - /* CP_COHER_CNTL2 has to be set manually when submitting a surface_sync - * to the RB directly. For IBs, the CP programs this as part of the - * surface_sync packet. - */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */ - } - radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, sync_type); - radeon_ring_write(ring, cp_coher_size); - radeon_ring_write(ring, mc_addr >> 8); - radeon_ring_write(ring, 10); /* poll interval */ -} - -/* emits 11dw + 1 surface sync = 16dw */ -static void -set_shaders(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u64 gpu_addr; - - /* VS */ - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3)); - radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(ring, 2); - radeon_ring_write(ring, 0); - - /* PS */ - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4)); - radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(ring, 1); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 2); - - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; - cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); -} - -/* emits 10 + 1 sync (5) = 15 */ -static void -set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 sq_vtx_constant_word2, sq_vtx_constant_word3; - - /* high addr, stride */ - sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | - SQ_VTXC_STRIDE(16); -#ifdef __BIG_ENDIAN - sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); -#endif - /* xyzw swizzles */ - sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) | - SQ_VTCX_SEL_Y(SQ_SEL_Y) | - SQ_VTCX_SEL_Z(SQ_SEL_Z) | - SQ_VTCX_SEL_W(SQ_SEL_W); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); - radeon_ring_write(ring, 0x580); - radeon_ring_write(ring, gpu_addr & 0xffffffff); - radeon_ring_write(ring, 48 - 1); /* size */ - radeon_ring_write(ring, sq_vtx_constant_word2); - radeon_ring_write(ring, sq_vtx_constant_word3); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER)); - - if ((rdev->family == CHIP_CEDAR) || - (rdev->family == CHIP_PALM) || - (rdev->family == CHIP_SUMO) || - (rdev->family == CHIP_SUMO2) || - (rdev->family == CHIP_CAICOS)) - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, 48, gpu_addr); - else - cp_set_surface_sync(rdev, - PACKET3_VC_ACTION_ENA, 48, gpu_addr); - -} - -/* emits 10 */ -static void -set_tex_resource(struct radeon_device *rdev, - int format, int w, int h, int pitch, - u64 gpu_addr, u32 size) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 sq_tex_resource_word0, sq_tex_resource_word1; - u32 sq_tex_resource_word4, sq_tex_resource_word7; - - if (h < 1) - h = 1; - - sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D); - sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | - ((w - 1) << 18)); - sq_tex_resource_word1 = ((h - 1) << 0) | - TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1); - /* xyzw swizzles */ - sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) | - TEX_DST_SEL_Y(SQ_SEL_Y) | - TEX_DST_SEL_Z(SQ_SEL_Z) | - TEX_DST_SEL_W(SQ_SEL_W); - - sq_tex_resource_word7 = format | - S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE); - - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, size, gpu_addr); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, sq_tex_resource_word0); - radeon_ring_write(ring, sq_tex_resource_word1); - radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(ring, sq_tex_resource_word4); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, sq_tex_resource_word7); -} - -/* emits 12 */ -static void -set_scissors(struct radeon_device *rdev, int x1, int y1, - int x2, int y2) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - /* workaround some hw bugs */ - if (x2 == 0) - x1 = 1; - if (y2 == 0) - y1 = 1; - if (rdev->family >= CHIP_CAYMAN) { - if ((x2 == 1) && (y2 == 1)) - x2 = 2; - } - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(ring, (x1 << 0) | (y1 << 16)); - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); -} - -/* emits 10 */ -static void -draw_auto(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, DI_PT_RECTLIST); - - radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0)); - radeon_ring_write(ring, -#ifdef __BIG_ENDIAN - (2 << 2) | -#endif - DI_INDEX_SIZE_16_BIT); - - radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0)); - radeon_ring_write(ring, 1); - - radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); - radeon_ring_write(ring, 3); - radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX); - -} - -/* emits 39 */ -static void -set_default_state(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; - u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; - u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; - int num_ps_gprs, num_vs_gprs, num_temp_gprs; - int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs; - int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; - int num_hs_threads, num_ls_threads; - int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; - int num_hs_stack_entries, num_ls_stack_entries; - u64 gpu_addr; - int dwords; - - /* set clear context state */ - radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); - radeon_ring_write(ring, 0); - - if (rdev->family < CHIP_CAYMAN) { - switch (rdev->family) { - case CHIP_CEDAR: - default: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 16; - num_gs_threads = 16; - num_es_threads = 16; - num_hs_threads = 16; - num_ls_threads = 16; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_REDWOOD: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_JUNIPER: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_PALM: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 16; - num_gs_threads = 16; - num_es_threads = 16; - num_hs_threads = 16; - num_ls_threads = 16; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_SUMO: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 25; - num_gs_threads = 25; - num_es_threads = 25; - num_hs_threads = 25; - num_ls_threads = 25; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_SUMO2: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 25; - num_gs_threads = 25; - num_es_threads = 25; - num_hs_threads = 25; - num_ls_threads = 25; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_BARTS: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_TURKS: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_CAICOS: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 10; - num_gs_threads = 10; - num_es_threads = 10; - num_hs_threads = 10; - num_ls_threads = 10; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - } - - if ((rdev->family == CHIP_CEDAR) || - (rdev->family == CHIP_PALM) || - (rdev->family == CHIP_SUMO) || - (rdev->family == CHIP_SUMO2) || - (rdev->family == CHIP_CAICOS)) - sq_config = 0; - else - sq_config = VC_ENABLE; - - sq_config |= (EXPORT_SRC_C | - CS_PRIO(0) | - LS_PRIO(0) | - HS_PRIO(0) | - PS_PRIO(0) | - VS_PRIO(1) | - GS_PRIO(2) | - ES_PRIO(3)); - - sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | - NUM_VS_GPRS(num_vs_gprs) | - NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); - sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | - NUM_ES_GPRS(num_es_gprs)); - sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | - NUM_LS_GPRS(num_ls_gprs)); - sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | - NUM_VS_THREADS(num_vs_threads) | - NUM_GS_THREADS(num_gs_threads) | - NUM_ES_THREADS(num_es_threads)); - sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | - NUM_LS_THREADS(num_ls_threads)); - sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | - NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); - sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | - NUM_ES_STACK_ENTRIES(num_es_stack_entries)); - sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | - NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); - - /* disable dyn gprs */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, 0); - - /* setup LDS */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, 0x10001000); - - /* SQ config */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11)); - radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, sq_config); - radeon_ring_write(ring, sq_gpr_resource_mgmt_1); - radeon_ring_write(ring, sq_gpr_resource_mgmt_2); - radeon_ring_write(ring, sq_gpr_resource_mgmt_3); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, sq_thread_resource_mgmt); - radeon_ring_write(ring, sq_thread_resource_mgmt_2); - radeon_ring_write(ring, sq_stack_resource_mgmt_1); - radeon_ring_write(ring, sq_stack_resource_mgmt_2); - radeon_ring_write(ring, sq_stack_resource_mgmt_3); - } - - /* CONTEXT_CONTROL */ - radeon_ring_write(ring, 0xc0012800); - radeon_ring_write(ring, 0x80000000); - radeon_ring_write(ring, 0x80000000); - - /* SQ_VTX_BASE_VTX_LOC */ - radeon_ring_write(ring, 0xc0026f00); - radeon_ring_write(ring, 0x00000000); - radeon_ring_write(ring, 0x00000000); - radeon_ring_write(ring, 0x00000000); - - /* SET_SAMPLER */ - radeon_ring_write(ring, 0xc0036e00); - radeon_ring_write(ring, 0x00000000); - radeon_ring_write(ring, 0x00000012); - radeon_ring_write(ring, 0x00000000); - radeon_ring_write(ring, 0x00000000); - - /* set to DX10/11 mode */ - radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); - radeon_ring_write(ring, 1); - - /* emit an IB pointing at default state */ - dwords = ALIGN(rdev->r600_blit.state_len, 0x10); - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; - radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC); - radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF); - radeon_ring_write(ring, dwords); - -} - -int evergreen_blit_init(struct radeon_device *rdev) -{ - u32 obj_size; - int i, r, dwords; - void *ptr; - u32 packet2s[16]; - int num_packet2s = 0; - - rdev->r600_blit.primitives.set_render_target = set_render_target; - rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; - rdev->r600_blit.primitives.set_shaders = set_shaders; - rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; - rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; - rdev->r600_blit.primitives.set_scissors = set_scissors; - rdev->r600_blit.primitives.draw_auto = draw_auto; - rdev->r600_blit.primitives.set_default_state = set_default_state; - - rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ - rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ - rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ - rdev->r600_blit.ring_size_common += 5; /* done copy */ - rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ - - rdev->r600_blit.ring_size_per_loop = 74; - if (rdev->family >= CHIP_CAYMAN) - rdev->r600_blit.ring_size_per_loop += 9; /* additional DWs for surface sync */ - - rdev->r600_blit.max_dim = 16384; - - rdev->r600_blit.state_offset = 0; - - if (rdev->family < CHIP_CAYMAN) - rdev->r600_blit.state_len = evergreen_default_size; - else - rdev->r600_blit.state_len = cayman_default_size; - - dwords = rdev->r600_blit.state_len; - while (dwords & 0xf) { - packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0)); - dwords++; - } - - obj_size = dwords * 4; - obj_size = ALIGN(obj_size, 256); - - rdev->r600_blit.vs_offset = obj_size; - if (rdev->family < CHIP_CAYMAN) - obj_size += evergreen_vs_size * 4; - else - obj_size += cayman_vs_size * 4; - obj_size = ALIGN(obj_size, 256); - - rdev->r600_blit.ps_offset = obj_size; - if (rdev->family < CHIP_CAYMAN) - obj_size += evergreen_ps_size * 4; - else - obj_size += cayman_ps_size * 4; - obj_size = ALIGN(obj_size, 256); - - /* pin copy shader into vram if not already initialized */ - if (!rdev->r600_blit.shader_obj) { - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, - NULL, &rdev->r600_blit.shader_obj); - if (r) { - DRM_ERROR("evergreen failed to allocate shader\n"); - return r; - } - - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); - return r; - } - } - - DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n", - obj_size, - rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); - - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr); - if (r) { - DRM_ERROR("failed to map blit object %d\n", r); - return r; - } - - if (rdev->family < CHIP_CAYMAN) { - memcpy_toio((char *)ptr + rdev->r600_blit.state_offset, - evergreen_default_state, rdev->r600_blit.state_len * 4); - - if (num_packet2s) - memcpy_toio((char *)ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), - packet2s, num_packet2s * 4); - for (i = 0; i < evergreen_vs_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); - for (i = 0; i < evergreen_ps_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); - } else { - memcpy_toio((char *)ptr + rdev->r600_blit.state_offset, - cayman_default_state, rdev->r600_blit.state_len * 4); - - if (num_packet2s) - memcpy_toio((char *)ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), - packet2s, num_packet2s * 4); - for (i = 0; i < cayman_vs_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]); - for (i = 0; i < cayman_ps_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]); - } - radeon_bo_kunmap(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - return 0; -} diff --git a/sys/dev/drm/radeon/evergreen_blit_shaders.c b/sys/dev/drm/radeon/evergreen_blit_shaders.c index f85c0af115..d43383470c 100644 --- a/sys/dev/drm/radeon/evergreen_blit_shaders.c +++ b/sys/dev/drm/radeon/evergreen_blit_shaders.c @@ -300,58 +300,4 @@ const u32 evergreen_default_state[] = 0x00000010, /* */ }; -const u32 evergreen_vs[] = -{ - 0x00000004, - 0x80800400, - 0x0000a03c, - 0x95000688, - 0x00004000, - 0x15200688, - 0x00000000, - 0x00000000, - 0x3c000000, - 0x67961001, -#ifdef __BIG_ENDIAN - 0x000a0000, -#else - 0x00080000, -#endif - 0x00000000, - 0x1c000000, - 0x67961000, -#ifdef __BIG_ENDIAN - 0x00020008, -#else - 0x00000008, -#endif - 0x00000000, -}; - -const u32 evergreen_ps[] = -{ - 0x00000003, - 0xa00c0000, - 0x00000008, - 0x80400000, - 0x00000000, - 0x95200688, - 0x00380400, - 0x00146b10, - 0x00380000, - 0x20146b10, - 0x00380400, - 0x40146b00, - 0x80380000, - 0x60146b00, - 0x00000000, - 0x00000000, - 0x00000010, - 0x000d1000, - 0xb0800000, - 0x00000000, -}; - -const u32 evergreen_ps_size = ARRAY_SIZE(evergreen_ps); -const u32 evergreen_vs_size = ARRAY_SIZE(evergreen_vs); const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state); diff --git a/sys/dev/drm/radeon/evergreen_cs.c b/sys/dev/drm/radeon/evergreen_cs.c index 79bd1a413a..2a2529faca 100644 --- a/sys/dev/drm/radeon/evergreen_cs.c +++ b/sys/dev/drm/radeon/evergreen_cs.c @@ -24,10 +24,7 @@ * Authors: Dave Airlie * Alex Deucher * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/evergreen_cs.c 254885 2013-08-25 19:37:15Z dumbbell $ */ - #include #include "radeon.h" #include "radeon_asic.h" @@ -948,7 +945,7 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) (u64)track->vgt_strmout_size[i]; if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { DRM_ERROR("streamout %d bo too small: 0x%jx, 0x%lx\n", - i, (uintmax_t)offset, + i, offset, radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } @@ -2109,7 +2106,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA src buffer too small (%ju %lu)\n", - (uintmax_t)tmp + size, radeon_bo_size(reloc->robj)); + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -2147,7 +2144,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA dst buffer too small (%ju %lu)\n", - (uintmax_t)tmp + size, radeon_bo_size(reloc->robj)); + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -2441,7 +2438,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2460,7 +2457,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2489,7 +2486,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } if ((offset + 8) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad MEM_WRITE bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 8, radeon_bo_size(reloc->robj)); + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2514,7 +2511,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad COPY_DW src bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2538,7 +2535,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad COPY_DW dst bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2663,7 +2660,7 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); #if 0 for (r = 0; r < p->ib.length_dw; r++) { - DRM_INFO("%05d 0x%08X\n", r, p->ib.ptr[r]); + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); mdelay(1); } #endif @@ -2713,7 +2710,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) switch (sub_cmd) { /* tiled */ case 8: - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); @@ -2721,20 +2718,20 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) break; /* linear */ case 0: - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; p->idx += count + 3; break; default: - DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib[idx+0]); + DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA write buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset, radeon_bo_size(dst_reloc->robj)); + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; @@ -2753,18 +2750,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* Copy L2L, DW aligned */ case 0x00: /* L2L, dw */ - src_offset = ib[idx+2]; - src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+2); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2L, dw src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); @@ -2776,35 +2773,35 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* Copy L2T/T2L */ case 0x08: /* detile bit */ - if (ib[idx + 2] & (1 << 31)) { + if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { /* tiled src, linear dst */ - src_offset = ib[idx+1]; + src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx + 7); - dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - src_offset = ib[idx+7]; - src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+7); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2T, dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -2812,18 +2809,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* Copy L2L, byte aligned */ case 0x40: /* L2L, byte */ - src_offset = ib[idx+2]; - src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+2); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2L, byte src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + count, radeon_bo_size(src_reloc->robj)); + src_offset + count, radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + count, radeon_bo_size(dst_reloc->robj)); + dst_offset + count, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); @@ -2854,25 +2851,25 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; - dst2_offset = ib[idx+2]; - dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32; - src_offset = ib[idx+3]; - src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + dst2_offset = radeon_get_ib_value(p, idx+2); + dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+3); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%ju %lu)\n", - (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); @@ -2885,7 +2882,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) break; /* Copy L2T Frame to Field */ case 0x48: - if (ib[idx + 2] & (1 << 31)) { + if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } @@ -2894,25 +2891,25 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - dst2_offset = ib[idx+2]; + dst2_offset = radeon_get_ib_value(p, idx+2); dst2_offset <<= 8; - src_offset = ib[idx+8]; - src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+8); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%ju %lu)\n", - (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); @@ -2929,7 +2926,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) return -EINVAL; } /* detile bit */ - if (ib[idx + 2 ] & (1 << 31)) { + if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { /* tiled src, linear dst */ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); @@ -2947,7 +2944,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* Copy L2T broadcast */ case 0x4b: /* L2T, broadcast */ - if (ib[idx + 2] & (1 << 31)) { + if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } @@ -2956,25 +2953,25 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - dst2_offset = ib[idx+2]; + dst2_offset = radeon_get_ib_value(p, idx+2); dst2_offset <<= 8; - src_offset = ib[idx+8]; - src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+8); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n", - (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); @@ -2987,35 +2984,35 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4c: /* L2T, T2L */ /* detile bit */ - if (ib[idx + 2] & (1 << 31)) { + if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { /* tiled src, linear dst */ - src_offset = ib[idx+1]; + src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); - dst_offset = ib[idx+7]; - dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+7); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - src_offset = ib[idx+7]; - src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+7); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -3034,7 +3031,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* Copy L2T broadcast (tile units) */ case 0x4f: /* L2T, broadcast */ - if (ib[idx + 2] & (1 << 31)) { + if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } @@ -3043,25 +3040,25 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - dst2_offset = ib[idx+2]; + dst2_offset = radeon_get_ib_value(p, idx+2); dst2_offset <<= 8; - src_offset = ib[idx+8]; - src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+8); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n", - (uintmax_t)dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); @@ -3071,7 +3068,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) p->idx += 10; break; default: - DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib[idx+0]); + DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); return -EINVAL; } break; @@ -3085,7 +3082,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA constant fill buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset, radeon_bo_size(dst_reloc->robj)); + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); @@ -3102,7 +3099,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); #if 0 for (r = 0; r < p->ib->length_dw; r++) { - DRM_INFO("%05d 0x%08X\n", r, p->ib.ptr[r]); + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); mdelay(1); } #endif diff --git a/sys/dev/drm/radeon/evergreen_dma.c b/sys/dev/drm/radeon/evergreen_dma.c new file mode 100644 index 0000000000..47bb34ff37 --- /dev/null +++ b/sys/dev/drm/radeon/evergreen_dma.c @@ -0,0 +1,186 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "evergreend.h" + +/** + * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (evergreen-SI). + */ +void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + /* write the fence */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); + radeon_ring_write(ring, fence->seq); + /* generate an interrupt */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0)); + /* flush HDP */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); +} + +/** + * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (evergreen). + */ +void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0)); + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * evergreen_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (evergreen-cayman). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int evergreen_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFFF) + cur_size_in_dw = 0xFFFFF; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw)); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} + +/** + * evergreen_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = evergreen_gpu_check_soft_reset(rdev); + + if (!(reset_mask & RADEON_RESET_DMA)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} diff --git a/sys/dev/drm/radeon/evergreen_hdmi.c b/sys/dev/drm/radeon/evergreen_hdmi.c index 39d4ac5b34..7a3f03d9a6 100644 --- a/sys/dev/drm/radeon/evergreen_hdmi.c +++ b/sys/dev/drm/radeon/evergreen_hdmi.c @@ -54,6 +54,48 @@ static void evergreen_hdmi_update_ACR(struct drm_encoder *encoder, uint32_t cloc WREG32(HDMI_ACR_48_1 + offset, acr.n_48khz); } +static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct drm_connector *connector; + struct radeon_connector *radeon_connector = NULL; + u32 tmp; + u8 *sadb; + int sad_count; + + /* XXX: setting this register causes hangs on some asics */ + return; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) + radeon_connector = to_radeon_connector(connector); + } + + if (!radeon_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb); + if (sad_count < 0) { + DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); + return; + } + + /* program the speaker allocation */ + tmp = RREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER); + tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK); + /* set HDMI mode */ + tmp |= HDMI_CONNECTION; + if (sad_count) + tmp |= SPEAKER_ALLOCATION(sadb[0]); + else + tmp |= SPEAKER_ALLOCATION(5); /* stereo */ + WREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER, tmp); + + kfree(sadb); +} + static void evergreen_hdmi_write_sad_regs(struct drm_encoder *encoder) { struct radeon_device *rdev = encoder->dev->dev_private; @@ -157,22 +199,26 @@ static void evergreen_audio_set_dto(struct drm_encoder *encoder, u32 clock) if (!dig || !dig->afmt) return; - if (max_ratio >= 8) { - dto_phase = 192 * 1000; - wallclock_ratio = 3; - } else if (max_ratio >= 4) { - dto_phase = 96 * 1000; - wallclock_ratio = 2; - } else if (max_ratio >= 2) { - dto_phase = 48 * 1000; - wallclock_ratio = 1; - } else { + if (ASIC_IS_DCE6(rdev)) { dto_phase = 24 * 1000; - wallclock_ratio = 0; + } else { + if (max_ratio >= 8) { + dto_phase = 192 * 1000; + wallclock_ratio = 3; + } else if (max_ratio >= 4) { + dto_phase = 96 * 1000; + wallclock_ratio = 2; + } else if (max_ratio >= 2) { + dto_phase = 48 * 1000; + wallclock_ratio = 1; + } else { + dto_phase = 24 * 1000; + wallclock_ratio = 0; + } + dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK; + dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio); + WREG32(DCCG_AUDIO_DTO0_CNTL, dto_cntl); } - dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK; - dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio); - WREG32(DCCG_AUDIO_DTO0_CNTL, dto_cntl); /* XXX two dtos; generally use dto0 for hdmi */ /* Express [24MHz / target pixel clock] as an exact rational @@ -241,8 +287,8 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode /* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */ WREG32(HDMI_ACR_PACKET_CONTROL + offset, - HDMI_ACR_AUTO_SEND | /* allow hw to sent ACR packets when required */ - HDMI_ACR_SOURCE); /* select SW CTS value */ + HDMI_ACR_SOURCE | /* select SW CTS value */ + HDMI_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */ evergreen_hdmi_update_ACR(encoder, mode->clock); @@ -260,13 +306,23 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode AFMT_60958_CS_CHANNEL_NUMBER_6(7) | AFMT_60958_CS_CHANNEL_NUMBER_7(8)); - /* fglrx sets 0x0001005f | (x & 0x00fc0000) in 0x5f78 here */ + if (ASIC_IS_DCE6(rdev)) { + dce6_afmt_write_speaker_allocation(encoder); + } else { + dce4_afmt_write_speaker_allocation(encoder); + } WREG32(AFMT_AUDIO_PACKET_CONTROL2 + offset, AFMT_AUDIO_CHANNEL_ENABLE(0xff)); /* fglrx sets 0x40 in 0x5f80 here */ - evergreen_hdmi_write_sad_regs(encoder); + + if (ASIC_IS_DCE6(rdev)) { + dce6_afmt_select_pin(encoder); + dce6_afmt_write_sad_regs(encoder); + } else { + evergreen_hdmi_write_sad_regs(encoder); + } err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); if (err < 0) { @@ -302,6 +358,8 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) { + struct drm_device *dev = encoder->dev; + struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; @@ -314,6 +372,15 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!enable && !dig->afmt->enabled) return; + if (enable) { + if (ASIC_IS_DCE6(rdev)) + dig->afmt->pin = dce6_audio_get_pin(rdev); + else + dig->afmt->pin = r600_audio_get_pin(rdev); + } else { + dig->afmt->pin = NULL; + } + dig->afmt->enabled = enable; DRM_DEBUG("%sabling HDMI interface @ 0x%04X for encoder 0x%x\n", diff --git a/sys/dev/drm/radeon/evergreend.h b/sys/dev/drm/radeon/evergreend.h index 0d582ac1dc..4f6d296276 100644 --- a/sys/dev/drm/radeon/evergreend.h +++ b/sys/dev/drm/radeon/evergreend.h @@ -714,6 +714,13 @@ #define AFMT_GENERIC0_7 0x7138 /* DCE4/5 ELD audio interface */ +#define AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER 0x5f78 +#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0) +#define SPEAKER_ALLOCATION_MASK (0x7f << 0) +#define SPEAKER_ALLOCATION_SHIFT 0 +#define HDMI_CONNECTION (1 << 16) +#define DP_CONNECTION (1 << 17) + #define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x5f84 /* LPCM */ #define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x5f88 /* AC3 */ #define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x5f8c /* MPEG1 */ @@ -1153,6 +1160,10 @@ # define LATENCY_LOW_WATERMARK(x) ((x) << 0) # define LATENCY_HIGH_WATERMARK(x) ((x) << 16) +#define PIPE0_DMIF_BUFFER_CONTROL 0x0ca0 +# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) +# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) + #define IH_RB_CNTL 0x3e00 # define IH_RB_ENABLE (1 << 0) # define IH_IB_SIZE(x) ((x) << 1) /* log2 */ @@ -1490,7 +1501,7 @@ * 6. COMMAND [29:22] | BYTE_COUNT [20:0] */ # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) - /* 0 - SRC_ADDR + /* 0 - DST_ADDR * 1 - GDS */ # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) @@ -1505,7 +1516,7 @@ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ # define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/sys/dev/drm/radeon/kv_dpm.c b/sys/dev/drm/radeon/kv_dpm.c new file mode 100644 index 0000000000..68796f1f7a --- /dev/null +++ b/sys/dev/drm/radeon/kv_dpm.c @@ -0,0 +1,2733 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" +#include "r600_dpm.h" +#include "kv_dpm.h" +#include "radeon_asic.h" +#include + +#define KV_MAX_DEEPSLEEP_DIVIDER_ID 5 +#define KV_MINIMUM_ENGINE_CLOCK 800 +#define SMC_RAM_END 0x40000 + +static void kv_init_graphics_levels(struct radeon_device *rdev); +static int kv_calculate_ds_divider(struct radeon_device *rdev); +static int kv_calculate_nbps_level_settings(struct radeon_device *rdev); +static int kv_calculate_dpm_settings(struct radeon_device *rdev); +static void kv_enable_new_levels(struct radeon_device *rdev); +static void kv_program_nbps_index_settings(struct radeon_device *rdev, + struct radeon_ps *new_rps); +static int kv_set_enabled_level(struct radeon_device *rdev, u32 level); +static int kv_set_enabled_levels(struct radeon_device *rdev); +static int kv_force_dpm_highest(struct radeon_device *rdev); +static int kv_force_dpm_lowest(struct radeon_device *rdev); +static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps); +static int kv_set_thermal_temperature_range(struct radeon_device *rdev, + int min_temp, int max_temp); +static int kv_init_fps_limits(struct radeon_device *rdev); + +static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate); +static void kv_dpm_powergate_samu(struct radeon_device *rdev, bool gate); +static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate); + +static const struct kv_lcac_config_values sx_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 1, 4, 1 }, + { 2, 5, 1 }, + { 3, 4, 2 }, + { 4, 1, 1 }, + { 5, 5, 2 }, + { 6, 6, 1 }, + { 7, 9, 2 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc0_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc1_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc2_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values mc3_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_values cpl_local_cac_cfg_kv[] = +{ + { 0, 4, 1 }, + { 1, 4, 1 }, + { 2, 5, 1 }, + { 3, 4, 1 }, + { 4, 1, 1 }, + { 5, 5, 1 }, + { 6, 6, 1 }, + { 7, 9, 1 }, + { 8, 4, 1 }, + { 9, 2, 1 }, + { 10, 3, 1 }, + { 11, 6, 1 }, + { 12, 8, 2 }, + { 13, 1, 1 }, + { 14, 2, 1 }, + { 15, 3, 1 }, + { 16, 1, 1 }, + { 17, 4, 1 }, + { 18, 3, 1 }, + { 19, 1, 1 }, + { 20, 8, 1 }, + { 21, 5, 1 }, + { 22, 1, 1 }, + { 23, 1, 1 }, + { 24, 4, 1 }, + { 27, 6, 1 }, + { 28, 1, 1 }, + { 0xffffffff } +}; + +static const struct kv_lcac_config_reg sx0_cac_config_reg[] = +{ + { 0xc0400d00, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc0_cac_config_reg[] = +{ + { 0xc0400d30, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc1_cac_config_reg[] = +{ + { 0xc0400d3c, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc2_cac_config_reg[] = +{ + { 0xc0400d48, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg mc3_cac_config_reg[] = +{ + { 0xc0400d54, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_lcac_config_reg cpl_cac_config_reg[] = +{ + { 0xc0400d80, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 } +}; + +static const struct kv_pt_config_reg didt_config_kv[] = +{ + { 0x10, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x10, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x10, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x10, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x11, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x12, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x2, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x2, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x2, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x1, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x1, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x0, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x30, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x31, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x32, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x22, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x22, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x22, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x21, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x21, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x20, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x50, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x51, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x52, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x42, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x42, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x42, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x41, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x41, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x40, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x70, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x71, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x72, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0x62, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND }, + { 0x62, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND }, + { 0x62, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND }, + { 0x61, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x61, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND }, + { 0x60, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND }, + { 0xFFFFFFFF } +}; + +static struct kv_ps *kv_get_ps(struct radeon_ps *rps) +{ + struct kv_ps *ps = rps->ps_priv; + + return ps; +} + +static struct kv_power_info *kv_get_pi(struct radeon_device *rdev) +{ + struct kv_power_info *pi = rdev->pm.dpm.priv; + + return pi; +} + +#if 0 +static void kv_program_local_cac_table(struct radeon_device *rdev, + const struct kv_lcac_config_values *local_cac_table, + const struct kv_lcac_config_reg *local_cac_reg) +{ + u32 i, count, data; + const struct kv_lcac_config_values *values = local_cac_table; + + while (values->block_id != 0xffffffff) { + count = values->signal_id; + for (i = 0; i < count; i++) { + data = ((values->block_id << local_cac_reg->block_shift) & + local_cac_reg->block_mask); + data |= ((i << local_cac_reg->signal_shift) & + local_cac_reg->signal_mask); + data |= ((values->t << local_cac_reg->t_shift) & + local_cac_reg->t_mask); + data |= ((1 << local_cac_reg->enable_shift) & + local_cac_reg->enable_mask); + WREG32_SMC(local_cac_reg->cntl, data); + } + values++; + } +} +#endif + +static int kv_program_pt_config_registers(struct radeon_device *rdev, + const struct kv_pt_config_reg *cac_config_regs) +{ + const struct kv_pt_config_reg *config_regs = cac_config_regs; + u32 data; + u32 cache = 0; + + if (config_regs == NULL) + return -EINVAL; + + while (config_regs->offset != 0xFFFFFFFF) { + if (config_regs->type == KV_CONFIGREG_CACHE) { + cache |= ((config_regs->value << config_regs->shift) & config_regs->mask); + } else { + switch (config_regs->type) { + case KV_CONFIGREG_SMC_IND: + data = RREG32_SMC(config_regs->offset); + break; + case KV_CONFIGREG_DIDT_IND: + data = RREG32_DIDT(config_regs->offset); + break; + default: + data = RREG32(config_regs->offset << 2); + break; + } + + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + data |= cache; + cache = 0; + + switch (config_regs->type) { + case KV_CONFIGREG_SMC_IND: + WREG32_SMC(config_regs->offset, data); + break; + case KV_CONFIGREG_DIDT_IND: + WREG32_DIDT(config_regs->offset, data); + break; + default: + WREG32(config_regs->offset << 2, data); + break; + } + } + config_regs++; + } + + return 0; +} + +static void kv_do_enable_didt(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 data; + + if (pi->caps_sq_ramping) { + data = RREG32_DIDT(DIDT_SQ_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_SQ_CTRL0, data); + } + + if (pi->caps_db_ramping) { + data = RREG32_DIDT(DIDT_DB_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_DB_CTRL0, data); + } + + if (pi->caps_td_ramping) { + data = RREG32_DIDT(DIDT_TD_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TD_CTRL0, data); + } + + if (pi->caps_tcp_ramping) { + data = RREG32_DIDT(DIDT_TCP_CTRL0); + if (enable) + data |= DIDT_CTRL_EN; + else + data &= ~DIDT_CTRL_EN; + WREG32_DIDT(DIDT_TCP_CTRL0, data); + } +} + +static int kv_enable_didt(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + if (pi->caps_sq_ramping || + pi->caps_db_ramping || + pi->caps_td_ramping || + pi->caps_tcp_ramping) { + cik_enter_rlc_safe_mode(rdev); + + if (enable) { + ret = kv_program_pt_config_registers(rdev, didt_config_kv); + if (ret) { + cik_exit_rlc_safe_mode(rdev); + return ret; + } + } + + kv_do_enable_didt(rdev, enable); + + cik_exit_rlc_safe_mode(rdev); + } + + return 0; +} + +#if 0 +static void kv_initialize_hardware_cac_manager(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->caps_cac) { + WREG32_SMC(LCAC_SX0_OVR_SEL, 0); + WREG32_SMC(LCAC_SX0_OVR_VAL, 0); + kv_program_local_cac_table(rdev, sx_local_cac_cfg_kv, sx0_cac_config_reg); + + WREG32_SMC(LCAC_MC0_OVR_SEL, 0); + WREG32_SMC(LCAC_MC0_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc0_local_cac_cfg_kv, mc0_cac_config_reg); + + WREG32_SMC(LCAC_MC1_OVR_SEL, 0); + WREG32_SMC(LCAC_MC1_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc1_local_cac_cfg_kv, mc1_cac_config_reg); + + WREG32_SMC(LCAC_MC2_OVR_SEL, 0); + WREG32_SMC(LCAC_MC2_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc2_local_cac_cfg_kv, mc2_cac_config_reg); + + WREG32_SMC(LCAC_MC3_OVR_SEL, 0); + WREG32_SMC(LCAC_MC3_OVR_VAL, 0); + kv_program_local_cac_table(rdev, mc3_local_cac_cfg_kv, mc3_cac_config_reg); + + WREG32_SMC(LCAC_CPL_OVR_SEL, 0); + WREG32_SMC(LCAC_CPL_OVR_VAL, 0); + kv_program_local_cac_table(rdev, cpl_local_cac_cfg_kv, cpl_cac_config_reg); + } +} +#endif + +static int kv_enable_smc_cac(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + + if (pi->caps_cac) { + if (enable) { + ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_EnableCac); + if (ret) + pi->cac_enabled = false; + else + pi->cac_enabled = true; + } else if (pi->cac_enabled) { + kv_notify_message_to_smu(rdev, PPSMC_MSG_DisableCac); + pi->cac_enabled = false; + } + } + + return ret; +} + +static int kv_process_firmware_header(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 tmp; + int ret; + + ret = kv_read_smc_sram_dword(rdev, SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, DpmTable), + &tmp, pi->sram_end); + + if (ret == 0) + pi->dpm_table_start = tmp; + + ret = kv_read_smc_sram_dword(rdev, SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU7_Firmware_Header, SoftRegisters), + &tmp, pi->sram_end); + + if (ret == 0) + pi->soft_regs_start = tmp; + + return ret; +} + +static int kv_enable_dpm_voltage_scaling(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + pi->graphics_voltage_change_enable = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsVoltageChangeEnable), + &pi->graphics_voltage_change_enable, + sizeof(u8), pi->sram_end); + + return ret; +} + +static int kv_set_dpm_interval(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + pi->graphics_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsInterval), + &pi->graphics_interval, + sizeof(u8), pi->sram_end); + + return ret; +} + +static int kv_set_dpm_boot_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsBootLevel), + &pi->graphics_boot_level, + sizeof(u8), pi->sram_end); + + return ret; +} + +static void kv_program_vc(struct radeon_device *rdev) +{ + WREG32_SMC(CG_FTV_0, 0x3FFFC100); +} + +static void kv_clear_vc(struct radeon_device *rdev) +{ + WREG32_SMC(CG_FTV_0, 0); +} + +static int kv_set_divider_value(struct radeon_device *rdev, + u32 index, u32 sclk) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct atom_clock_dividers dividers; + int ret; + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + sclk, false, ÷rs); + if (ret) + return ret; + + pi->graphics_level[index].SclkDid = (u8)dividers.post_div; + pi->graphics_level[index].SclkFrequency = cpu_to_be32(sclk); + + return 0; +} + +static u16 kv_convert_8bit_index_to_voltage(struct radeon_device *rdev, + u16 voltage) +{ + return 6200 - (voltage * 25); +} + +static u16 kv_convert_2bit_index_to_voltage(struct radeon_device *rdev, + u32 vid_2bit) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 vid_8bit = sumo_convert_vid2_to_vid7(rdev, + &pi->sys_info.vid_mapping_table, + vid_2bit); + + return kv_convert_8bit_index_to_voltage(rdev, (u16)vid_8bit); +} + + +static int kv_set_vid(struct radeon_device *rdev, u32 index, u32 vid) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].VoltageDownH = (u8)pi->voltage_drop_t; + pi->graphics_level[index].MinVddNb = + cpu_to_be32(kv_convert_2bit_index_to_voltage(rdev, vid)); + + return 0; +} + +static int kv_set_at(struct radeon_device *rdev, u32 index, u32 at) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].AT = cpu_to_be16((u16)at); + + return 0; +} + +static void kv_dpm_power_level_enable(struct radeon_device *rdev, + u32 index, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].EnabledForActivity = enable ? 1 : 0; +} + +static void kv_start_dpm(struct radeon_device *rdev) +{ + u32 tmp = RREG32_SMC(GENERAL_PWRMGT); + + tmp |= GLOBAL_PWRMGT_EN; + WREG32_SMC(GENERAL_PWRMGT, tmp); + + kv_smc_dpm_enable(rdev, true); +} + +static void kv_stop_dpm(struct radeon_device *rdev) +{ + kv_smc_dpm_enable(rdev, false); +} + +static void kv_start_am(struct radeon_device *rdev) +{ + u32 sclk_pwrmgt_cntl = RREG32_SMC(SCLK_PWRMGT_CNTL); + + sclk_pwrmgt_cntl &= ~(RESET_SCLK_CNT | RESET_BUSY_CNT); + sclk_pwrmgt_cntl |= DYNAMIC_PM_EN; + + WREG32_SMC(SCLK_PWRMGT_CNTL, sclk_pwrmgt_cntl); +} + +static void kv_reset_am(struct radeon_device *rdev) +{ + u32 sclk_pwrmgt_cntl = RREG32_SMC(SCLK_PWRMGT_CNTL); + + sclk_pwrmgt_cntl |= (RESET_SCLK_CNT | RESET_BUSY_CNT); + + WREG32_SMC(SCLK_PWRMGT_CNTL, sclk_pwrmgt_cntl); +} + +static int kv_freeze_sclk_dpm(struct radeon_device *rdev, bool freeze) +{ + return kv_notify_message_to_smu(rdev, freeze ? + PPSMC_MSG_SCLKDPM_FreezeLevel : PPSMC_MSG_SCLKDPM_UnfreezeLevel); +} + +static int kv_force_lowest_valid(struct radeon_device *rdev) +{ + return kv_force_dpm_lowest(rdev); +} + +static int kv_unforce_levels(struct radeon_device *rdev) +{ + if (rdev->family == CHIP_KABINI) + return kv_notify_message_to_smu(rdev, PPSMC_MSG_NoForcedLevel); + else + return kv_set_enabled_levels(rdev); +} + +static int kv_update_sclk_t(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 low_sclk_interrupt_t = 0; + int ret = 0; + + if (pi->caps_sclk_throttle_low_notification) { + low_sclk_interrupt_t = cpu_to_be32(pi->low_sclk_interrupt_t); + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, LowSclkInterruptT), + (u8 *)&low_sclk_interrupt_t, + sizeof(u32), pi->sram_end); + } + return ret; +} + +static int kv_program_bootup_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { + if (table->entries[i].clk == pi->boot_pl.sclk) + break; + } + + pi->graphics_boot_level = (u8)i; + kv_dpm_power_level_enable(rdev, i, true); + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + if (table->num_max_dpm_entries == 0) + return -EINVAL; + + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { + if (table->entries[i].sclk_frequency == pi->boot_pl.sclk) + break; + } + + pi->graphics_boot_level = (u8)i; + kv_dpm_power_level_enable(rdev, i, true); + } + return 0; +} + +static int kv_enable_auto_thermal_throttling(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + pi->graphics_therm_throttle_enable = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsThermThrottleEnable), + &pi->graphics_therm_throttle_enable, + sizeof(u8), pi->sram_end); + + return ret; +} + +static int kv_upload_dpm_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsLevel), + (u8 *)&pi->graphics_level, + sizeof(SMU7_Fusion_GraphicsLevel) * SMU7_MAX_LEVELS_GRAPHICS, + pi->sram_end); + + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsDpmLevelCount), + &pi->graphics_dpm_level_count, + sizeof(u8), pi->sram_end); + + return ret; +} + +static u32 kv_get_clock_difference(u32 a, u32 b) +{ + return (a >= b) ? a - b : b - a; +} + +static u32 kv_get_clk_bypass(struct radeon_device *rdev, u32 clk) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 value; + + if (pi->caps_enable_dfs_bypass) { + if (kv_get_clock_difference(clk, 40000) < 200) + value = 3; + else if (kv_get_clock_difference(clk, 30000) < 200) + value = 2; + else if (kv_get_clock_difference(clk, 20000) < 200) + value = 7; + else if (kv_get_clock_difference(clk, 15000) < 200) + value = 6; + else if (kv_get_clock_difference(clk, 10000) < 200) + value = 8; + else + value = 0; + } else { + value = 0; + } + + return value; +} + +static int kv_populate_uvd_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_uvd_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + int ret; + u32 i; + + if (table == NULL || table->count == 0) + return 0; + + pi->uvd_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < table->entries[i].v)) + break; + + pi->uvd_level[i].VclkFrequency = cpu_to_be32(table->entries[i].vclk); + pi->uvd_level[i].DclkFrequency = cpu_to_be32(table->entries[i].dclk); + pi->uvd_level[i].MinVddNb = cpu_to_be16(table->entries[i].v); + + pi->uvd_level[i].VClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].vclk); + pi->uvd_level[i].DClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].dclk); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].vclk, false, ÷rs); + if (ret) + return ret; + pi->uvd_level[i].VclkDivider = (u8)dividers.post_div; + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].dclk, false, ÷rs); + if (ret) + return ret; + pi->uvd_level[i].DclkDivider = (u8)dividers.post_div; + + pi->uvd_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UvdLevelCount), + (u8 *)&pi->uvd_level_count, + sizeof(u8), pi->sram_end); + if (ret) + return ret; + + pi->uvd_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UVDInterval), + &pi->uvd_interval, + sizeof(u8), pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UvdLevel), + (u8 *)&pi->uvd_level, + sizeof(SMU7_Fusion_UvdLevel) * SMU7_MAX_LEVELS_UVD, + pi->sram_end); + + return ret; + +} + +static int kv_populate_vce_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + u32 i; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + + if (table == NULL || table->count == 0) + return 0; + + pi->vce_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + pi->high_voltage_t < table->entries[i].v) + break; + + pi->vce_level[i].Frequency = cpu_to_be32(table->entries[i].evclk); + pi->vce_level[i].MinVoltage = cpu_to_be16(table->entries[i].v); + + pi->vce_level[i].ClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].evclk); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].evclk, false, ÷rs); + if (ret) + return ret; + pi->vce_level[i].Divider = (u8)dividers.post_div; + + pi->vce_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VceLevelCount), + (u8 *)&pi->vce_level_count, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + pi->vce_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VCEInterval), + (u8 *)&pi->vce_interval, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VceLevel), + (u8 *)&pi->vce_level, + sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_VCE, + pi->sram_end); + + return ret; +} + +static int kv_populate_samu_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + int ret; + u32 i; + + if (table == NULL || table->count == 0) + return 0; + + pi->samu_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + pi->high_voltage_t < table->entries[i].v) + break; + + pi->samu_level[i].Frequency = cpu_to_be32(table->entries[i].clk); + pi->samu_level[i].MinVoltage = cpu_to_be16(table->entries[i].v); + + pi->samu_level[i].ClkBypassCntl = + (u8)kv_get_clk_bypass(rdev, table->entries[i].clk); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].clk, false, ÷rs); + if (ret) + return ret; + pi->samu_level[i].Divider = (u8)dividers.post_div; + + pi->samu_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SamuLevelCount), + (u8 *)&pi->samu_level_count, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + pi->samu_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SAMUInterval), + (u8 *)&pi->samu_interval, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SamuLevel), + (u8 *)&pi->samu_level, + sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_SAMU, + pi->sram_end); + if (ret) + return ret; + + return ret; +} + + +static int kv_populate_acp_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; + struct atom_clock_dividers dividers; + int ret; + u32 i; + + if (table == NULL || table->count == 0) + return 0; + + pi->acp_level_count = 0; + for (i = 0; i < table->count; i++) { + pi->acp_level[i].Frequency = cpu_to_be32(table->entries[i].clk); + pi->acp_level[i].MinVoltage = cpu_to_be16(table->entries[i].v); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + table->entries[i].clk, false, ÷rs); + if (ret) + return ret; + pi->acp_level[i].Divider = (u8)dividers.post_div; + + pi->acp_level_count++; + } + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, AcpLevelCount), + (u8 *)&pi->acp_level_count, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + pi->acp_interval = 1; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, ACPInterval), + (u8 *)&pi->acp_interval, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, AcpLevel), + (u8 *)&pi->acp_level, + sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_ACP, + pi->sram_end); + if (ret) + return ret; + + return ret; +} + +static void kv_calculate_dfs_bypass_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + for (i = 0; i < pi->graphics_dpm_level_count; i++) { + if (pi->caps_enable_dfs_bypass) { + if (kv_get_clock_difference(table->entries[i].clk, 40000) < 200) + pi->graphics_level[i].ClkBypassCntl = 3; + else if (kv_get_clock_difference(table->entries[i].clk, 30000) < 200) + pi->graphics_level[i].ClkBypassCntl = 2; + else if (kv_get_clock_difference(table->entries[i].clk, 26600) < 200) + pi->graphics_level[i].ClkBypassCntl = 7; + else if (kv_get_clock_difference(table->entries[i].clk , 20000) < 200) + pi->graphics_level[i].ClkBypassCntl = 6; + else if (kv_get_clock_difference(table->entries[i].clk , 10000) < 200) + pi->graphics_level[i].ClkBypassCntl = 8; + else + pi->graphics_level[i].ClkBypassCntl = 0; + } else { + pi->graphics_level[i].ClkBypassCntl = 0; + } + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + for (i = 0; i < pi->graphics_dpm_level_count; i++) { + if (pi->caps_enable_dfs_bypass) { + if (kv_get_clock_difference(table->entries[i].sclk_frequency, 40000) < 200) + pi->graphics_level[i].ClkBypassCntl = 3; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 30000) < 200) + pi->graphics_level[i].ClkBypassCntl = 2; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 26600) < 200) + pi->graphics_level[i].ClkBypassCntl = 7; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 20000) < 200) + pi->graphics_level[i].ClkBypassCntl = 6; + else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 10000) < 200) + pi->graphics_level[i].ClkBypassCntl = 8; + else + pi->graphics_level[i].ClkBypassCntl = 0; + } else { + pi->graphics_level[i].ClkBypassCntl = 0; + } + } + } +} + +static int kv_enable_ulv(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_EnableULV : PPSMC_MSG_DisableULV); +} + +static void kv_reset_acp_boot_level(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->acp_boot_level = 0xff; +} + +static void kv_update_current_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct kv_ps *new_ps = kv_get_ps(rps); + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->current_rps = *rps; + pi->current_ps = *new_ps; + pi->current_rps.ps_priv = &pi->current_ps; +} + +static void kv_update_requested_ps(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + struct kv_ps *new_ps = kv_get_ps(rps); + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->requested_rps = *rps; + pi->requested_ps = *new_ps; + pi->requested_rps.ps_priv = &pi->requested_ps; +} + +void kv_dpm_enable_bapm(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + if (pi->bapm_enable) { + ret = kv_smc_bapm_enable(rdev, enable); + if (ret) + DRM_ERROR("kv_smc_bapm_enable failed\n"); + } +} + +int kv_dpm_enable(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_HDP), false); + + ret = kv_process_firmware_header(rdev); + if (ret) { + DRM_ERROR("kv_process_firmware_header failed\n"); + return ret; + } + kv_init_fps_limits(rdev); + kv_init_graphics_levels(rdev); + ret = kv_program_bootup_state(rdev); + if (ret) { + DRM_ERROR("kv_program_bootup_state failed\n"); + return ret; + } + kv_calculate_dfs_bypass_settings(rdev); + ret = kv_upload_dpm_settings(rdev); + if (ret) { + DRM_ERROR("kv_upload_dpm_settings failed\n"); + return ret; + } + ret = kv_populate_uvd_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_uvd_table failed\n"); + return ret; + } + ret = kv_populate_vce_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_vce_table failed\n"); + return ret; + } + ret = kv_populate_samu_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_samu_table failed\n"); + return ret; + } + ret = kv_populate_acp_table(rdev); + if (ret) { + DRM_ERROR("kv_populate_acp_table failed\n"); + return ret; + } + kv_program_vc(rdev); +#if 0 + kv_initialize_hardware_cac_manager(rdev); +#endif + kv_start_am(rdev); + if (pi->enable_auto_thermal_throttling) { + ret = kv_enable_auto_thermal_throttling(rdev); + if (ret) { + DRM_ERROR("kv_enable_auto_thermal_throttling failed\n"); + return ret; + } + } + ret = kv_enable_dpm_voltage_scaling(rdev); + if (ret) { + DRM_ERROR("kv_enable_dpm_voltage_scaling failed\n"); + return ret; + } + ret = kv_set_dpm_interval(rdev); + if (ret) { + DRM_ERROR("kv_set_dpm_interval failed\n"); + return ret; + } + ret = kv_set_dpm_boot_state(rdev); + if (ret) { + DRM_ERROR("kv_set_dpm_boot_state failed\n"); + return ret; + } + ret = kv_enable_ulv(rdev, true); + if (ret) { + DRM_ERROR("kv_enable_ulv failed\n"); + return ret; + } + kv_start_dpm(rdev); + ret = kv_enable_didt(rdev, true); + if (ret) { + DRM_ERROR("kv_enable_didt failed\n"); + return ret; + } + ret = kv_enable_smc_cac(rdev, true); + if (ret) { + DRM_ERROR("kv_enable_smc_cac failed\n"); + return ret; + } + + kv_reset_acp_boot_level(rdev); + + if (rdev->irq.installed && + r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) { + ret = kv_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) { + DRM_ERROR("kv_set_thermal_temperature_range failed\n"); + return ret; + } + rdev->irq.dpm_thermal = true; + radeon_irq_set(rdev); + } + + ret = kv_smc_bapm_enable(rdev, false); + if (ret) { + DRM_ERROR("kv_smc_bapm_enable failed\n"); + return ret; + } + + /* powerdown unused blocks for now */ + kv_dpm_powergate_acp(rdev, true); + kv_dpm_powergate_samu(rdev, true); + kv_dpm_powergate_vce(rdev, true); + kv_dpm_powergate_uvd(rdev, true); + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_HDP), true); + + kv_update_current_ps(rdev, rdev->pm.dpm.boot_ps); + + return ret; +} + +void kv_dpm_disable(struct radeon_device *rdev) +{ + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_HDP), false); + + kv_smc_bapm_enable(rdev, false); + + /* powerup blocks */ + kv_dpm_powergate_acp(rdev, false); + kv_dpm_powergate_samu(rdev, false); + kv_dpm_powergate_vce(rdev, false); + kv_dpm_powergate_uvd(rdev, false); + + kv_enable_smc_cac(rdev, false); + kv_enable_didt(rdev, false); + kv_clear_vc(rdev); + kv_stop_dpm(rdev); + kv_enable_ulv(rdev, false); + kv_reset_am(rdev); + + kv_update_current_ps(rdev, rdev->pm.dpm.boot_ps); +} + +#if 0 +static int kv_write_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 value) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + return kv_copy_bytes_to_smc(rdev, pi->soft_regs_start + reg_offset, + (u8 *)&value, sizeof(u16), pi->sram_end); +} + +static int kv_read_smc_soft_register(struct radeon_device *rdev, + u16 reg_offset, u32 *value) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + return kv_read_smc_sram_dword(rdev, pi->soft_regs_start + reg_offset, + value, pi->sram_end); +} +#endif + +static void kv_init_sclk_t(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->low_sclk_interrupt_t = 0; +} + +static int kv_init_fps_limits(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + + if (pi->caps_fps) { + u16 tmp; + + tmp = 45; + pi->fps_high_t = cpu_to_be16(tmp); + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, FpsHighT), + (u8 *)&pi->fps_high_t, + sizeof(u16), pi->sram_end); + + tmp = 30; + pi->fps_low_t = cpu_to_be16(tmp); + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, FpsLowT), + (u8 *)&pi->fps_low_t, + sizeof(u16), pi->sram_end); + + } + return ret; +} + +static void kv_init_powergate_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->uvd_power_gated = false; + pi->vce_power_gated = false; + pi->samu_power_gated = false; + pi->acp_power_gated = false; + +} + +static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable); +} + +#if 0 +static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable); +} +#endif + +static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_SAMUDPM_Enable : PPSMC_MSG_SAMUDPM_Disable); +} + +static int kv_enable_acp_dpm(struct radeon_device *rdev, bool enable) +{ + return kv_notify_message_to_smu(rdev, enable ? + PPSMC_MSG_ACPDPM_Enable : PPSMC_MSG_ACPDPM_Disable); +} + +static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_uvd_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table; + int ret; + + if (!gate) { + if (!pi->caps_uvd_dpm || table->count || pi->caps_stable_p_state) + pi->uvd_boot_level = table->count - 1; + else + pi->uvd_boot_level = 0; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, UvdBootLevel), + (uint8_t *)&pi->uvd_boot_level, + sizeof(u8), pi->sram_end); + if (ret) + return ret; + + if (!pi->caps_uvd_dpm || + pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_UVDDPM_SetEnabledMask, + (1 << pi->uvd_boot_level)); + } + + return kv_enable_uvd_dpm(rdev, !gate); +} + +#if 0 +static u8 kv_get_vce_boot_level(struct radeon_device *rdev) +{ + u8 i; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + for (i = 0; i < table->count; i++) { + if (table->entries[i].evclk >= 0) /* XXX */ + break; + } + + return i; +} + +static int kv_update_vce_dpm(struct radeon_device *rdev, + struct radeon_ps *radeon_new_state, + struct radeon_ps *radeon_current_state) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + int ret; + + if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { + if (pi->caps_stable_p_state) + pi->vce_boot_level = table->count - 1; + else + pi->vce_boot_level = kv_get_vce_boot_level(rdev); + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, VceBootLevel), + (u8 *)&pi->vce_boot_level, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + if (pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_VCEDPM_SetEnabledMask, + (1 << pi->vce_boot_level)); + + kv_enable_vce_dpm(rdev, true); + } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { + kv_enable_vce_dpm(rdev, false); + } + + return 0; +} +#endif + +static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table; + int ret; + + if (!gate) { + if (pi->caps_stable_p_state) + pi->samu_boot_level = table->count - 1; + else + pi->samu_boot_level = 0; + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, SamuBootLevel), + (u8 *)&pi->samu_boot_level, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + if (pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + (1 << pi->samu_boot_level)); + } + + return kv_enable_samu_dpm(rdev, !gate); +} + +static u8 kv_get_acp_boot_level(struct radeon_device *rdev) +{ + u8 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; + + for (i = 0; i < table->count; i++) { + if (table->entries[i].clk >= 0) /* XXX */ + break; + } + + if (i >= table->count) + i = table->count - 1; + + return i; +} + +static void kv_update_acp_boot_level(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u8 acp_boot_level; + + if (!pi->caps_stable_p_state) { + acp_boot_level = kv_get_acp_boot_level(rdev); + if (acp_boot_level != pi->acp_boot_level) { + pi->acp_boot_level = acp_boot_level; + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_ACPDPM_SetEnabledMask, + (1 << pi->acp_boot_level)); + } + } +} + +static int kv_update_acp_dpm(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; + int ret; + + if (!gate) { + if (pi->caps_stable_p_state) + pi->acp_boot_level = table->count - 1; + else + pi->acp_boot_level = kv_get_acp_boot_level(rdev); + + ret = kv_copy_bytes_to_smc(rdev, + pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, AcpBootLevel), + (u8 *)&pi->acp_boot_level, + sizeof(u8), + pi->sram_end); + if (ret) + return ret; + + if (pi->caps_stable_p_state) + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_ACPDPM_SetEnabledMask, + (1 << pi->acp_boot_level)); + } + + return kv_enable_acp_dpm(rdev, !gate); +} + +void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->uvd_power_gated == gate) + return; + + pi->uvd_power_gated = gate; + + if (gate) { + if (pi->caps_uvd_pg) { + uvd_v1_0_stop(rdev); + cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, false); + } + kv_update_uvd_dpm(rdev, gate); + if (pi->caps_uvd_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerOFF); + } else { + if (pi->caps_uvd_pg) { + kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerON); + uvd_v4_2_resume(rdev); + uvd_v1_0_start(rdev); + cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, true); + } + kv_update_uvd_dpm(rdev, gate); + } +} + +static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->vce_power_gated == gate) + return; + + pi->vce_power_gated = gate; + + if (gate) { + if (pi->caps_vce_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF); + } else { + if (pi->caps_vce_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON); + } +} + +static void kv_dpm_powergate_samu(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->samu_power_gated == gate) + return; + + pi->samu_power_gated = gate; + + if (gate) { + kv_update_samu_dpm(rdev, true); + if (pi->caps_samu_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_SAMPowerOFF); + } else { + if (pi->caps_samu_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_SAMPowerON); + kv_update_samu_dpm(rdev, false); + } +} + +static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->acp_power_gated == gate) + return; + + if (rdev->family == CHIP_KABINI) + return; + + pi->acp_power_gated = gate; + + if (gate) { + kv_update_acp_dpm(rdev, true); + if (pi->caps_acp_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_ACPPowerOFF); + } else { + if (pi->caps_acp_pg) + kv_notify_message_to_smu(rdev, PPSMC_MSG_ACPPowerON); + kv_update_acp_dpm(rdev, false); + } +} + +static void kv_set_valid_clock_range(struct radeon_device *rdev, + struct radeon_ps *new_rps) +{ + struct kv_ps *new_ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + for (i = 0; i < pi->graphics_dpm_level_count; i++) { + if ((table->entries[i].clk >= new_ps->levels[0].sclk) || + (i == (pi->graphics_dpm_level_count - 1))) { + pi->lowest_valid = i; + break; + } + } + + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { + if (table->entries[i].clk <= new_ps->levels[new_ps->num_levels - 1].sclk) + break; + } + pi->highest_valid = i; + + if (pi->lowest_valid > pi->highest_valid) { + if ((new_ps->levels[0].sclk - table->entries[pi->highest_valid].clk) > + (table->entries[pi->lowest_valid].clk - new_ps->levels[new_ps->num_levels - 1].sclk)) + pi->highest_valid = pi->lowest_valid; + else + pi->lowest_valid = pi->highest_valid; + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + for (i = 0; i < (int)pi->graphics_dpm_level_count; i++) { + if (table->entries[i].sclk_frequency >= new_ps->levels[0].sclk || + i == (int)(pi->graphics_dpm_level_count - 1)) { + pi->lowest_valid = i; + break; + } + } + + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { + if (table->entries[i].sclk_frequency <= + new_ps->levels[new_ps->num_levels - 1].sclk) + break; + } + pi->highest_valid = i; + + if (pi->lowest_valid > pi->highest_valid) { + if ((new_ps->levels[0].sclk - + table->entries[pi->highest_valid].sclk_frequency) > + (table->entries[pi->lowest_valid].sclk_frequency - + new_ps->levels[new_ps->num_levels -1].sclk)) + pi->highest_valid = pi->lowest_valid; + else + pi->lowest_valid = pi->highest_valid; + } + } +} + +static int kv_update_dfs_bypass_settings(struct radeon_device *rdev, + struct radeon_ps *new_rps) +{ + struct kv_ps *new_ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + u8 clk_bypass_cntl; + + if (pi->caps_enable_dfs_bypass) { + clk_bypass_cntl = new_ps->need_dfs_bypass ? + pi->graphics_level[pi->graphics_boot_level].ClkBypassCntl : 0; + ret = kv_copy_bytes_to_smc(rdev, + (pi->dpm_table_start + + offsetof(SMU7_Fusion_DpmTable, GraphicsLevel) + + (pi->graphics_boot_level * sizeof(SMU7_Fusion_GraphicsLevel)) + + offsetof(SMU7_Fusion_GraphicsLevel, ClkBypassCntl)), + &clk_bypass_cntl, + sizeof(u8), pi->sram_end); + } + + return ret; +} + +static int kv_enable_nb_dpm(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret = 0; + + if (pi->enable_nb_dpm && !pi->nb_dpm_enabled) { + ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_NBDPM_Enable); + if (ret == 0) + pi->nb_dpm_enabled = true; + } + + return ret; +} + +int kv_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level) +{ + int ret; + + if (level == RADEON_DPM_FORCED_LEVEL_HIGH) { + ret = kv_force_dpm_highest(rdev); + if (ret) + return ret; + } else if (level == RADEON_DPM_FORCED_LEVEL_LOW) { + ret = kv_force_dpm_lowest(rdev); + if (ret) + return ret; + } else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) { + ret = kv_unforce_levels(rdev); + if (ret) + return ret; + } + + rdev->pm.dpm.forced_level = level; + + return 0; +} + +int kv_dpm_pre_set_power_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_ps requested_ps = *rdev->pm.dpm.requested_ps; + struct radeon_ps *new_ps = &requested_ps; + + kv_update_requested_ps(rdev, new_ps); + + kv_apply_state_adjust_rules(rdev, + &pi->requested_rps, + &pi->current_rps); + + return 0; +} + +int kv_dpm_set_power_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + /*struct radeon_ps *old_ps = &pi->current_rps;*/ + int ret; + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_HDP), false); + + if (pi->bapm_enable) { + ret = kv_smc_bapm_enable(rdev, rdev->pm.dpm.ac_power); + if (ret) { + DRM_ERROR("kv_smc_bapm_enable failed\n"); + return ret; + } + } + + if (rdev->family == CHIP_KABINI) { + if (pi->enable_dpm) { + kv_set_valid_clock_range(rdev, new_ps); + kv_update_dfs_bypass_settings(rdev, new_ps); + ret = kv_calculate_ds_divider(rdev); + if (ret) { + DRM_ERROR("kv_calculate_ds_divider failed\n"); + return ret; + } + kv_calculate_nbps_level_settings(rdev); + kv_calculate_dpm_settings(rdev); + kv_force_lowest_valid(rdev); + kv_enable_new_levels(rdev); + kv_upload_dpm_settings(rdev); + kv_program_nbps_index_settings(rdev, new_ps); + kv_unforce_levels(rdev); + kv_set_enabled_levels(rdev); + kv_force_lowest_valid(rdev); + kv_unforce_levels(rdev); +#if 0 + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("kv_update_vce_dpm failed\n"); + return ret; + } +#endif + kv_update_sclk_t(rdev); + } + } else { + if (pi->enable_dpm) { + kv_set_valid_clock_range(rdev, new_ps); + kv_update_dfs_bypass_settings(rdev, new_ps); + ret = kv_calculate_ds_divider(rdev); + if (ret) { + DRM_ERROR("kv_calculate_ds_divider failed\n"); + return ret; + } + kv_calculate_nbps_level_settings(rdev); + kv_calculate_dpm_settings(rdev); + kv_freeze_sclk_dpm(rdev, true); + kv_upload_dpm_settings(rdev); + kv_program_nbps_index_settings(rdev, new_ps); + kv_freeze_sclk_dpm(rdev, false); + kv_set_enabled_levels(rdev); +#if 0 + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); + if (ret) { + DRM_ERROR("kv_update_vce_dpm failed\n"); + return ret; + } +#endif + kv_update_acp_boot_level(rdev); + kv_update_sclk_t(rdev); + kv_enable_nb_dpm(rdev); + } + } + + cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_HDP), true); + + return 0; +} + +void kv_dpm_post_set_power_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_ps *new_ps = &pi->requested_rps; + + kv_update_current_ps(rdev, new_ps); +} + +void kv_dpm_setup_asic(struct radeon_device *rdev) +{ + sumo_take_smu_control(rdev, true); + kv_init_powergate_state(rdev); + kv_init_sclk_t(rdev); +} + +void kv_dpm_reset_asic(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (rdev->family == CHIP_KABINI) { + kv_force_lowest_valid(rdev); + kv_init_graphics_levels(rdev); + kv_program_bootup_state(rdev); + kv_upload_dpm_settings(rdev); + kv_force_lowest_valid(rdev); + kv_unforce_levels(rdev); + } else { + kv_init_graphics_levels(rdev); + kv_program_bootup_state(rdev); + kv_freeze_sclk_dpm(rdev, true); + kv_upload_dpm_settings(rdev); + kv_freeze_sclk_dpm(rdev, false); + kv_set_enabled_level(rdev, pi->graphics_boot_level); + } +} + +//XXX use sumo_dpm_display_configuration_changed + +static void kv_construct_max_power_limits_table(struct radeon_device *rdev, + struct radeon_clock_and_voltage_limits *table) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + if (pi->sys_info.sclk_voltage_mapping_table.num_max_dpm_entries > 0) { + int idx = pi->sys_info.sclk_voltage_mapping_table.num_max_dpm_entries - 1; + table->sclk = + pi->sys_info.sclk_voltage_mapping_table.entries[idx].sclk_frequency; + table->vddc = + kv_convert_2bit_index_to_voltage(rdev, + pi->sys_info.sclk_voltage_mapping_table.entries[idx].vid_2bit); + } + + table->mclk = pi->sys_info.nbp_memory_clock[0]; +} + +static void kv_patch_voltage_values(struct radeon_device *rdev) +{ + int i; + struct radeon_uvd_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table; + + if (table->count) { + for (i = 0; i < table->count; i++) + table->entries[i].v = + kv_convert_8bit_index_to_voltage(rdev, + table->entries[i].v); + } + +} + +static void kv_construct_boot_state(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->boot_pl.sclk = pi->sys_info.bootup_sclk; + pi->boot_pl.vddc_index = pi->sys_info.bootup_nb_voltage_index; + pi->boot_pl.ds_divider_index = 0; + pi->boot_pl.ss_divider_index = 0; + pi->boot_pl.allow_gnb_slow = 1; + pi->boot_pl.force_nbp_state = 0; + pi->boot_pl.display_wm = 0; + pi->boot_pl.vce_wm = 0; +} + +static int kv_force_dpm_highest(struct radeon_device *rdev) +{ + int ret; + u32 enable_mask, i; + + ret = kv_dpm_get_enable_mask(rdev, &enable_mask); + if (ret) + return ret; + + for (i = SMU7_MAX_LEVELS_GRAPHICS - 1; i > 0; i--) { + if (enable_mask & (1 << i)) + break; + } + + if (rdev->family == CHIP_KABINI) + return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); + else + return kv_set_enabled_level(rdev, i); +} + +static int kv_force_dpm_lowest(struct radeon_device *rdev) +{ + int ret; + u32 enable_mask, i; + + ret = kv_dpm_get_enable_mask(rdev, &enable_mask); + if (ret) + return ret; + + for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) { + if (enable_mask & (1 << i)) + break; + } + + if (rdev->family == CHIP_KABINI) + return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); + else + return kv_set_enabled_level(rdev, i); +} + +static u8 kv_get_sleep_divider_id_from_clock(struct radeon_device *rdev, + u32 sclk, u32 min_sclk_in_sr) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + u32 temp; + u32 min = (min_sclk_in_sr > KV_MINIMUM_ENGINE_CLOCK) ? + min_sclk_in_sr : KV_MINIMUM_ENGINE_CLOCK; + + if (sclk < min) + return 0; + + if (!pi->caps_sclk_ds) + return 0; + + for (i = KV_MAX_DEEPSLEEP_DIVIDER_ID; i > 0; i--) { + temp = sclk / sumo_get_sleep_divider_from_id(i); + if (temp >= min) + break; + } + + return (u8)i; +} + +static int kv_get_high_voltage_limit(struct radeon_device *rdev, int *limit) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + int i; + + if (table && table->count) { + for (i = table->count - 1; i >= 0; i--) { + if (pi->high_voltage_t && + (kv_convert_8bit_index_to_voltage(rdev, table->entries[i].v) <= + pi->high_voltage_t)) { + *limit = i; + return 0; + } + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + for (i = table->num_max_dpm_entries - 1; i >= 0; i--) { + if (pi->high_voltage_t && + (kv_convert_2bit_index_to_voltage(rdev, table->entries[i].vid_2bit) <= + pi->high_voltage_t)) { + *limit = i; + return 0; + } + } + } + + *limit = 0; + return 0; +} + +static void kv_apply_state_adjust_rules(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps) +{ + struct kv_ps *ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + u32 min_sclk = 10000; /* ??? */ + u32 sclk, mclk = 0; + int i, limit; + bool force_high; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + u32 stable_p_state_sclk = 0; + struct radeon_clock_and_voltage_limits *max_limits = + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + + mclk = max_limits->mclk; + sclk = min_sclk; + + if (pi->caps_stable_p_state) { + stable_p_state_sclk = (max_limits->sclk * 75) / 100; + + for (i = table->count - 1; i >= 0; i++) { + if (stable_p_state_sclk >= table->entries[i].clk) { + stable_p_state_sclk = table->entries[i].clk; + break; + } + } + + if (i > 0) + stable_p_state_sclk = table->entries[0].clk; + + sclk = stable_p_state_sclk; + } + + ps->need_dfs_bypass = true; + + for (i = 0; i < ps->num_levels; i++) { + if (ps->levels[i].sclk < sclk) + ps->levels[i].sclk = sclk; + } + + if (table && table->count) { + for (i = 0; i < ps->num_levels; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < + kv_convert_8bit_index_to_voltage(rdev, ps->levels[i].vddc_index))) { + kv_get_high_voltage_limit(rdev, &limit); + ps->levels[i].sclk = table->entries[limit].clk; + } + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + for (i = 0; i < ps->num_levels; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < + kv_convert_8bit_index_to_voltage(rdev, ps->levels[i].vddc_index))) { + kv_get_high_voltage_limit(rdev, &limit); + ps->levels[i].sclk = table->entries[limit].sclk_frequency; + } + } + } + + if (pi->caps_stable_p_state) { + for (i = 0; i < ps->num_levels; i++) { + ps->levels[i].sclk = stable_p_state_sclk; + } + } + + pi->video_start = new_rps->dclk || new_rps->vclk; + + if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == + ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) + pi->battery_state = true; + else + pi->battery_state = false; + + if (rdev->family == CHIP_KABINI) { + ps->dpm0_pg_nb_ps_lo = 0x1; + ps->dpm0_pg_nb_ps_hi = 0x0; + ps->dpmx_nb_ps_lo = 0x1; + ps->dpmx_nb_ps_hi = 0x0; + } else { + ps->dpm0_pg_nb_ps_lo = 0x3; + ps->dpm0_pg_nb_ps_hi = 0x0; + ps->dpmx_nb_ps_lo = 0x3; + ps->dpmx_nb_ps_hi = 0x0; + + if (pi->sys_info.nb_dpm_enable) { + force_high = (mclk >= pi->sys_info.nbp_memory_clock[3]) || + pi->video_start || (rdev->pm.dpm.new_active_crtc_count >= 3) || + pi->disable_nb_ps3_in_battery; + ps->dpm0_pg_nb_ps_lo = force_high ? 0x2 : 0x3; + ps->dpm0_pg_nb_ps_hi = 0x2; + ps->dpmx_nb_ps_lo = force_high ? 0x2 : 0x3; + ps->dpmx_nb_ps_hi = 0x2; + } + } +} + +static void kv_dpm_power_level_enabled_for_throttle(struct radeon_device *rdev, + u32 index, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + pi->graphics_level[index].EnabledForThrottle = enable ? 1 : 0; +} + +static int kv_calculate_ds_divider(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 sclk_in_sr = 10000; /* ??? */ + u32 i; + + if (pi->lowest_valid > pi->highest_valid) + return -EINVAL; + + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) { + pi->graphics_level[i].DeepSleepDivId = + kv_get_sleep_divider_id_from_clock(rdev, + be32_to_cpu(pi->graphics_level[i].SclkFrequency), + sclk_in_sr); + } + return 0; +} + +static int kv_calculate_nbps_level_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + bool force_high; + struct radeon_clock_and_voltage_limits *max_limits = + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + u32 mclk = max_limits->mclk; + + if (pi->lowest_valid > pi->highest_valid) + return -EINVAL; + + if (rdev->family == CHIP_KABINI) { + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) { + pi->graphics_level[i].GnbSlow = 1; + pi->graphics_level[i].ForceNbPs1 = 0; + pi->graphics_level[i].UpH = 0; + } + + if (!pi->sys_info.nb_dpm_enable) + return 0; + + force_high = ((mclk >= pi->sys_info.nbp_memory_clock[3]) || + (rdev->pm.dpm.new_active_crtc_count >= 3) || pi->video_start); + + if (force_high) { + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) + pi->graphics_level[i].GnbSlow = 0; + } else { + if (pi->battery_state) + pi->graphics_level[0].ForceNbPs1 = 1; + + pi->graphics_level[1].GnbSlow = 0; + pi->graphics_level[2].GnbSlow = 0; + pi->graphics_level[3].GnbSlow = 0; + pi->graphics_level[4].GnbSlow = 0; + } + } else { + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) { + pi->graphics_level[i].GnbSlow = 1; + pi->graphics_level[i].ForceNbPs1 = 0; + pi->graphics_level[i].UpH = 0; + } + + if (pi->sys_info.nb_dpm_enable && pi->battery_state) { + pi->graphics_level[pi->lowest_valid].UpH = 0x28; + pi->graphics_level[pi->lowest_valid].GnbSlow = 0; + if (pi->lowest_valid != pi->highest_valid) + pi->graphics_level[pi->lowest_valid].ForceNbPs1 = 1; + } + } + return 0; +} + +static int kv_calculate_dpm_settings(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + + if (pi->lowest_valid > pi->highest_valid) + return -EINVAL; + + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) + pi->graphics_level[i].DisplayWatermark = (i == pi->highest_valid) ? 1 : 0; + + return 0; +} + +static void kv_init_graphics_levels(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; + + if (table && table->count) { + u32 vid_2bit; + + pi->graphics_dpm_level_count = 0; + for (i = 0; i < table->count; i++) { + if (pi->high_voltage_t && + (pi->high_voltage_t < + kv_convert_8bit_index_to_voltage(rdev, table->entries[i].v))) + break; + + kv_set_divider_value(rdev, i, table->entries[i].clk); + vid_2bit = sumo_convert_vid7_to_vid2(rdev, + &pi->sys_info.vid_mapping_table, + table->entries[i].v); + kv_set_vid(rdev, i, vid_2bit); + kv_set_at(rdev, i, pi->at[i]); + kv_dpm_power_level_enabled_for_throttle(rdev, i, true); + pi->graphics_dpm_level_count++; + } + } else { + struct sumo_sclk_voltage_mapping_table *table = + &pi->sys_info.sclk_voltage_mapping_table; + + pi->graphics_dpm_level_count = 0; + for (i = 0; i < table->num_max_dpm_entries; i++) { + if (pi->high_voltage_t && + pi->high_voltage_t < + kv_convert_2bit_index_to_voltage(rdev, table->entries[i].vid_2bit)) + break; + + kv_set_divider_value(rdev, i, table->entries[i].sclk_frequency); + kv_set_vid(rdev, i, table->entries[i].vid_2bit); + kv_set_at(rdev, i, pi->at[i]); + kv_dpm_power_level_enabled_for_throttle(rdev, i, true); + pi->graphics_dpm_level_count++; + } + } + + for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) + kv_dpm_power_level_enable(rdev, i, false); +} + +static void kv_enable_new_levels(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i; + + for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) { + if (i >= pi->lowest_valid && i <= pi->highest_valid) + kv_dpm_power_level_enable(rdev, i, true); + } +} + +static int kv_set_enabled_level(struct radeon_device *rdev, u32 level) +{ + u32 new_mask = (1 << level); + + return kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + new_mask); +} + +static int kv_set_enabled_levels(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 i, new_mask = 0; + + for (i = pi->lowest_valid; i <= pi->highest_valid; i++) + new_mask |= (1 << i); + + return kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + new_mask); +} + +static void kv_program_nbps_index_settings(struct radeon_device *rdev, + struct radeon_ps *new_rps) +{ + struct kv_ps *new_ps = kv_get_ps(new_rps); + struct kv_power_info *pi = kv_get_pi(rdev); + u32 nbdpmconfig1; + + if (rdev->family == CHIP_KABINI) + return; + + if (pi->sys_info.nb_dpm_enable) { + nbdpmconfig1 = RREG32_SMC(NB_DPM_CONFIG_1); + nbdpmconfig1 &= ~(Dpm0PgNbPsLo_MASK | Dpm0PgNbPsHi_MASK | + DpmXNbPsLo_MASK | DpmXNbPsHi_MASK); + nbdpmconfig1 |= (Dpm0PgNbPsLo(new_ps->dpm0_pg_nb_ps_lo) | + Dpm0PgNbPsHi(new_ps->dpm0_pg_nb_ps_hi) | + DpmXNbPsLo(new_ps->dpmx_nb_ps_lo) | + DpmXNbPsHi(new_ps->dpmx_nb_ps_hi)); + WREG32_SMC(NB_DPM_CONFIG_1, nbdpmconfig1); + } +} + +static int kv_set_thermal_temperature_range(struct radeon_device *rdev, + int min_temp, int max_temp) +{ + int low_temp = 0 * 1000; + int high_temp = 255 * 1000; + u32 tmp; + + if (low_temp < min_temp) + low_temp = min_temp; + if (high_temp > max_temp) + high_temp = max_temp; + if (high_temp < low_temp) { + DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp); + return -EINVAL; + } + + tmp = RREG32_SMC(CG_THERMAL_INT_CTRL); + tmp &= ~(DIG_THERM_INTH_MASK | DIG_THERM_INTL_MASK); + tmp |= (DIG_THERM_INTH(49 + (high_temp / 1000)) | + DIG_THERM_INTL(49 + (low_temp / 1000))); + WREG32_SMC(CG_THERMAL_INT_CTRL, tmp); + + rdev->pm.dpm.thermal.min_temp = low_temp; + rdev->pm.dpm.thermal.max_temp = high_temp; + + return 0; +} + +union igp_info { + struct _ATOM_INTEGRATED_SYSTEM_INFO info; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V5 info_5; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; +}; + +static int kv_parse_sys_info_table(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); + union igp_info *igp_info; + u8 frev, crev; + u16 data_offset; + int i; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *)(mode_info->atom_context->bios + + data_offset); + + if (crev != 8) { + DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev); + return -EINVAL; + } + pi->sys_info.bootup_sclk = le32_to_cpu(igp_info->info_8.ulBootUpEngineClock); + pi->sys_info.bootup_uma_clk = le32_to_cpu(igp_info->info_8.ulBootUpUMAClock); + pi->sys_info.bootup_nb_voltage_index = + le16_to_cpu(igp_info->info_8.usBootUpNBVoltage); + if (igp_info->info_8.ucHtcTmpLmt == 0) + pi->sys_info.htc_tmp_lmt = 203; + else + pi->sys_info.htc_tmp_lmt = igp_info->info_8.ucHtcTmpLmt; + if (igp_info->info_8.ucHtcHystLmt == 0) + pi->sys_info.htc_hyst_lmt = 5; + else + pi->sys_info.htc_hyst_lmt = igp_info->info_8.ucHtcHystLmt; + if (pi->sys_info.htc_tmp_lmt <= pi->sys_info.htc_hyst_lmt) { + DRM_ERROR("The htcTmpLmt should be larger than htcHystLmt.\n"); + } + + if (le32_to_cpu(igp_info->info_8.ulSystemConfig) & (1 << 3)) + pi->sys_info.nb_dpm_enable = true; + else + pi->sys_info.nb_dpm_enable = false; + + for (i = 0; i < KV_NUM_NBPSTATES; i++) { + pi->sys_info.nbp_memory_clock[i] = + le32_to_cpu(igp_info->info_8.ulNbpStateMemclkFreq[i]); + pi->sys_info.nbp_n_clock[i] = + le32_to_cpu(igp_info->info_8.ulNbpStateNClkFreq[i]); + } + if (le32_to_cpu(igp_info->info_8.ulGPUCapInfo) & + SYS_INFO_GPUCAPS__ENABEL_DFS_BYPASS) + pi->caps_enable_dfs_bypass = true; + + sumo_construct_sclk_voltage_mapping_table(rdev, + &pi->sys_info.sclk_voltage_mapping_table, + igp_info->info_8.sAvail_SCLK); + + sumo_construct_vid_mapping_table(rdev, + &pi->sys_info.vid_mapping_table, + igp_info->info_8.sAvail_SCLK); + + kv_construct_max_power_limits_table(rdev, + &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac); + } + return 0; +} + +union power_info { + struct _ATOM_POWERPLAY_INFO info; + struct _ATOM_POWERPLAY_INFO_V2 info_2; + struct _ATOM_POWERPLAY_INFO_V3 info_3; + struct _ATOM_PPLIB_POWERPLAYTABLE pplib; + struct _ATOM_PPLIB_POWERPLAYTABLE2 pplib2; + struct _ATOM_PPLIB_POWERPLAYTABLE3 pplib3; +}; + +union pplib_clock_info { + struct _ATOM_PPLIB_R600_CLOCK_INFO r600; + struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780; + struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen; + struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo; +}; + +union pplib_power_state { + struct _ATOM_PPLIB_STATE v1; + struct _ATOM_PPLIB_STATE_V2 v2; +}; + +static void kv_patch_boot_state(struct radeon_device *rdev, + struct kv_ps *ps) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + ps->num_levels = 1; + ps->levels[0] = pi->boot_pl; +} + +static void kv_parse_pplib_non_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info, + u8 table_rev) +{ + struct kv_ps *ps = kv_get_ps(rps); + + rps->caps = le32_to_cpu(non_clock_info->ulCapsAndSettings); + rps->class = le16_to_cpu(non_clock_info->usClassification); + rps->class2 = le16_to_cpu(non_clock_info->usClassification2); + + if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { + rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); + rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); + } else { + rps->vclk = 0; + rps->dclk = 0; + } + + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) { + rdev->pm.dpm.boot_ps = rps; + kv_patch_boot_state(rdev, ps); + } + if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) + rdev->pm.dpm.uvd_ps = rps; +} + +static void kv_parse_pplib_clock_info(struct radeon_device *rdev, + struct radeon_ps *rps, int index, + union pplib_clock_info *clock_info) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct kv_ps *ps = kv_get_ps(rps); + struct kv_pl *pl = &ps->levels[index]; + u32 sclk; + + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); + sclk |= clock_info->sumo.ucEngineClockHigh << 16; + pl->sclk = sclk; + pl->vddc_index = clock_info->sumo.vddcIndex; + + ps->num_levels = index + 1; + + if (pi->caps_sclk_ds) { + pl->ds_divider_index = 5; + pl->ss_divider_index = 5; + } +} + +static int kv_parse_power_table(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info; + union pplib_power_state *power_state; + int i, j, k, non_clock_array_index, clock_array_index; + union pplib_clock_info *clock_info; + struct _StateArray *state_array; + struct _ClockInfoArray *clock_info_array; + struct _NonClockInfoArray *non_clock_info_array; + union power_info *power_info; + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); + u16 data_offset; + u8 frev, crev; + u8 *power_state_offset; + struct kv_ps *ps; + + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) + return -EINVAL; + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); + + state_array = (struct _StateArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usStateArrayOffset)); + clock_info_array = (struct _ClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset)); + non_clock_info_array = (struct _NonClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); + + rdev->pm.dpm.ps = kzalloc(sizeof(struct radeon_ps) * + state_array->ucNumEntries, GFP_KERNEL); + if (!rdev->pm.dpm.ps) + return -ENOMEM; + power_state_offset = (u8 *)state_array->states; + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; + power_state = (union pplib_power_state *)power_state_offset; + non_clock_array_index = power_state->v2.nonClockInfoIndex; + non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) + &non_clock_info_array->nonClockInfo[non_clock_array_index]; + if (!rdev->pm.power_state[i].clock_info) + return -EINVAL; + ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL); + if (ps == NULL) { + kfree(rdev->pm.dpm.ps); + return -ENOMEM; + } + rdev->pm.dpm.ps[i].ps_priv = ps; + k = 0; + idx = (u8 *)&power_state->v2.clockInfoIndex[0]; + for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { + clock_array_index = idx[j]; + if (clock_array_index >= clock_info_array->ucNumEntries) + continue; + if (k >= SUMO_MAX_HARDWARE_POWERLEVELS) + break; + clock_info = (union pplib_clock_info *) + ((u8 *)&clock_info_array->clockInfo[0] + + (clock_array_index * clock_info_array->ucEntrySize)); + kv_parse_pplib_clock_info(rdev, + &rdev->pm.dpm.ps[i], k, + clock_info); + k++; + } + kv_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], + non_clock_info, + non_clock_info_array->ucEntrySize); + power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + } + rdev->pm.dpm.num_ps = state_array->ucNumEntries; + return 0; +} + +int kv_dpm_init(struct radeon_device *rdev) +{ + struct kv_power_info *pi; + int ret, i; + + pi = kzalloc(sizeof(struct kv_power_info), GFP_KERNEL); + if (pi == NULL) + return -ENOMEM; + rdev->pm.dpm.priv = pi; + + ret = r600_parse_extended_power_table(rdev); + if (ret) + return ret; + + for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) + pi->at[i] = TRINITY_AT_DFLT; + + pi->sram_end = SMC_RAM_END; + + if (rdev->family == CHIP_KABINI) + pi->high_voltage_t = 4001; + + pi->enable_nb_dpm = true; + + pi->caps_power_containment = true; + pi->caps_cac = true; + pi->enable_didt = false; + if (pi->enable_didt) { + pi->caps_sq_ramping = true; + pi->caps_db_ramping = true; + pi->caps_td_ramping = true; + pi->caps_tcp_ramping = true; + } + + pi->caps_sclk_ds = true; + pi->enable_auto_thermal_throttling = true; + pi->disable_nb_ps3_in_battery = false; + pi->bapm_enable = false; + pi->voltage_drop_t = 0; + pi->caps_sclk_throttle_low_notification = false; + pi->caps_fps = false; /* true? */ + pi->caps_uvd_pg = true; + pi->caps_uvd_dpm = true; + pi->caps_vce_pg = false; + pi->caps_samu_pg = false; + pi->caps_acp_pg = false; + pi->caps_stable_p_state = false; + + ret = kv_parse_sys_info_table(rdev); + if (ret) + return ret; + + kv_patch_voltage_values(rdev); + kv_construct_boot_state(rdev); + + ret = kv_parse_power_table(rdev); + if (ret) + return ret; + + pi->enable_dpm = true; + + return 0; +} + +void kv_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, + struct seq_file *m) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u32 current_index = + (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) & CURR_SCLK_INDEX_MASK) >> + CURR_SCLK_INDEX_SHIFT; + u32 sclk, tmp; + u16 vddc; + + if (current_index >= SMU__NUM_SCLK_DPM_STATE) { + seq_printf(m, "invalid dpm profile %d\n", current_index); + } else { + sclk = be32_to_cpu(pi->graphics_level[current_index].SclkFrequency); + tmp = (RREG32_SMC(SMU_VOLTAGE_STATUS) & SMU_VOLTAGE_CURRENT_LEVEL_MASK) >> + SMU_VOLTAGE_CURRENT_LEVEL_SHIFT; + vddc = kv_convert_8bit_index_to_voltage(rdev, (u16)tmp); + seq_printf(m, "power level %d sclk: %u vddc: %u\n", + current_index, sclk, vddc); + } +} + +void kv_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *rps) +{ + int i; + struct kv_ps *ps = kv_get_ps(rps); + + r600_dpm_print_class_info(rps->class, rps->class2); + r600_dpm_print_cap_info(rps->caps); + printk("\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + for (i = 0; i < ps->num_levels; i++) { + struct kv_pl *pl = &ps->levels[i]; + printk("\t\tpower level %d sclk: %u vddc: %u\n", + i, pl->sclk, + kv_convert_8bit_index_to_voltage(rdev, pl->vddc_index)); + } + r600_dpm_print_ps_status(rdev, rps); +} + +void kv_dpm_fini(struct radeon_device *rdev) +{ + int i; + + for (i = 0; i < rdev->pm.dpm.num_ps; i++) { + kfree(rdev->pm.dpm.ps[i].ps_priv); + } + kfree(rdev->pm.dpm.ps); + kfree(rdev->pm.dpm.priv); + r600_free_extended_power_table(rdev); +} + +void kv_dpm_display_configuration_changed(struct radeon_device *rdev) +{ + +} + +u32 kv_dpm_get_sclk(struct radeon_device *rdev, bool low) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + struct kv_ps *requested_state = kv_get_ps(&pi->requested_rps); + + if (low) + return requested_state->levels[0].sclk; + else + return requested_state->levels[requested_state->num_levels - 1].sclk; +} + +u32 kv_dpm_get_mclk(struct radeon_device *rdev, bool low) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + return pi->sys_info.bootup_uma_clk; +} diff --git a/sys/dev/drm/radeon/kv_dpm.h b/sys/dev/drm/radeon/kv_dpm.h new file mode 100644 index 0000000000..5ecb48c462 --- /dev/null +++ b/sys/dev/drm/radeon/kv_dpm.h @@ -0,0 +1,201 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __KV_DPM_H__ +#define __KV_DPM_H__ + +#define SMU__NUM_SCLK_DPM_STATE 8 +#define SMU__NUM_MCLK_DPM_LEVELS 4 +#define SMU__NUM_LCLK_DPM_LEVELS 8 +#define SMU__NUM_PCIE_DPM_LEVELS 0 /* ??? */ +#include "smu7_fusion.h" +#include "trinity_dpm.h" +#include "ppsmc.h" + +#define KV_NUM_NBPSTATES 4 + +enum kv_pt_config_reg_type { + KV_CONFIGREG_MMR = 0, + KV_CONFIGREG_SMC_IND, + KV_CONFIGREG_DIDT_IND, + KV_CONFIGREG_CACHE, + KV_CONFIGREG_MAX +}; + +struct kv_pt_config_reg { + u32 offset; + u32 mask; + u32 shift; + u32 value; + enum kv_pt_config_reg_type type; +}; + +struct kv_lcac_config_values { + u32 block_id; + u32 signal_id; + u32 t; +}; + +struct kv_lcac_config_reg { + u32 cntl; + u32 block_mask; + u32 block_shift; + u32 signal_mask; + u32 signal_shift; + u32 t_mask; + u32 t_shift; + u32 enable_mask; + u32 enable_shift; +}; + +struct kv_pl { + u32 sclk; + u8 vddc_index; + u8 ds_divider_index; + u8 ss_divider_index; + u8 allow_gnb_slow; + u8 force_nbp_state; + u8 display_wm; + u8 vce_wm; +}; + +struct kv_ps { + struct kv_pl levels[SUMO_MAX_HARDWARE_POWERLEVELS]; + u32 num_levels; + bool need_dfs_bypass; + u8 dpm0_pg_nb_ps_lo; + u8 dpm0_pg_nb_ps_hi; + u8 dpmx_nb_ps_lo; + u8 dpmx_nb_ps_hi; +}; + +struct kv_sys_info { + u32 bootup_uma_clk; + u32 bootup_sclk; + u32 dentist_vco_freq; + u32 nb_dpm_enable; + u32 nbp_memory_clock[KV_NUM_NBPSTATES]; + u32 nbp_n_clock[KV_NUM_NBPSTATES]; + u16 bootup_nb_voltage_index; + u8 htc_tmp_lmt; + u8 htc_hyst_lmt; + struct sumo_sclk_voltage_mapping_table sclk_voltage_mapping_table; + struct sumo_vid_mapping_table vid_mapping_table; + u32 uma_channel_number; +}; + +struct kv_power_info { + u32 at[SUMO_MAX_HARDWARE_POWERLEVELS]; + u32 voltage_drop_t; + struct kv_sys_info sys_info; + struct kv_pl boot_pl; + bool enable_nb_ps_policy; + bool disable_nb_ps3_in_battery; + bool video_start; + bool battery_state; + u32 lowest_valid; + u32 highest_valid; + u16 high_voltage_t; + bool cac_enabled; + bool bapm_enable; + /* smc offsets */ + u32 sram_end; + u32 dpm_table_start; + u32 soft_regs_start; + /* dpm SMU tables */ + u8 graphics_dpm_level_count; + u8 uvd_level_count; + u8 vce_level_count; + u8 acp_level_count; + u8 samu_level_count; + u16 fps_high_t; + SMU7_Fusion_GraphicsLevel graphics_level[SMU__NUM_SCLK_DPM_STATE]; + SMU7_Fusion_ACPILevel acpi_level; + SMU7_Fusion_UvdLevel uvd_level[SMU7_MAX_LEVELS_UVD]; + SMU7_Fusion_ExtClkLevel vce_level[SMU7_MAX_LEVELS_VCE]; + SMU7_Fusion_ExtClkLevel acp_level[SMU7_MAX_LEVELS_ACP]; + SMU7_Fusion_ExtClkLevel samu_level[SMU7_MAX_LEVELS_SAMU]; + u8 uvd_boot_level; + u8 vce_boot_level; + u8 acp_boot_level; + u8 samu_boot_level; + u8 uvd_interval; + u8 vce_interval; + u8 acp_interval; + u8 samu_interval; + u8 graphics_boot_level; + u8 graphics_interval; + u8 graphics_therm_throttle_enable; + u8 graphics_voltage_change_enable; + u8 graphics_clk_slow_enable; + u8 graphics_clk_slow_divider; + u8 fps_low_t; + u32 low_sclk_interrupt_t; + bool uvd_power_gated; + bool vce_power_gated; + bool acp_power_gated; + bool samu_power_gated; + bool nb_dpm_enabled; + /* flags */ + bool enable_didt; + bool enable_dpm; + bool enable_auto_thermal_throttling; + bool enable_nb_dpm; + /* caps */ + bool caps_cac; + bool caps_power_containment; + bool caps_sq_ramping; + bool caps_db_ramping; + bool caps_td_ramping; + bool caps_tcp_ramping; + bool caps_sclk_throttle_low_notification; + bool caps_fps; + bool caps_uvd_dpm; + bool caps_uvd_pg; + bool caps_vce_pg; + bool caps_samu_pg; + bool caps_acp_pg; + bool caps_stable_p_state; + bool caps_enable_dfs_bypass; + bool caps_sclk_ds; + struct radeon_ps current_rps; + struct kv_ps current_ps; + struct radeon_ps requested_rps; + struct kv_ps requested_ps; +}; + + +/* kv_smc.c */ +int kv_notify_message_to_smu(struct radeon_device *rdev, u32 id); +int kv_dpm_get_enable_mask(struct radeon_device *rdev, u32 *enable_mask); +void kv_dpm_reset_asic(struct radeon_device *rdev); +int kv_send_msg_to_smc_with_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 parameter); +int kv_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, + u32 *value, u32 limit); +int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable); +int kv_smc_bapm_enable(struct radeon_device *rdev, bool enable); +int kv_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit); + +#endif diff --git a/sys/dev/drm/radeon/kv_smc.c b/sys/dev/drm/radeon/kv_smc.c new file mode 100644 index 0000000000..574a772ab6 --- /dev/null +++ b/sys/dev/drm/radeon/kv_smc.c @@ -0,0 +1,214 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ + +#include +#include "radeon.h" +#include "cikd.h" +#include "kv_dpm.h" + +int kv_notify_message_to_smu(struct radeon_device *rdev, u32 id) +{ + u32 i; + u32 tmp = 0; + + WREG32(SMC_MESSAGE_0, id & SMC_MSG_MASK); + + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(SMC_RESP_0) & SMC_RESP_MASK) != 0) + break; + udelay(1); + } + tmp = RREG32(SMC_RESP_0) & SMC_RESP_MASK; + + if (tmp != 1) { + if (tmp == 0xFF) + return -EINVAL; + else if (tmp == 0xFE) + return -EINVAL; + } + + return 0; +} + +int kv_dpm_get_enable_mask(struct radeon_device *rdev, u32 *enable_mask) +{ + int ret; + + ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_SCLKDPM_GetEnabledMask); + + if (ret == 0) + *enable_mask = RREG32_SMC(SMC_SYSCON_MSG_ARG_0); + + return ret; +} + +int kv_send_msg_to_smc_with_parameter(struct radeon_device *rdev, + PPSMC_Msg msg, u32 parameter) +{ + + WREG32(SMC_MSG_ARG_0, parameter); + + return kv_notify_message_to_smu(rdev, msg); +} + +static int kv_set_smc_sram_address(struct radeon_device *rdev, + u32 smc_address, u32 limit) +{ + if (smc_address & 3) + return -EINVAL; + if ((smc_address + 3) > limit) + return -EINVAL; + + WREG32(SMC_IND_INDEX_0, smc_address); + WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + + return 0; +} + +int kv_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, + u32 *value, u32 limit) +{ + int ret; + + ret = kv_set_smc_sram_address(rdev, smc_address, limit); + if (ret) + return ret; + + *value = RREG32(SMC_IND_DATA_0); + return 0; +} + +int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + return kv_notify_message_to_smu(rdev, PPSMC_MSG_DPM_Enable); + else + return kv_notify_message_to_smu(rdev, PPSMC_MSG_DPM_Disable); +} + +int kv_smc_bapm_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + return kv_notify_message_to_smu(rdev, PPSMC_MSG_EnableBAPM); + else + return kv_notify_message_to_smu(rdev, PPSMC_MSG_DisableBAPM); +} + +int kv_copy_bytes_to_smc(struct radeon_device *rdev, + u32 smc_start_address, + const u8 *src, u32 byte_count, u32 limit) +{ + int ret; + u32 data, original_data, addr, extra_shift, t_byte, count, mask; + + if ((smc_start_address + byte_count) > limit) + return -EINVAL; + + addr = smc_start_address; + t_byte = addr & 3; + + /* RMW for the initial bytes */ + if (t_byte != 0) { + addr -= t_byte; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + original_data = RREG32(SMC_IND_DATA_0); + + data = 0; + mask = 0; + count = 4; + while (count > 0) { + if (t_byte > 0) { + mask = (mask << 8) | 0xff; + t_byte--; + } else if (byte_count > 0) { + data = (data << 8) + *src++; + byte_count--; + mask <<= 8; + } else { + data <<= 8; + mask = (mask << 8) | 0xff; + } + count--; + } + + data |= original_data & mask; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + + addr += 4; + } + + while (byte_count >= 4) { + /* SMC address space is BE */ + data = (src[0] << 24) + (src[1] << 16) + (src[2] << 8) + src[3]; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + + src += 4; + byte_count -= 4; + addr += 4; + } + + /* RMW for the final bytes */ + if (byte_count > 0) { + data = 0; + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + original_data= RREG32(SMC_IND_DATA_0); + + extra_shift = 8 * (4 - byte_count); + + while (byte_count > 0) { + /* SMC address space is BE */ + data = (data << 8) + *src++; + byte_count--; + } + + data <<= extra_shift; + + data |= (original_data & ~((~0UL) << extra_shift)); + + ret = kv_set_smc_sram_address(rdev, addr, limit); + if (ret) + return ret; + + WREG32(SMC_IND_DATA_0, data); + } + return 0; +} diff --git a/sys/dev/drm/radeon/ni.c b/sys/dev/drm/radeon/ni.c index 4ca80d07ad..165f189739 100644 --- a/sys/dev/drm/radeon/ni.c +++ b/sys/dev/drm/radeon/ni.c @@ -34,7 +34,7 @@ #include "radeon_ucode.h" #include "clearstate_cayman.h" -static u32 tn_rlc_save_restore_register_list[] = +static const u32 tn_rlc_save_restore_register_list[] = { 0x98fc, 0x98f0, @@ -159,7 +159,6 @@ static u32 tn_rlc_save_restore_register_list[] = 0x9830, 0x802c, }; -static u32 tn_rlc_save_restore_register_list_size = ARRAY_SIZE(tn_rlc_save_restore_register_list); /* Firmware Names */ MODULE_FIRMWARE("radeon/BARTS_pfp.bin"); @@ -786,6 +785,7 @@ int ni_init_microcode(struct radeon_device *rdev) fw_name); release_firmware(rdev->smc_fw); rdev->smc_fw = NULL; + err = 0; } else if (rdev->smc_fw->datasize != smc_req_size) { printk(KERN_ERR "ni_smc: Bogus length %zu in firmware \"%s\"\n", @@ -832,6 +832,8 @@ void ni_fini_microcode(struct radeon_device *rdev) rdev->rlc_fw = NULL; release_firmware(rdev->mc_fw); rdev->mc_fw = NULL; + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; } int tn_get_temp(struct radeon_device *rdev) @@ -1382,23 +1384,6 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_write(ring, 10); /* poll interval */ } -void cayman_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); - radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); - radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); - radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); -} - static void cayman_cp_enable(struct radeon_device *rdev, bool enable) { if (enable) @@ -1572,8 +1557,8 @@ static int cayman_cp_resume(struct radeon_device *rdev) /* Set ring buffer size */ ring = &rdev->ring[ridx[i]]; - rb_cntl = drm_order(ring->ring_size / 8); - rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8; + rb_cntl = order_base_2(ring->ring_size / 8); + rb_cntl |= order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8; #ifdef __BIG_ENDIAN rb_cntl |= BUF_SWAP_32BIT; #endif @@ -1621,186 +1606,7 @@ static int cayman_cp_resume(struct radeon_device *rdev) return 0; } -/* - * DMA - * Starting with R600, the GPU has an asynchronous - * DMA engine. The programming model is very similar - * to the 3D engine (ring buffer, IBs, etc.), but the - * DMA controller has it's own packet format that is - * different form the PM4 format used by the 3D engine. - * It supports copying data, writing embedded data, - * solid fills, and a number of other things. It also - * has support for tiling/detiling of buffers. - * Cayman and newer support two asynchronous DMA engines. - */ -/** - * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ib: IB object to schedule - * - * Schedule an IB in the DMA ring (cayman-SI). - */ -void cayman_dma_ring_ib_execute(struct radeon_device *rdev, - struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 4; - while ((next_rptr & 7) != 5) - next_rptr++; - next_rptr += 3; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); - radeon_ring_write(ring, next_rptr); - } - - /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. - * Pad as necessary with NOPs. - */ - while ((ring->wptr & 7) != 5) - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); - radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); - radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); - -} - -/** - * cayman_dma_stop - stop the async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the async dma engines (cayman-SI). - */ -void cayman_dma_stop(struct radeon_device *rdev) -{ - u32 rb_cntl; - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - - /* dma0 */ - rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); - - /* dma1 */ - rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); - - rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; - rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; -} - -/** - * cayman_dma_resume - setup and start the async dma engines - * - * @rdev: radeon_device pointer - * - * Set up the DMA ring buffers and enable them. (cayman-SI). - * Returns 0 for success, error for failure. - */ -int cayman_dma_resume(struct radeon_device *rdev) -{ - struct radeon_ring *ring; - u32 rb_cntl, dma_cntl, ib_cntl; - u32 rb_bufsz; - u32 reg_offset, wb_offset; - int i, r; - - /* Reset dma */ - WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - - for (i = 0; i < 2; i++) { - if (i == 0) { - ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; - reg_offset = DMA0_REGISTER_OFFSET; - wb_offset = R600_WB_DMA_RPTR_OFFSET; - } else { - ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; - reg_offset = DMA1_REGISTER_OFFSET; - wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; - } - - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); - - /* Set ring buffer size in dwords */ - rb_bufsz = drm_order(ring->ring_size / 4); - rb_cntl = rb_bufsz << 1; -#ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; -#endif - WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR + reg_offset, 0); - WREG32(DMA_RB_WPTR + reg_offset, 0); - - /* set the wb address whether it's enabled or not */ - WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, - upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); - WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, - ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); - - if (rdev->wb.enabled) - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; - - WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); - - /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; -#ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; -#endif - WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); - - dma_cntl = RREG32(DMA_CNTL + reg_offset); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL + reg_offset, dma_cntl); - - ring->wptr = 0; - WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); - - ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; - - WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); - - ring->ready = true; - - r = radeon_ring_test(rdev, ring->idx, ring); - if (r) { - ring->ready = false; - return r; - } - } - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - - return 0; -} - -/** - * cayman_dma_fini - tear down the async dma engines - * - * @rdev: radeon_device pointer - * - * Stop the async dma engines and free the rings (cayman-SI). - */ -void cayman_dma_fini(struct radeon_device *rdev) -{ - cayman_dma_stop(rdev); - radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); - radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); -} - -static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev) +u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -2053,34 +1859,6 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } -/** - * cayman_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = cayman_gpu_check_soft_reset(rdev); - u32 mask; - - if (ring->idx == R600_RING_TYPE_DMA_INDEX) - mask = RADEON_RESET_DMA; - else - mask = RADEON_RESET_DMA1; - - if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - static int cayman_startup(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -2091,6 +1869,11 @@ static int cayman_startup(struct radeon_device *rdev) /* enable aspm */ evergreen_program_aspm(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + evergreen_mc_program(rdev); if (rdev->flags & RADEON_IS_IGP) { @@ -2117,26 +1900,16 @@ static int cayman_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - r = cayman_pcie_gart_enable(rdev); if (r) return r; cayman_gpu_init(rdev); - r = evergreen_blit_init(rdev); - if (r) { - r600_blit_fini(rdev); - rdev->asic->copy.copy = NULL; - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); - } - /* allocate rlc buffers */ if (rdev->flags & RADEON_IS_IGP) { rdev->rlc.reg_list = tn_rlc_save_restore_register_list; - rdev->rlc.reg_list_size = tn_rlc_save_restore_register_list_size; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(tn_rlc_save_restore_register_list); rdev->rlc.cs_data = cayman_cs_data; r = sumo_rlc_init(rdev); if (r) { @@ -2156,7 +1929,7 @@ static int cayman_startup(struct radeon_device *rdev) return r; } - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -2207,7 +1980,7 @@ static int cayman_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; @@ -2215,7 +1988,7 @@ static int cayman_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR + DMA0_REGISTER_OFFSET, DMA_RB_WPTR + DMA0_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; @@ -2223,7 +1996,7 @@ static int cayman_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, DMA_RB_RPTR + DMA1_REGISTER_OFFSET, DMA_RB_WPTR + DMA1_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; @@ -2240,12 +2013,11 @@ static int cayman_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } @@ -2262,9 +2034,15 @@ static int cayman_startup(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) - return r; + if (ASIC_IS_DCE6(rdev)) { + r = dce6_audio_init(rdev); + if (r) + return r; + } else { + r = r600_audio_init(rdev); + if (r) + return r; + } return 0; } @@ -2295,11 +2073,14 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { - r600_audio_fini(rdev); + if (ASIC_IS_DCE6(rdev)) + dce6_audio_fini(rdev); + else + r600_audio_fini(rdev); radeon_vm_manager_fini(rdev); cayman_cp_enable(rdev, false); cayman_dma_stop(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); @@ -2421,7 +2202,6 @@ int cayman_init(struct radeon_device *rdev) void cayman_fini(struct radeon_device *rdev) { - r600_blit_fini(rdev); cayman_cp_fini(rdev); cayman_dma_fini(rdev); r600_irq_fini(rdev); @@ -2431,7 +2211,7 @@ void cayman_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -2693,61 +2473,7 @@ void cayman_vm_set_page(struct radeon_device *rdev, } } } else { - if ((flags & RADEON_VM_PAGE_SYSTEM) || - (count == 1)) { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - /* for non-physically contiguous pages (system) */ - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - } else { - value = 0; - } - addr += incr; - value |= r600_flags; - ib->ptr[ib->length_dw++] = value; - ib->ptr[ib->length_dw++] = upper_32_bits(value); - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); - } else { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - ib->ptr[ib->length_dw++] = r600_flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - pe += ndw * 4; - addr += (ndw / 2) * incr; - count -= ndw / 2; - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); + cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); } } @@ -2781,26 +2507,3 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); } - -void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{ - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); - - /* flush hdp cache */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); - radeon_ring_write(ring, 1); - - /* bits 0-7 are the VM contexts0-7 */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); -} - diff --git a/sys/dev/drm/radeon/ni_dma.c b/sys/dev/drm/radeon/ni_dma.c new file mode 100644 index 0000000000..c801601140 --- /dev/null +++ b/sys/dev/drm/radeon/ni_dma.c @@ -0,0 +1,335 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "nid.h" + +/* + * DMA + * Starting with R600, the GPU has an asynchronous + * DMA engine. The programming model is very similar + * to the 3D engine (ring buffer, IBs, etc.), but the + * DMA controller has it's own packet format that is + * different form the PM4 format used by the 3D engine. + * It supports copying data, writing embedded data, + * solid fills, and a number of other things. It also + * has support for tiling/detiling of buffers. + * Cayman and newer support two asynchronous DMA engines. + */ + +/** + * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (cayman-SI). + */ +void cayman_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * cayman_dma_stop - stop the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines (cayman-SI). + */ +void cayman_dma_stop(struct radeon_device *rdev) +{ + u32 rb_cntl; + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + /* dma0 */ + rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); + + /* dma1 */ + rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); + + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; +} + +/** + * cayman_dma_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the DMA ring buffers and enable them. (cayman-SI). + * Returns 0 for success, error for failure. + */ +int cayman_dma_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + u32 rb_cntl, dma_cntl, ib_cntl; + u32 rb_bufsz; + u32 reg_offset, wb_offset; + int i, r; + + /* Reset dma */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + + for (i = 0; i < 2; i++) { + if (i == 0) { + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + reg_offset = DMA0_REGISTER_OFFSET; + wb_offset = R600_WB_DMA_RPTR_OFFSET; + } else { + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + reg_offset = DMA1_REGISTER_OFFSET; + wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; + } + + WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); + WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(DMA_RB_RPTR + reg_offset, 0); + WREG32(DMA_RB_WPTR + reg_offset, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, + upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); + WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, + ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + + WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); + + /* enable DMA IBs */ + ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; +#ifdef __BIG_ENDIAN + ib_cntl |= DMA_IB_SWAP_ENABLE; +#endif + WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); + + dma_cntl = RREG32(DMA_CNTL + reg_offset); + dma_cntl &= ~CTXEMPTY_INT_ENABLE; + WREG32(DMA_CNTL + reg_offset, dma_cntl); + + ring->wptr = 0; + WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); + + ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; + + WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); + + ring->ready = true; + + r = radeon_ring_test(rdev, ring->idx, ring); + if (r) { + ring->ready = false; + return r; + } + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * cayman_dma_fini - tear down the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines and free the rings (cayman-SI). + */ +void cayman_dma_fini(struct radeon_device *rdev) +{ + cayman_dma_stop(rdev); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); +} + +/** + * cayman_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = cayman_gpu_check_soft_reset(rdev); + u32 mask; + + if (ring->idx == R600_RING_TYPE_DMA_INDEX) + mask = RADEON_RESET_DMA; + else + mask = RADEON_RESET_DMA1; + + if (!(reset_mask & mask)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + +/** + * cayman_dma_vm_set_page - update the page tables using the DMA + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * @r600_flags: hw access flags + * + * Update the page tables using the DMA (cayman/TN). + */ +void cayman_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) +{ + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + + if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + pe += ndw * 4; + addr += (ndw / 2) * incr; + count -= ndw / 2; + } + } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); +} + +void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + + if (vm == NULL) + return; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* flush hdp cache */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 1 << vm->id); +} diff --git a/sys/dev/drm/radeon/ni_dpm.c b/sys/dev/drm/radeon/ni_dpm.c index 001d6d759f..0b1fe1115d 100644 --- a/sys/dev/drm/radeon/ni_dpm.c +++ b/sys/dev/drm/radeon/ni_dpm.c @@ -773,7 +773,8 @@ bool ni_dpm_vblank_too_short(struct radeon_device *rdev) { struct rv7xx_power_info *pi = rv770_get_pi(rdev); u32 vblank_time = r600_dpm_get_vblank_time(rdev); - u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + /* we never hit the non-gddr5 limit so disable it */ + u32 switch_limit = pi->mem_gddr5 ? 450 : 0; if (vblank_time < switch_limit) return true; @@ -790,6 +791,7 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_mclk_switching; u32 mclk, sclk; u16 vddc, vddci; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i; if ((rdev->pm.dpm.new_active_crtc_count > 1) || @@ -816,6 +818,29 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev, } } + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + for (i = 0; i < ps->performance_level_count; i++) { + if (max_sclk_vddc) { + if (ps->performance_levels[i].sclk > max_sclk_vddc) + ps->performance_levels[i].sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->performance_levels[i].mclk > max_mclk_vddci) + ps->performance_levels[i].mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->performance_levels[i].mclk > max_mclk_vddc) + ps->performance_levels[i].mclk = max_mclk_vddc; + } + } + /* XXX validate the min clocks required for display */ if (disable_mclk_switching) { @@ -3868,12 +3893,6 @@ int ni_dpm_set_power_state(struct radeon_device *rdev) return ret; } - ret = ni_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("ni_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } @@ -4041,6 +4060,7 @@ static int ni_parse_power_table(struct radeon_device *rdev) (power_state->v1.ucNonClockStateIndex * power_info->pplib.ucNonClockSize)); if (power_info->pplib.ucStateEntrySize - 1) { + u8 *idx; ps = kzalloc(sizeof(struct ni_ps), GFP_KERNEL); if (ps == NULL) { kfree(rdev->pm.dpm.ps); @@ -4050,12 +4070,12 @@ static int ni_parse_power_table(struct radeon_device *rdev) ni_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], non_clock_info, power_info->pplib.ucNonClockSize); + idx = (u8 *)&power_state->v1.ucClockStateIndices[0]; for (j = 0; j < (power_info->pplib.ucStateEntrySize - 1); j++) { clock_info = (union pplib_clock_info *) ((uint8_t*)mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset) + - (power_state->v1.ucClockStateIndices[j] * - power_info->pplib.ucClockInfoSize)); + (idx[j] * power_info->pplib.ucClockInfoSize)); ni_parse_pplib_clock_info(rdev, &rdev->pm.dpm.ps[i], j, clock_info); @@ -4274,6 +4294,12 @@ int ni_dpm_init(struct radeon_device *rdev) ni_pi->use_power_boost_limit = true; + /* make sure dc limits are valid */ + if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) || + (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0)) + rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc = + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + return 0; } diff --git a/sys/dev/drm/radeon/ppsmc.h b/sys/dev/drm/radeon/ppsmc.h index b5564a3645..5670b82912 100644 --- a/sys/dev/drm/radeon/ppsmc.h +++ b/sys/dev/drm/radeon/ppsmc.h @@ -99,13 +99,72 @@ typedef uint8_t PPSMC_Result; #define PPSMC_MSG_ThrottleOVRDSCLKDS ((uint8_t)0x96) #define PPSMC_MSG_CancelThrottleOVRDSCLKDS ((uint8_t)0x97) +/* CI/KV/KB */ +#define PPSMC_MSG_UVDDPM_SetEnabledMask ((uint16_t) 0x12D) +#define PPSMC_MSG_VCEDPM_SetEnabledMask ((uint16_t) 0x12E) +#define PPSMC_MSG_ACPDPM_SetEnabledMask ((uint16_t) 0x12F) +#define PPSMC_MSG_SAMUDPM_SetEnabledMask ((uint16_t) 0x130) +#define PPSMC_MSG_MCLKDPM_ForceState ((uint16_t) 0x131) +#define PPSMC_MSG_MCLKDPM_NoForcedLevel ((uint16_t) 0x132) +#define PPSMC_MSG_Voltage_Cntl_Disable ((uint16_t) 0x135) +#define PPSMC_MSG_PCIeDPM_Enable ((uint16_t) 0x136) +#define PPSMC_MSG_PCIeDPM_Disable ((uint16_t) 0x13d) +#define PPSMC_MSG_ACPPowerOFF ((uint16_t) 0x137) +#define PPSMC_MSG_ACPPowerON ((uint16_t) 0x138) +#define PPSMC_MSG_SAMPowerOFF ((uint16_t) 0x139) +#define PPSMC_MSG_SAMPowerON ((uint16_t) 0x13a) +#define PPSMC_MSG_PCIeDPM_Disable ((uint16_t) 0x13d) +#define PPSMC_MSG_NBDPM_Enable ((uint16_t) 0x140) +#define PPSMC_MSG_NBDPM_Disable ((uint16_t) 0x141) +#define PPSMC_MSG_SCLKDPM_SetEnabledMask ((uint16_t) 0x145) +#define PPSMC_MSG_MCLKDPM_SetEnabledMask ((uint16_t) 0x146) +#define PPSMC_MSG_PCIeDPM_ForceLevel ((uint16_t) 0x147) +#define PPSMC_MSG_PCIeDPM_UnForceLevel ((uint16_t) 0x148) +#define PPSMC_MSG_EnableVRHotGPIOInterrupt ((uint16_t) 0x14a) +#define PPSMC_MSG_DPM_Enable ((uint16_t) 0x14e) +#define PPSMC_MSG_DPM_Disable ((uint16_t) 0x14f) +#define PPSMC_MSG_MCLKDPM_Enable ((uint16_t) 0x150) +#define PPSMC_MSG_MCLKDPM_Disable ((uint16_t) 0x151) +#define PPSMC_MSG_UVDDPM_Enable ((uint16_t) 0x154) +#define PPSMC_MSG_UVDDPM_Disable ((uint16_t) 0x155) +#define PPSMC_MSG_SAMUDPM_Enable ((uint16_t) 0x156) +#define PPSMC_MSG_SAMUDPM_Disable ((uint16_t) 0x157) +#define PPSMC_MSG_ACPDPM_Enable ((uint16_t) 0x158) +#define PPSMC_MSG_ACPDPM_Disable ((uint16_t) 0x159) +#define PPSMC_MSG_VCEDPM_Enable ((uint16_t) 0x15a) +#define PPSMC_MSG_VCEDPM_Disable ((uint16_t) 0x15b) +#define PPSMC_MSG_VddC_Request ((uint16_t) 0x15f) +#define PPSMC_MSG_SCLKDPM_GetEnabledMask ((uint16_t) 0x162) +#define PPSMC_MSG_PCIeDPM_SetEnabledMask ((uint16_t) 0x167) +#define PPSMC_MSG_TDCLimitEnable ((uint16_t) 0x169) +#define PPSMC_MSG_TDCLimitDisable ((uint16_t) 0x16a) +#define PPSMC_MSG_PkgPwrLimitEnable ((uint16_t) 0x185) +#define PPSMC_MSG_PkgPwrLimitDisable ((uint16_t) 0x186) +#define PPSMC_MSG_PkgPwrSetLimit ((uint16_t) 0x187) +#define PPSMC_MSG_OverDriveSetTargetTdp ((uint16_t) 0x188) +#define PPSMC_MSG_SCLKDPM_FreezeLevel ((uint16_t) 0x189) +#define PPSMC_MSG_SCLKDPM_UnfreezeLevel ((uint16_t) 0x18A) +#define PPSMC_MSG_MCLKDPM_FreezeLevel ((uint16_t) 0x18B) +#define PPSMC_MSG_MCLKDPM_UnfreezeLevel ((uint16_t) 0x18C) +#define PPSMC_MSG_MASTER_DeepSleep_ON ((uint16_t) 0x18F) +#define PPSMC_MSG_MASTER_DeepSleep_OFF ((uint16_t) 0x190) +#define PPSMC_MSG_Remove_DC_Clamp ((uint16_t) 0x191) + +#define PPSMC_MSG_API_GetSclkFrequency ((uint16_t) 0x200) +#define PPSMC_MSG_API_GetMclkFrequency ((uint16_t) 0x201) + /* TN */ #define PPSMC_MSG_DPM_Config ((uint32_t) 0x102) #define PPSMC_MSG_DPM_ForceState ((uint32_t) 0x104) #define PPSMC_MSG_PG_SIMD_Config ((uint32_t) 0x108) #define PPSMC_MSG_DPM_N_LevelsDisabled ((uint32_t) 0x112) +#define PPSMC_MSG_Voltage_Cntl_Enable ((uint32_t) 0x109) +#define PPSMC_MSG_VCEPowerOFF ((uint32_t) 0x10e) +#define PPSMC_MSG_VCEPowerON ((uint32_t) 0x10f) #define PPSMC_MSG_DCE_RemoveVoltageAdjustment ((uint32_t) 0x11d) #define PPSMC_MSG_DCE_AllowVoltageAdjustment ((uint32_t) 0x11e) +#define PPSMC_MSG_EnableBAPM ((uint32_t) 0x120) +#define PPSMC_MSG_DisableBAPM ((uint32_t) 0x121) #define PPSMC_MSG_UVD_DPM_Config ((uint32_t) 0x124) diff --git a/sys/dev/drm/radeon/pptable.h b/sys/dev/drm/radeon/pptable.h new file mode 100644 index 0000000000..5d8b772919 --- /dev/null +++ b/sys/dev/drm/radeon/pptable.h @@ -0,0 +1,682 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _PPTABLE_H +#define _PPTABLE_H + +#pragma pack(push, 1) + +typedef struct _ATOM_PPLIB_THERMALCONTROLLER + +{ + UCHAR ucType; // one of ATOM_PP_THERMALCONTROLLER_* + UCHAR ucI2cLine; // as interpreted by DAL I2C + UCHAR ucI2cAddress; + UCHAR ucFanParameters; // Fan Control Parameters. + UCHAR ucFanMinRPM; // Fan Minimum RPM (hundreds) -- for display purposes only. + UCHAR ucFanMaxRPM; // Fan Maximum RPM (hundreds) -- for display purposes only. + UCHAR ucReserved; // ---- + UCHAR ucFlags; // to be defined +} ATOM_PPLIB_THERMALCONTROLLER; + +#define ATOM_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK 0x0f +#define ATOM_PP_FANPARAMETERS_NOFAN 0x80 // No fan is connected to this controller. + +#define ATOM_PP_THERMALCONTROLLER_NONE 0 +#define ATOM_PP_THERMALCONTROLLER_LM63 1 // Not used by PPLib +#define ATOM_PP_THERMALCONTROLLER_ADM1032 2 // Not used by PPLib +#define ATOM_PP_THERMALCONTROLLER_ADM1030 3 // Not used by PPLib +#define ATOM_PP_THERMALCONTROLLER_MUA6649 4 // Not used by PPLib +#define ATOM_PP_THERMALCONTROLLER_LM64 5 +#define ATOM_PP_THERMALCONTROLLER_F75375 6 // Not used by PPLib +#define ATOM_PP_THERMALCONTROLLER_RV6xx 7 +#define ATOM_PP_THERMALCONTROLLER_RV770 8 +#define ATOM_PP_THERMALCONTROLLER_ADT7473 9 +#define ATOM_PP_THERMALCONTROLLER_KONG 10 +#define ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO 11 +#define ATOM_PP_THERMALCONTROLLER_EVERGREEN 12 +#define ATOM_PP_THERMALCONTROLLER_EMC2103 13 /* 0x0D */ // Only fan control will be implemented, do NOT show this in PPGen. +#define ATOM_PP_THERMALCONTROLLER_SUMO 14 /* 0x0E */ // Sumo type, used internally +#define ATOM_PP_THERMALCONTROLLER_NISLANDS 15 +#define ATOM_PP_THERMALCONTROLLER_SISLANDS 16 +#define ATOM_PP_THERMALCONTROLLER_LM96163 17 +#define ATOM_PP_THERMALCONTROLLER_CISLANDS 18 +#define ATOM_PP_THERMALCONTROLLER_KAVERI 19 + + +// Thermal controller 'combo type' to use an external controller for Fan control and an internal controller for thermal. +// We probably should reserve the bit 0x80 for this use. +// To keep the number of these types low we should also use the same code for all ASICs (i.e. do not distinguish RV6xx and RV7xx Internal here). +// The driver can pick the correct internal controller based on the ASIC. + +#define ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL 0x89 // ADT7473 Fan Control + Internal Thermal Controller +#define ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL 0x8D // EMC2103 Fan Control + Internal Thermal Controller + +typedef struct _ATOM_PPLIB_STATE +{ + UCHAR ucNonClockStateIndex; + UCHAR ucClockStateIndices[1]; // variable-sized +} ATOM_PPLIB_STATE; + + +typedef struct _ATOM_PPLIB_FANTABLE +{ + UCHAR ucFanTableFormat; // Change this if the table format changes or version changes so that the other fields are not the same. + UCHAR ucTHyst; // Temperature hysteresis. Integer. + USHORT usTMin; // The temperature, in 0.01 centigrades, below which we just run at a minimal PWM. + USHORT usTMed; // The middle temperature where we change slopes. + USHORT usTHigh; // The high point above TMed for adjusting the second slope. + USHORT usPWMMin; // The minimum PWM value in percent (0.01% increments). + USHORT usPWMMed; // The PWM value (in percent) at TMed. + USHORT usPWMHigh; // The PWM value at THigh. +} ATOM_PPLIB_FANTABLE; + +typedef struct _ATOM_PPLIB_FANTABLE2 +{ + ATOM_PPLIB_FANTABLE basicTable; + USHORT usTMax; // The max temperature +} ATOM_PPLIB_FANTABLE2; + +typedef struct _ATOM_PPLIB_EXTENDEDHEADER +{ + USHORT usSize; + ULONG ulMaxEngineClock; // For Overdrive. + ULONG ulMaxMemoryClock; // For Overdrive. + // Add extra system parameters here, always adjust size to include all fields. + USHORT usVCETableOffset; //points to ATOM_PPLIB_VCE_Table + USHORT usUVDTableOffset; //points to ATOM_PPLIB_UVD_Table + USHORT usSAMUTableOffset; //points to ATOM_PPLIB_SAMU_Table + USHORT usPPMTableOffset; //points to ATOM_PPLIB_PPM_Table + USHORT usACPTableOffset; //points to ATOM_PPLIB_ACP_Table + USHORT usPowerTuneTableOffset; //points to ATOM_PPLIB_POWERTUNE_Table +} ATOM_PPLIB_EXTENDEDHEADER; + +//// ATOM_PPLIB_POWERPLAYTABLE::ulPlatformCaps +#define ATOM_PP_PLATFORM_CAP_BACKBIAS 1 +#define ATOM_PP_PLATFORM_CAP_POWERPLAY 2 +#define ATOM_PP_PLATFORM_CAP_SBIOSPOWERSOURCE 4 +#define ATOM_PP_PLATFORM_CAP_ASPM_L0s 8 +#define ATOM_PP_PLATFORM_CAP_ASPM_L1 16 +#define ATOM_PP_PLATFORM_CAP_HARDWAREDC 32 +#define ATOM_PP_PLATFORM_CAP_GEMINIPRIMARY 64 +#define ATOM_PP_PLATFORM_CAP_STEPVDDC 128 +#define ATOM_PP_PLATFORM_CAP_VOLTAGECONTROL 256 +#define ATOM_PP_PLATFORM_CAP_SIDEPORTCONTROL 512 +#define ATOM_PP_PLATFORM_CAP_TURNOFFPLL_ASPML1 1024 +#define ATOM_PP_PLATFORM_CAP_HTLINKCONTROL 2048 +#define ATOM_PP_PLATFORM_CAP_MVDDCONTROL 4096 +#define ATOM_PP_PLATFORM_CAP_GOTO_BOOT_ON_ALERT 0x2000 // Go to boot state on alerts, e.g. on an AC->DC transition. +#define ATOM_PP_PLATFORM_CAP_DONT_WAIT_FOR_VBLANK_ON_ALERT 0x4000 // Do NOT wait for VBLANK during an alert (e.g. AC->DC transition). +#define ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL 0x8000 // Does the driver control VDDCI independently from VDDC. +#define ATOM_PP_PLATFORM_CAP_REGULATOR_HOT 0x00010000 // Enable the 'regulator hot' feature. +#define ATOM_PP_PLATFORM_CAP_BACO 0x00020000 // Does the driver supports BACO state. +#define ATOM_PP_PLATFORM_CAP_NEW_CAC_VOLTAGE 0x00040000 // Does the driver supports new CAC voltage table. +#define ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY 0x00080000 // Does the driver supports revert GPIO5 polarity. +#define ATOM_PP_PLATFORM_CAP_OUTPUT_THERMAL2GPIO17 0x00100000 // Does the driver supports thermal2GPIO17. +#define ATOM_PP_PLATFORM_CAP_VRHOT_GPIO_CONFIGURABLE 0x00200000 // Does the driver supports VR HOT GPIO Configurable. +#define ATOM_PP_PLATFORM_CAP_TEMP_INVERSION 0x00400000 // Does the driver supports Temp Inversion feature. +#define ATOM_PP_PLATFORM_CAP_EVV 0x00800000 + +typedef struct _ATOM_PPLIB_POWERPLAYTABLE +{ + ATOM_COMMON_TABLE_HEADER sHeader; + + UCHAR ucDataRevision; + + UCHAR ucNumStates; + UCHAR ucStateEntrySize; + UCHAR ucClockInfoSize; + UCHAR ucNonClockSize; + + // offset from start of this table to array of ucNumStates ATOM_PPLIB_STATE structures + USHORT usStateArrayOffset; + + // offset from start of this table to array of ASIC-specific structures, + // currently ATOM_PPLIB_CLOCK_INFO. + USHORT usClockInfoArrayOffset; + + // offset from start of this table to array of ATOM_PPLIB_NONCLOCK_INFO + USHORT usNonClockInfoArrayOffset; + + USHORT usBackbiasTime; // in microseconds + USHORT usVoltageTime; // in microseconds + USHORT usTableSize; //the size of this structure, or the extended structure + + ULONG ulPlatformCaps; // See ATOM_PPLIB_CAPS_* + + ATOM_PPLIB_THERMALCONTROLLER sThermalController; + + USHORT usBootClockInfoOffset; + USHORT usBootNonClockInfoOffset; + +} ATOM_PPLIB_POWERPLAYTABLE; + +typedef struct _ATOM_PPLIB_POWERPLAYTABLE2 +{ + ATOM_PPLIB_POWERPLAYTABLE basicTable; + UCHAR ucNumCustomThermalPolicy; + USHORT usCustomThermalPolicyArrayOffset; +}ATOM_PPLIB_POWERPLAYTABLE2, *LPATOM_PPLIB_POWERPLAYTABLE2; + +typedef struct _ATOM_PPLIB_POWERPLAYTABLE3 +{ + ATOM_PPLIB_POWERPLAYTABLE2 basicTable2; + USHORT usFormatID; // To be used ONLY by PPGen. + USHORT usFanTableOffset; + USHORT usExtendendedHeaderOffset; +} ATOM_PPLIB_POWERPLAYTABLE3, *LPATOM_PPLIB_POWERPLAYTABLE3; + +typedef struct _ATOM_PPLIB_POWERPLAYTABLE4 +{ + ATOM_PPLIB_POWERPLAYTABLE3 basicTable3; + ULONG ulGoldenPPID; // PPGen use only + ULONG ulGoldenRevision; // PPGen use only + USHORT usVddcDependencyOnSCLKOffset; + USHORT usVddciDependencyOnMCLKOffset; + USHORT usVddcDependencyOnMCLKOffset; + USHORT usMaxClockVoltageOnDCOffset; + USHORT usVddcPhaseShedLimitsTableOffset; // Points to ATOM_PPLIB_PhaseSheddingLimits_Table + USHORT usMvddDependencyOnMCLKOffset; +} ATOM_PPLIB_POWERPLAYTABLE4, *LPATOM_PPLIB_POWERPLAYTABLE4; + +typedef struct _ATOM_PPLIB_POWERPLAYTABLE5 +{ + ATOM_PPLIB_POWERPLAYTABLE4 basicTable4; + ULONG ulTDPLimit; + ULONG ulNearTDPLimit; + ULONG ulSQRampingThreshold; + USHORT usCACLeakageTableOffset; // Points to ATOM_PPLIB_CAC_Leakage_Table + ULONG ulCACLeakage; // The iLeakage for driver calculated CAC leakage table + USHORT usTDPODLimit; + USHORT usLoadLineSlope; // in milliOhms * 100 +} ATOM_PPLIB_POWERPLAYTABLE5, *LPATOM_PPLIB_POWERPLAYTABLE5; + +//// ATOM_PPLIB_NONCLOCK_INFO::usClassification +#define ATOM_PPLIB_CLASSIFICATION_UI_MASK 0x0007 +#define ATOM_PPLIB_CLASSIFICATION_UI_SHIFT 0 +#define ATOM_PPLIB_CLASSIFICATION_UI_NONE 0 +#define ATOM_PPLIB_CLASSIFICATION_UI_BATTERY 1 +#define ATOM_PPLIB_CLASSIFICATION_UI_BALANCED 3 +#define ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE 5 +// 2, 4, 6, 7 are reserved + +#define ATOM_PPLIB_CLASSIFICATION_BOOT 0x0008 +#define ATOM_PPLIB_CLASSIFICATION_THERMAL 0x0010 +#define ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE 0x0020 +#define ATOM_PPLIB_CLASSIFICATION_REST 0x0040 +#define ATOM_PPLIB_CLASSIFICATION_FORCED 0x0080 +#define ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE 0x0100 +#define ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE 0x0200 +#define ATOM_PPLIB_CLASSIFICATION_UVDSTATE 0x0400 +#define ATOM_PPLIB_CLASSIFICATION_3DLOW 0x0800 +#define ATOM_PPLIB_CLASSIFICATION_ACPI 0x1000 +#define ATOM_PPLIB_CLASSIFICATION_HD2STATE 0x2000 +#define ATOM_PPLIB_CLASSIFICATION_HDSTATE 0x4000 +#define ATOM_PPLIB_CLASSIFICATION_SDSTATE 0x8000 + +//// ATOM_PPLIB_NONCLOCK_INFO::usClassification2 +#define ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2 0x0001 +#define ATOM_PPLIB_CLASSIFICATION2_ULV 0x0002 +#define ATOM_PPLIB_CLASSIFICATION2_MVC 0x0004 //Multi-View Codec (BD-3D) + +//// ATOM_PPLIB_NONCLOCK_INFO::ulCapsAndSettings +#define ATOM_PPLIB_SINGLE_DISPLAY_ONLY 0x00000001 +#define ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK 0x00000002 + +// 0 is 2.5Gb/s, 1 is 5Gb/s +#define ATOM_PPLIB_PCIE_LINK_SPEED_MASK 0x00000004 +#define ATOM_PPLIB_PCIE_LINK_SPEED_SHIFT 2 + +// lanes - 1: 1, 2, 4, 8, 12, 16 permitted by PCIE spec +#define ATOM_PPLIB_PCIE_LINK_WIDTH_MASK 0x000000F8 +#define ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT 3 + +// lookup into reduced refresh-rate table +#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_MASK 0x00000F00 +#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_SHIFT 8 + +#define ATOM_PPLIB_LIMITED_REFRESHRATE_UNLIMITED 0 +#define ATOM_PPLIB_LIMITED_REFRESHRATE_50HZ 1 +// 2-15 TBD as needed. + +#define ATOM_PPLIB_SOFTWARE_DISABLE_LOADBALANCING 0x00001000 +#define ATOM_PPLIB_SOFTWARE_ENABLE_SLEEP_FOR_TIMESTAMPS 0x00002000 + +#define ATOM_PPLIB_DISALLOW_ON_DC 0x00004000 + +#define ATOM_PPLIB_ENABLE_VARIBRIGHT 0x00008000 + +//memory related flags +#define ATOM_PPLIB_SWSTATE_MEMORY_DLL_OFF 0x000010000 + +//M3 Arb //2bits, current 3 sets of parameters in total +#define ATOM_PPLIB_M3ARB_MASK 0x00060000 +#define ATOM_PPLIB_M3ARB_SHIFT 17 + +#define ATOM_PPLIB_ENABLE_DRR 0x00080000 + +// remaining 16 bits are reserved +typedef struct _ATOM_PPLIB_THERMAL_STATE +{ + UCHAR ucMinTemperature; + UCHAR ucMaxTemperature; + UCHAR ucThermalAction; +}ATOM_PPLIB_THERMAL_STATE, *LPATOM_PPLIB_THERMAL_STATE; + +// Contained in an array starting at the offset +// in ATOM_PPLIB_POWERPLAYTABLE::usNonClockInfoArrayOffset. +// referenced from ATOM_PPLIB_STATE_INFO::ucNonClockStateIndex +#define ATOM_PPLIB_NONCLOCKINFO_VER1 12 +#define ATOM_PPLIB_NONCLOCKINFO_VER2 24 +typedef struct _ATOM_PPLIB_NONCLOCK_INFO +{ + USHORT usClassification; + UCHAR ucMinTemperature; + UCHAR ucMaxTemperature; + ULONG ulCapsAndSettings; + UCHAR ucRequiredPower; + USHORT usClassification2; + ULONG ulVCLK; + ULONG ulDCLK; + UCHAR ucUnused[5]; +} ATOM_PPLIB_NONCLOCK_INFO; + +// Contained in an array starting at the offset +// in ATOM_PPLIB_POWERPLAYTABLE::usClockInfoArrayOffset. +// referenced from ATOM_PPLIB_STATE::ucClockStateIndices +typedef struct _ATOM_PPLIB_R600_CLOCK_INFO +{ + USHORT usEngineClockLow; + UCHAR ucEngineClockHigh; + + USHORT usMemoryClockLow; + UCHAR ucMemoryClockHigh; + + USHORT usVDDC; + USHORT usUnused1; + USHORT usUnused2; + + ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_* + +} ATOM_PPLIB_R600_CLOCK_INFO; + +// ulFlags in ATOM_PPLIB_R600_CLOCK_INFO +#define ATOM_PPLIB_R600_FLAGS_PCIEGEN2 1 +#define ATOM_PPLIB_R600_FLAGS_UVDSAFE 2 +#define ATOM_PPLIB_R600_FLAGS_BACKBIASENABLE 4 +#define ATOM_PPLIB_R600_FLAGS_MEMORY_ODT_OFF 8 +#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF 16 +#define ATOM_PPLIB_R600_FLAGS_LOWPOWER 32 // On the RV770 use 'low power' setting (sequencer S0). + +typedef struct _ATOM_PPLIB_RS780_CLOCK_INFO + +{ + USHORT usLowEngineClockLow; // Low Engine clock in MHz (the same way as on the R600). + UCHAR ucLowEngineClockHigh; + USHORT usHighEngineClockLow; // High Engine clock in MHz. + UCHAR ucHighEngineClockHigh; + USHORT usMemoryClockLow; // For now one of the ATOM_PPLIB_RS780_SPMCLK_XXXX constants. + UCHAR ucMemoryClockHigh; // Currentyl unused. + UCHAR ucPadding; // For proper alignment and size. + USHORT usVDDC; // For the 780, use: None, Low, High, Variable + UCHAR ucMaxHTLinkWidth; // From SBIOS - {2, 4, 8, 16} + UCHAR ucMinHTLinkWidth; // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could + USHORT usHTLinkFreq; // See definition ATOM_PPLIB_RS780_HTLINKFREQ_xxx or in MHz(>=200). + ULONG ulFlags; +} ATOM_PPLIB_RS780_CLOCK_INFO; + +#define ATOM_PPLIB_RS780_VOLTAGE_NONE 0 +#define ATOM_PPLIB_RS780_VOLTAGE_LOW 1 +#define ATOM_PPLIB_RS780_VOLTAGE_HIGH 2 +#define ATOM_PPLIB_RS780_VOLTAGE_VARIABLE 3 + +#define ATOM_PPLIB_RS780_SPMCLK_NONE 0 // We cannot change the side port memory clock, leave it as it is. +#define ATOM_PPLIB_RS780_SPMCLK_LOW 1 +#define ATOM_PPLIB_RS780_SPMCLK_HIGH 2 + +#define ATOM_PPLIB_RS780_HTLINKFREQ_NONE 0 +#define ATOM_PPLIB_RS780_HTLINKFREQ_LOW 1 +#define ATOM_PPLIB_RS780_HTLINKFREQ_HIGH 2 + +typedef struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO +{ + USHORT usEngineClockLow; + UCHAR ucEngineClockHigh; + + USHORT usMemoryClockLow; + UCHAR ucMemoryClockHigh; + + USHORT usVDDC; + USHORT usVDDCI; + USHORT usUnused; + + ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_* + +} ATOM_PPLIB_EVERGREEN_CLOCK_INFO; + +typedef struct _ATOM_PPLIB_SI_CLOCK_INFO +{ + USHORT usEngineClockLow; + UCHAR ucEngineClockHigh; + + USHORT usMemoryClockLow; + UCHAR ucMemoryClockHigh; + + USHORT usVDDC; + USHORT usVDDCI; + UCHAR ucPCIEGen; + UCHAR ucUnused1; + + ULONG ulFlags; // ATOM_PPLIB_SI_FLAGS_*, no flag is necessary for now + +} ATOM_PPLIB_SI_CLOCK_INFO; + +typedef struct _ATOM_PPLIB_CI_CLOCK_INFO +{ + USHORT usEngineClockLow; + UCHAR ucEngineClockHigh; + + USHORT usMemoryClockLow; + UCHAR ucMemoryClockHigh; + + UCHAR ucPCIEGen; + USHORT usPCIELane; +} ATOM_PPLIB_CI_CLOCK_INFO; + +typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{ + USHORT usEngineClockLow; //clockfrequency & 0xFFFF. The unit is in 10khz + UCHAR ucEngineClockHigh; //clockfrequency >> 16. + UCHAR vddcIndex; //2-bit vddc index; + USHORT tdpLimit; + //please initalize to 0 + USHORT rsv1; + //please initialize to 0s + ULONG rsv2[2]; +}ATOM_PPLIB_SUMO_CLOCK_INFO; + +typedef struct _ATOM_PPLIB_STATE_V2 +{ + //number of valid dpm levels in this state; Driver uses it to calculate the whole + //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR) + UCHAR ucNumDPMLevels; + + //a index to the array of nonClockInfos + UCHAR nonClockInfoIndex; + /** + * Driver will read the first ucNumDPMLevels in this array + */ + UCHAR clockInfoIndex[1]; +} ATOM_PPLIB_STATE_V2; + +typedef struct _StateArray{ + //how many states we have + UCHAR ucNumEntries; + + ATOM_PPLIB_STATE_V2 states[1]; +}StateArray; + + +typedef struct _ClockInfoArray{ + //how many clock levels we have + UCHAR ucNumEntries; + + //sizeof(ATOM_PPLIB_CLOCK_INFO) + UCHAR ucEntrySize; + + UCHAR clockInfo[1]; +}ClockInfoArray; + +typedef struct _NonClockInfoArray{ + + //how many non-clock levels we have. normally should be same as number of states + UCHAR ucNumEntries; + //sizeof(ATOM_PPLIB_NONCLOCK_INFO) + UCHAR ucEntrySize; + + ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1]; +}NonClockInfoArray; + +typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record +{ + USHORT usClockLow; + UCHAR ucClockHigh; + USHORT usVoltage; +}ATOM_PPLIB_Clock_Voltage_Dependency_Record; + +typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Table +{ + UCHAR ucNumEntries; // Number of entries. + ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[1]; // Dynamically allocate entries. +}ATOM_PPLIB_Clock_Voltage_Dependency_Table; + +typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record +{ + USHORT usSclkLow; + UCHAR ucSclkHigh; + USHORT usMclkLow; + UCHAR ucMclkHigh; + USHORT usVddc; + USHORT usVddci; +}ATOM_PPLIB_Clock_Voltage_Limit_Record; + +typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table +{ + UCHAR ucNumEntries; // Number of entries. + ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1]; // Dynamically allocate entries. +}ATOM_PPLIB_Clock_Voltage_Limit_Table; + +union _ATOM_PPLIB_CAC_Leakage_Record +{ + struct + { + USHORT usVddc; // We use this field for the "fake" standardized VDDC for power calculations; For CI and newer, we use this as the real VDDC value. in CI we read it as StdVoltageHiSidd + ULONG ulLeakageValue; // For CI and newer we use this as the "fake" standar VDDC value. in CI we read it as StdVoltageLoSidd + + }; + struct + { + USHORT usVddc1; + USHORT usVddc2; + USHORT usVddc3; + }; +}; + +typedef union _ATOM_PPLIB_CAC_Leakage_Record ATOM_PPLIB_CAC_Leakage_Record; + +typedef struct _ATOM_PPLIB_CAC_Leakage_Table +{ + UCHAR ucNumEntries; // Number of entries. + ATOM_PPLIB_CAC_Leakage_Record entries[1]; // Dynamically allocate entries. +}ATOM_PPLIB_CAC_Leakage_Table; + +typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record +{ + USHORT usVoltage; + USHORT usSclkLow; + UCHAR ucSclkHigh; + USHORT usMclkLow; + UCHAR ucMclkHigh; +}ATOM_PPLIB_PhaseSheddingLimits_Record; + +typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table +{ + UCHAR ucNumEntries; // Number of entries. + ATOM_PPLIB_PhaseSheddingLimits_Record entries[1]; // Dynamically allocate entries. +}ATOM_PPLIB_PhaseSheddingLimits_Table; + +typedef struct _VCEClockInfo{ + USHORT usEVClkLow; + UCHAR ucEVClkHigh; + USHORT usECClkLow; + UCHAR ucECClkHigh; +}VCEClockInfo; + +typedef struct _VCEClockInfoArray{ + UCHAR ucNumEntries; + VCEClockInfo entries[1]; +}VCEClockInfoArray; + +typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record +{ + USHORT usVoltage; + UCHAR ucVCEClockInfoIndex; +}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record; + +typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table +{ + UCHAR numEntries; + ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[1]; +}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table; + +typedef struct _ATOM_PPLIB_VCE_State_Record +{ + UCHAR ucVCEClockInfoIndex; + UCHAR ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary +}ATOM_PPLIB_VCE_State_Record; + +typedef struct _ATOM_PPLIB_VCE_State_Table +{ + UCHAR numEntries; + ATOM_PPLIB_VCE_State_Record entries[1]; +}ATOM_PPLIB_VCE_State_Table; + + +typedef struct _ATOM_PPLIB_VCE_Table +{ + UCHAR revid; +// VCEClockInfoArray array; +// ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table limits; +// ATOM_PPLIB_VCE_State_Table states; +}ATOM_PPLIB_VCE_Table; + + +typedef struct _UVDClockInfo{ + USHORT usVClkLow; + UCHAR ucVClkHigh; + USHORT usDClkLow; + UCHAR ucDClkHigh; +}UVDClockInfo; + +typedef struct _UVDClockInfoArray{ + UCHAR ucNumEntries; + UVDClockInfo entries[1]; +}UVDClockInfoArray; + +typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record +{ + USHORT usVoltage; + UCHAR ucUVDClockInfoIndex; +}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record; + +typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table +{ + UCHAR numEntries; + ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[1]; +}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table; + +typedef struct _ATOM_PPLIB_UVD_Table +{ + UCHAR revid; +// UVDClockInfoArray array; +// ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table limits; +}ATOM_PPLIB_UVD_Table; + +typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Record +{ + USHORT usVoltage; + USHORT usSAMClockLow; + UCHAR ucSAMClockHigh; +}ATOM_PPLIB_SAMClk_Voltage_Limit_Record; + +typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Table{ + UCHAR numEntries; + ATOM_PPLIB_SAMClk_Voltage_Limit_Record entries[1]; +}ATOM_PPLIB_SAMClk_Voltage_Limit_Table; + +typedef struct _ATOM_PPLIB_SAMU_Table +{ + UCHAR revid; + ATOM_PPLIB_SAMClk_Voltage_Limit_Table limits; +}ATOM_PPLIB_SAMU_Table; + +typedef struct _ATOM_PPLIB_ACPClk_Voltage_Limit_Record +{ + USHORT usVoltage; + USHORT usACPClockLow; + UCHAR ucACPClockHigh; +}ATOM_PPLIB_ACPClk_Voltage_Limit_Record; + +typedef struct _ATOM_PPLIB_ACPClk_Voltage_Limit_Table{ + UCHAR numEntries; + ATOM_PPLIB_ACPClk_Voltage_Limit_Record entries[1]; +}ATOM_PPLIB_ACPClk_Voltage_Limit_Table; + +typedef struct _ATOM_PPLIB_ACP_Table +{ + UCHAR revid; + ATOM_PPLIB_ACPClk_Voltage_Limit_Table limits; +}ATOM_PPLIB_ACP_Table; + +typedef struct _ATOM_PowerTune_Table{ + USHORT usTDP; + USHORT usConfigurableTDP; + USHORT usTDC; + USHORT usBatteryPowerLimit; + USHORT usSmallPowerLimit; + USHORT usLowCACLeakage; + USHORT usHighCACLeakage; +}ATOM_PowerTune_Table; + +typedef struct _ATOM_PPLIB_POWERTUNE_Table +{ + UCHAR revid; + ATOM_PowerTune_Table power_tune_table; +}ATOM_PPLIB_POWERTUNE_Table; + +typedef struct _ATOM_PPLIB_POWERTUNE_Table_V1 +{ + UCHAR revid; + ATOM_PowerTune_Table power_tune_table; + USHORT usMaximumPowerDeliveryLimit; + USHORT usReserve[7]; +} ATOM_PPLIB_POWERTUNE_Table_V1; + +#define ATOM_PPM_A_A 1 +#define ATOM_PPM_A_I 2 +typedef struct _ATOM_PPLIB_PPM_Table +{ + UCHAR ucRevId; + UCHAR ucPpmDesign; //A+I or A+A + USHORT usCpuCoreNumber; + ULONG ulPlatformTDP; + ULONG ulSmallACPlatformTDP; + ULONG ulPlatformTDC; + ULONG ulSmallACPlatformTDC; + ULONG ulApuTDP; + ULONG ulDGpuTDP; + ULONG ulDGpuUlvPower; + ULONG ulTjmax; +} ATOM_PPLIB_PPM_Table; + +#pragma pack(pop) + +#endif diff --git a/sys/dev/drm/radeon/r100.c b/sys/dev/drm/radeon/r100.c index 9be7ff3bd8..0ee17c8622 100644 --- a/sys/dev/drm/radeon/r100.c +++ b/sys/dev/drm/radeon/r100.c @@ -1111,12 +1111,12 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) } /* Align ring size */ - rb_bufsz = drm_order(ring_size / 8); + rb_bufsz = order_base_2(ring_size / 8); ring_size = (1 << (rb_bufsz + 1)) * 4; r100_cp_load_microcode(rdev); r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET, RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR, - 0, 0x7fffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) { return r; } @@ -2949,9 +2949,11 @@ static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); seq_printf(m, "%u dwords in ring\n", count); - for (j = 0; j <= count; j++) { - i = (rdp + j) & ring->ptr_mask; - seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); + if (ring->ready) { + for (j = 0; j <= count; j++) { + i = (rdp + j) & ring->ptr_mask; + seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); + } } return 0; } diff --git a/sys/dev/drm/radeon/r300.c b/sys/dev/drm/radeon/r300.c index a1c77e7fad..2fa94656c0 100644 --- a/sys/dev/drm/radeon/r300.c +++ b/sys/dev/drm/radeon/r300.c @@ -1550,7 +1550,7 @@ int r300_init(struct radeon_device *rdev) rdev->accel_working = true; r = r300_startup(rdev); if (r) { - /* Somethings want wront with the accel init stop accel */ + /* Something went wrong with the accel init, so stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); diff --git a/sys/dev/drm/radeon/r600.c b/sys/dev/drm/radeon/r600.c index 5383ab12ac..64ab234bd0 100644 --- a/sys/dev/drm/radeon/r600.c +++ b/sys/dev/drm/radeon/r600.c @@ -24,8 +24,6 @@ * Authors: Dave Airlie * Alex Deucher * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/r600.c 254885 2013-08-25 19:37:15Z dumbbell $ */ #include #include @@ -122,6 +120,11 @@ u32 r600_get_xclk(struct radeon_device *rdev) return rdev->clock.spll.reference_freq; } +int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) +{ + return 0; +} + /* get temperature in millidegrees */ int rv6xx_get_temp(struct radeon_device *rdev) { @@ -1181,8 +1184,8 @@ static void r600_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc } mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; dev_info(rdev->dev, "VRAM: %juM 0x%08jX - 0x%08jX (%juM used)\n", - (uintmax_t)mc->mc_vram_size >> 20, (uintmax_t)mc->vram_start, - (uintmax_t)mc->vram_end, (uintmax_t)mc->real_vram_size >> 20); + mc->mc_vram_size >> 20, mc->vram_start, + mc->vram_end, mc->real_vram_size >> 20); } else { u64 base = 0; if (rdev->flags & RADEON_IS_IGP) { @@ -1384,7 +1387,7 @@ static bool r600_is_display_hung(struct radeon_device *rdev) return true; } -static u32 r600_gpu_check_soft_reset(struct radeon_device *rdev) +u32 r600_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -1632,28 +1635,6 @@ bool r600_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } -/** - * r600_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = r600_gpu_check_soft_reset(rdev); - - if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - u32 r6xx_remap_render_backend(struct radeon_device *rdev, u32 tiling_pipe_num, u32 max_rb_num, @@ -2315,6 +2296,7 @@ int r600_init_microcode(struct radeon_device *rdev) fw_name); release_firmware(rdev->smc_fw); rdev->smc_fw = NULL; + err = 0; } else if (rdev->smc_fw->datasize != smc_req_size) { printk(KERN_ERR "smc: Bogus length %zu in firmware \"%s\"\n", @@ -2447,8 +2429,8 @@ int r600_cp_resume(struct radeon_device *rdev) WREG32(GRBM_SOFT_RESET, 0); /* Set ring buffer size */ - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -2501,7 +2483,7 @@ void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsign int r; /* Align ring size */ - rb_bufsz = drm_order(ring_size / 8); + rb_bufsz = order_base_2(ring_size / 8); ring_size = (1 << (rb_bufsz + 1)) * 4; ring->ring_size = ring_size; ring->align_mask = 16 - 1; @@ -2524,345 +2506,6 @@ void r600_cp_fini(struct radeon_device *rdev) } /* - * DMA - * Starting with R600, the GPU has an asynchronous - * DMA engine. The programming model is very similar - * to the 3D engine (ring buffer, IBs, etc.), but the - * DMA controller has it's own packet format that is - * different form the PM4 format used by the 3D engine. - * It supports copying data, writing embedded data, - * solid fills, and a number of other things. It also - * has support for tiling/detiling of buffers. - */ -/** - * r600_dma_stop - stop the async dma engine - * - * @rdev: radeon_device pointer - * - * Stop the async dma engine (r6xx-evergreen). - */ -void r600_dma_stop(struct radeon_device *rdev) -{ - u32 rb_cntl = RREG32(DMA_RB_CNTL); - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL, rb_cntl); - - rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; -} - -/** - * r600_dma_resume - setup and start the async dma engine - * - * @rdev: radeon_device pointer - * - * Set up the DMA ring buffer and enable it. (r6xx-evergreen). - * Returns 0 for success, error for failure. - */ -int r600_dma_resume(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; - u32 rb_cntl, dma_cntl, ib_cntl; - u32 rb_bufsz; - int r; - - /* Reset dma */ - if (rdev->family >= CHIP_RV770) - WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); - else - WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); - - /* Set ring buffer size in dwords */ - rb_bufsz = drm_order(ring->ring_size / 4); - rb_cntl = rb_bufsz << 1; -#ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; -#endif - WREG32(DMA_RB_CNTL, rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR, 0); - WREG32(DMA_RB_WPTR, 0); - - /* set the wb address whether it's enabled or not */ - WREG32(DMA_RB_RPTR_ADDR_HI, - upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); - WREG32(DMA_RB_RPTR_ADDR_LO, - ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); - - if (rdev->wb.enabled) - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; - - WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); - - /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE; -#ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; -#endif - WREG32(DMA_IB_CNTL, ib_cntl); - - dma_cntl = RREG32(DMA_CNTL); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL, dma_cntl); - - if (rdev->family >= CHIP_RV770) - WREG32(DMA_MODE, 1); - - ring->wptr = 0; - WREG32(DMA_RB_WPTR, ring->wptr << 2); - - ring->rptr = RREG32(DMA_RB_RPTR) >> 2; - - WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); - - ring->ready = true; - - r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); - if (r) { - ring->ready = false; - return r; - } - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - - return 0; -} - -/** - * r600_dma_fini - tear down the async dma engine - * - * @rdev: radeon_device pointer - * - * Stop the async dma engine and free the ring (r6xx-evergreen). - */ -void r600_dma_fini(struct radeon_device *rdev) -{ - r600_dma_stop(rdev); - radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); -} - -/* - * UVD - */ -int r600_uvd_rbc_start(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - uint64_t rptr_addr; - uint32_t rb_bufsz, tmp; - int r; - - rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET; - - if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) { - DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n"); - return -EINVAL; - } - - /* force RBC into idle state */ - WREG32(UVD_RBC_RB_CNTL, 0x11010101); - - /* Set the write pointer delay */ - WREG32(UVD_RBC_RB_WPTR_CNTL, 0); - - /* set the wb address */ - WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2); - - /* programm the 4GB memory segment for rptr and ring buffer */ - WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) | - (0x7 << 16) | (0x1 << 31)); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(UVD_RBC_RB_RPTR, 0x0); - - ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); - WREG32(UVD_RBC_RB_WPTR, ring->wptr); - - /* set the ring address */ - WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); - - /* Set ring buffer size */ - rb_bufsz = drm_order(ring->ring_size); - rb_bufsz = (0x1 << 8) | rb_bufsz; - WREG32(UVD_RBC_RB_CNTL, rb_bufsz); - - ring->ready = true; - r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); - if (r) { - ring->ready = false; - return r; - } - - r = radeon_ring_lock(rdev, ring, 10); - if (r) { - DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); - return r; - } - - tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); - radeon_ring_write(ring, tmp); - radeon_ring_write(ring, 0xFFFFF); - - /* Clear timeout status bits */ - radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); - radeon_ring_write(ring, 0x8); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); - radeon_ring_write(ring, 3); - - radeon_ring_unlock_commit(rdev, ring); - - return 0; -} - -void r600_uvd_stop(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - - /* force RBC into idle state */ - WREG32(UVD_RBC_RB_CNTL, 0x11010101); - - /* Stall UMC and register bus before resetting VCPU */ - WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); - WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); - mdelay(1); - - /* put VCPU into reset */ - WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); - mdelay(5); - - /* disable VCPU clock */ - WREG32(UVD_VCPU_CNTL, 0x0); - - /* Unstall UMC and register bus */ - WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); - WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); - - ring->ready = false; -} - -int r600_uvd_init(struct radeon_device *rdev) -{ - int i, j, r; - /* disable byte swapping */ - u32 lmi_swap_cntl = 0; - u32 mp_swap_cntl = 0; - - /* raise clocks while booting up the VCPU */ - radeon_set_uvd_clocks(rdev, 53300, 40000); - - /* disable clock gating */ - WREG32(UVD_CGC_GATE, 0); - - /* disable interupt */ - WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); - - /* Stall UMC and register bus before resetting VCPU */ - WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); - WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); - mdelay(1); - - /* put LMI, VCPU, RBC etc... into reset */ - WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | - LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | - CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); - mdelay(5); - - /* take UVD block out of reset */ - WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); - mdelay(5); - - /* initialize UVD memory controller */ - WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | - (1 << 21) | (1 << 9) | (1 << 20)); - -#ifdef __BIG_ENDIAN - /* swap (8 in 32) RB and IB */ - lmi_swap_cntl = 0xa; - mp_swap_cntl = 0; -#endif - WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl); - WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl); - - WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); - WREG32(UVD_MPC_SET_MUXA1, 0x0); - WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); - WREG32(UVD_MPC_SET_MUXB1, 0x0); - WREG32(UVD_MPC_SET_ALU, 0); - WREG32(UVD_MPC_SET_MUX, 0x88); - - /* take all subblocks out of reset, except VCPU */ - WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); - mdelay(5); - - /* enable VCPU clock */ - WREG32(UVD_VCPU_CNTL, 1 << 9); - - /* enable UMC */ - WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); - - /* boot up the VCPU */ - WREG32(UVD_SOFT_RESET, 0); - mdelay(10); - - WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); - - for (i = 0; i < 10; ++i) { - uint32_t status; - for (j = 0; j < 100; ++j) { - status = RREG32(UVD_STATUS); - if (status & 2) - break; - mdelay(10); - } - r = 0; - if (status & 2) - break; - - DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); - WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); - mdelay(10); - WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); - mdelay(10); - r = -1; - } - - if (r) { - DRM_ERROR("UVD not responding, giving up!!!\n"); - radeon_set_uvd_clocks(rdev, 0, 0); - return r; - } - - /* enable interupt */ - WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); - - r = r600_uvd_rbc_start(rdev); - if (!r) - DRM_INFO("UVD initialized successfully.\n"); - - /* lower clocks again */ - radeon_set_uvd_clocks(rdev, 0, 0); - - return r; -} - -/* * GPU scratch registers helpers function. */ void r600_scratch_init(struct radeon_device *rdev) @@ -2917,94 +2560,6 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } -/** - * r600_dma_ring_test - simple async dma engine test - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Test the DMA engine by writing using it to write an - * value to memory. (r6xx-SI). - * Returns 0 for success, error for failure. - */ -int r600_dma_ring_test(struct radeon_device *rdev, - struct radeon_ring *ring) -{ - unsigned i; - int r; - volatile uint32_t *ptr = rdev->vram_scratch.ptr; - u32 tmp; - - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - *ptr = tmp; - - r = radeon_ring_lock(rdev, ring, 4); - if (r) { - DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); - return r; - } - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); - radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); - radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); - - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = *ptr; - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - - if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - return r; -} - -int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) -{ - uint32_t tmp = 0; - unsigned i; - int r; - - WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); - r = radeon_ring_lock(rdev, ring, 3); - if (r) { - DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", - ring->idx, r); - return r; - } - radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); - radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(UVD_CONTEXT_ID); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - - if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - return r; -} - /* * CP fences/semaphores */ @@ -3056,30 +2611,6 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } } -void r600_uvd_fence_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; - - radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); - radeon_ring_write(ring, fence->seq); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); - radeon_ring_write(ring, addr & 0xffffffff); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); - radeon_ring_write(ring, upper_32_bits(addr) & 0xff); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); - radeon_ring_write(ring, 2); - return; -} - void r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -3096,95 +2627,6 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); } -/* - * DMA fences/semaphores - */ - -/** - * r600_dma_fence_ring_emit - emit a fence on the DMA ring - * - * @rdev: radeon_device pointer - * @fence: radeon fence object - * - * Add a DMA fence packet to the ring to write - * the fence seq number and DMA trap packet to generate - * an interrupt if needed (r6xx-r7xx). - */ -void r600_dma_fence_ring_emit(struct radeon_device *rdev, - struct radeon_fence *fence) -{ - struct radeon_ring *ring = &rdev->ring[fence->ring]; - u64 addr = rdev->fence_drv[fence->ring].gpu_addr; - - /* write the fence */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); - radeon_ring_write(ring, addr & 0xfffffffc); - radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); - radeon_ring_write(ring, lower_32_bits(fence->seq)); - /* generate an interrupt */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); -} - -/** - * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * @semaphore: radeon semaphore object - * @emit_wait: wait or signal semaphore - * - * Add a DMA semaphore packet to the ring wait on or signal - * other rings (r6xx-SI). - */ -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - u64 addr = semaphore->gpu_addr; - u32 s = emit_wait ? 0 : 1; - - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); - radeon_ring_write(ring, addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(addr) & 0xff); -} - -void r600_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); - radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); - radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); - - radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); - radeon_ring_write(ring, emit_wait ? 1 : 0); -} - -int r600_copy_blit(struct radeon_device *rdev, - uint64_t src_offset, - uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - struct radeon_sa_bo *vb = NULL; - int r; - - r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); - if (r) { - return r; - } - r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); - r600_blit_done_copy(rdev, fence, vb, sem); - return 0; -} - /** * r600_copy_cpdma - copy pages using the CP DMA engine * @@ -3269,80 +2711,6 @@ int r600_copy_cpdma(struct radeon_device *rdev, return r; } -/** - * r600_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (r6xx). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int r600_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_dw, cur_size_in_dw; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; - num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); - r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_dw = size_in_dw; - if (cur_size_in_dw > 0xFFFE) - cur_size_in_dw = 0xFFFE; - size_in_dw -= cur_size_in_dw; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, src_offset & 0xfffffffc); - radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | - (upper_32_bits(src_offset) & 0xff))); - src_offset += cur_size_in_dw * 4; - dst_offset += cur_size_in_dw * 4; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size) @@ -3364,6 +2732,11 @@ static int r600_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ r600_pcie_gen2_enable(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + r600_mc_program(rdev); if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { @@ -3374,10 +2747,6 @@ static int r600_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - if (rdev->flags & RADEON_IS_AGP) { r600_agp_enable(rdev); } else { @@ -3386,12 +2755,6 @@ static int r600_startup(struct radeon_device *rdev) return r; } r600_gpu_init(rdev); - r = r600_blit_init(rdev); - if (r) { - r600_blit_fini(rdev); - rdev->asic->copy.copy = NULL; - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); - } /* allocate wb buffer */ r = radeon_wb_init(rdev); @@ -3428,14 +2791,14 @@ static int r600_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR, DMA_RB_WPTR, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; @@ -3604,7 +2967,6 @@ int r600_init(struct radeon_device *rdev) void r600_fini(struct radeon_device *rdev) { r600_audio_fini(rdev); - r600_blit_fini(rdev); r600_cp_fini(rdev); r600_dma_fini(rdev); r600_irq_fini(rdev); @@ -3657,16 +3019,6 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_write(ring, ib->length_dw); } -void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); - radeon_ring_write(ring, ib->gpu_addr); - radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); - radeon_ring_write(ring, ib->length_dw); -} - int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_ib ib; @@ -3720,140 +3072,6 @@ free_scratch: return r; } -/** - * r600_dma_ib_test - test an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Test a simple IB in the DMA ring (r6xx-SI). - * Returns 0 on success, error on failure. - */ -int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) -{ - struct radeon_ib ib; - unsigned i; - int r; - volatile uint32_t *ptr = rdev->vram_scratch.ptr; - u32 tmp = 0; - - if (!ptr) { - DRM_ERROR("invalid vram scratch pointer\n"); - return -EINVAL; - } - - tmp = 0xCAFEDEAD; - *ptr = tmp; - - r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); - if (r) { - DRM_ERROR("radeon: failed to get ib (%d).\n", r); - return r; - } - - ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); - ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; - ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; - ib.ptr[3] = 0xDEADBEEF; - ib.length_dw = 4; - - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); - return r; - } - r = radeon_fence_wait(ib.fence, false); - if (r) { - radeon_ib_free(rdev, &ib); - DRM_ERROR("radeon: fence wait failed (%d).\n", r); - return r; - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = *ptr; - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); - } else { - DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); - r = -EINVAL; - } - radeon_ib_free(rdev, &ib); - return r; -} - -int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) -{ - struct radeon_fence *fence = NULL; - int r; - - r = radeon_set_uvd_clocks(rdev, 53300, 40000); - if (r) { - DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r); - return r; - } - - r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); - if (r) { - DRM_ERROR("radeon: failed to get create msg (%d).\n", r); - goto error; - } - - r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); - if (r) { - DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); - goto error; - } - - r = radeon_fence_wait(fence, false); - if (r) { - DRM_ERROR("radeon: fence wait failed (%d).\n", r); - goto error; - } - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); -error: - radeon_fence_unref(&fence); - radeon_set_uvd_clocks(rdev, 0, 0); - return r; -} - -/** - * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine - * - * @rdev: radeon_device pointer - * @ib: IB object to schedule - * - * Schedule an IB in the DMA ring (r6xx-r7xx). - */ -void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) -{ - struct radeon_ring *ring = &rdev->ring[ib->ring]; - - if (rdev->wb.enabled) { - u32 next_rptr = ring->wptr + 4; - while ((next_rptr & 7) != 5) - next_rptr++; - next_rptr += 3; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); - radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); - radeon_ring_write(ring, next_rptr); - } - - /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. - * Pad as necessary with NOPs. - */ - while ((ring->wptr & 7) != 5) - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); - radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); - radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); - -} - /* * Interrupts * @@ -3870,7 +3088,7 @@ void r600_ih_ring_init(struct radeon_device *rdev, unsigned ring_size) u32 rb_bufsz; /* Align ring size */ - rb_bufsz = drm_order(ring_size / 4); + rb_bufsz = order_base_2(ring_size / 4); ring_size = (1 << rb_bufsz) * 4; rdev->ih.ring_size = ring_size; rdev->ih.ptr_mask = rdev->ih.ring_size - 1; @@ -4115,7 +3333,7 @@ int r600_irq_init(struct radeon_device *rdev) WREG32(INTERRUPT_CNTL, interrupt_cntl); WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); - rb_bufsz = drm_order(rdev->ih.ring_size / 4); + rb_bufsz = order_base_2(rdev->ih.ring_size / 4); ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | IH_WPTR_OVERFLOW_CLEAR | diff --git a/sys/dev/drm/radeon/r600_audio.c b/sys/dev/drm/radeon/r600_audio.c index 58fc0919a2..62cfb0751a 100644 --- a/sys/dev/drm/radeon/r600_audio.c +++ b/sys/dev/drm/radeon/r600_audio.c @@ -57,12 +57,12 @@ static bool radeon_dig_encoder(struct drm_encoder *encoder) */ static int r600_audio_chipset_supported(struct radeon_device *rdev) { - return ASIC_IS_DCE2(rdev) && !ASIC_IS_DCE6(rdev); + return ASIC_IS_DCE2(rdev) && !ASIC_IS_NODCE(rdev); } -struct r600_audio r600_audio_status(struct radeon_device *rdev) +struct r600_audio_pin r600_audio_status(struct radeon_device *rdev) { - struct r600_audio status; + struct r600_audio_pin status; uint32_t value; value = RREG32(R600_AUDIO_RATE_BPS_CHANNEL); @@ -119,16 +119,16 @@ void r600_audio_update_hdmi(void *arg, int pending) { struct radeon_device *rdev = arg; struct drm_device *dev = rdev->ddev; - struct r600_audio audio_status = r600_audio_status(rdev); + struct r600_audio_pin audio_status = r600_audio_status(rdev); struct drm_encoder *encoder; bool changed = false; - if (rdev->audio_status.channels != audio_status.channels || - rdev->audio_status.rate != audio_status.rate || - rdev->audio_status.bits_per_sample != audio_status.bits_per_sample || - rdev->audio_status.status_bits != audio_status.status_bits || - rdev->audio_status.category_code != audio_status.category_code) { - rdev->audio_status = audio_status; + if (rdev->audio.pin[0].channels != audio_status.channels || + rdev->audio.pin[0].rate != audio_status.rate || + rdev->audio.pin[0].bits_per_sample != audio_status.bits_per_sample || + rdev->audio.pin[0].status_bits != audio_status.status_bits || + rdev->audio.pin[0].category_code != audio_status.category_code) { + rdev->audio.pin[0] = audio_status; changed = true; } @@ -140,13 +140,13 @@ void r600_audio_update_hdmi(void *arg, int pending) } } -/* - * turn on/off audio engine - */ -static void r600_audio_engine_enable(struct radeon_device *rdev, bool enable) +/* enable the audio stream */ +static void r600_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable) { u32 value = 0; - DRM_INFO("%s audio support\n", enable ? "Enabling" : "Disabling"); + if (ASIC_IS_DCE4(rdev)) { if (enable) { value |= 0x81000000; /* Required to enable audio */ @@ -157,7 +157,7 @@ static void r600_audio_engine_enable(struct radeon_device *rdev, bool enable) WREG32_P(R600_AUDIO_ENABLE, enable ? 0x81000000 : 0x0, ~0x81000000); } - rdev->audio_enabled = enable; + DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id); } /* @@ -168,13 +168,17 @@ int r600_audio_init(struct radeon_device *rdev) if (!radeon_audio || !r600_audio_chipset_supported(rdev)) return 0; - r600_audio_engine_enable(rdev, true); + rdev->audio.enabled = true; + + rdev->audio.num_pins = 1; + rdev->audio.pin[0].channels = -1; + rdev->audio.pin[0].rate = -1; + rdev->audio.pin[0].bits_per_sample = -1; + rdev->audio.pin[0].status_bits = 0; + rdev->audio.pin[0].category_code = 0; + rdev->audio.pin[0].id = 0; - rdev->audio_status.channels = -1; - rdev->audio_status.rate = -1; - rdev->audio_status.bits_per_sample = -1; - rdev->audio_status.status_bits = 0; - rdev->audio_status.category_code = 0; + r600_audio_enable(rdev, &rdev->audio.pin[0], true); return 0; } @@ -185,8 +189,16 @@ int r600_audio_init(struct radeon_device *rdev) */ void r600_audio_fini(struct radeon_device *rdev) { - if (!rdev->audio_enabled) + if (!rdev->audio.enabled) return; - r600_audio_engine_enable(rdev, false); + r600_audio_enable(rdev, &rdev->audio.pin[0], false); + + rdev->audio.enabled = false; +} + +struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev) +{ + /* only one pin on 6xx-NI */ + return &rdev->audio.pin[0]; } diff --git a/sys/dev/drm/radeon/r600_blit.c b/sys/dev/drm/radeon/r600_blit.c index cc30fd06b6..3da4a69075 100644 --- a/sys/dev/drm/radeon/r600_blit.c +++ b/sys/dev/drm/radeon/r600_blit.c @@ -31,6 +31,37 @@ #include "r600_blit_shaders.h" +/* 23 bits of float fractional data */ +#define I2F_FRAC_BITS 23 +#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) + +/* + * Converts unsigned integer into 32-bit IEEE floating point representation. + * Will be exact from 0 to 2^24. Above that, we round towards zero + * as the fractional bits will not fit in a float. (It would be better to + * round towards even as the fpu does, but that is slower.) + */ +static __pure uint32_t int2float(uint32_t x) +{ + uint32_t msb, exponent, fraction; + + /* Zero is special */ + if (!x) return 0; + + /* Get location of the most significant bit */ + msb = __fls(x); + + /* + * Use a rotate instead of a shift because that works both leftwards + * and rightwards due to the mod(32) behaviour. This means we don't + * need to check to see if we are above 2^24 or not. + */ + fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK; + exponent = (127 + msb) << I2F_FRAC_BITS; + + return fraction + exponent; +} + #define DI_PT_RECTLIST 0x11 #define DI_INDEX_SIZE_16_BIT 0x0 #define DI_SRC_SEL_AUTO_INDEX 0x2 diff --git a/sys/dev/drm/radeon/r600_blit_kms.c b/sys/dev/drm/radeon/r600_blit_kms.c deleted file mode 100644 index c9aa85bf30..0000000000 --- a/sys/dev/drm/radeon/r600_blit_kms.c +++ /dev/null @@ -1,786 +0,0 @@ -/* - * Copyright 2009 Advanced Micro Devices, Inc. - * Copyright 2009 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - */ - -#include -#include -#include "radeon.h" -#include "radeon_asic.h" - -#include "r600d.h" -#include "r600_blit_shaders.h" -#include "radeon_blit_common.h" - -/* 23 bits of float fractional data */ -#define I2F_FRAC_BITS 23 -#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) - -/* - * Converts unsigned integer into 32-bit IEEE floating point representation. - * Will be exact from 0 to 2^24. Above that, we round towards zero - * as the fractional bits will not fit in a float. (It would be better to - * round towards even as the fpu does, but that is slower.) - */ -__pure uint32_t int2float(uint32_t x) -{ - uint32_t msb, exponent, fraction; - - /* Zero is special */ - if (!x) return 0; - - /* Get location of the most significant bit */ - msb = fls(x); - - /* - * Use a rotate instead of a shift because that works both leftwards - * and rightwards due to the mod(32) behaviour. This means we don't - * need to check to see if we are above 2^24 or not. - */ - fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK; - exponent = (127 + msb) << I2F_FRAC_BITS; - - return fraction + exponent; -} - -/* emits 21 on rv770+, 23 on r600 */ -static void -set_render_target(struct radeon_device *rdev, int format, - int w, int h, u64 gpu_addr) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 cb_color_info; - int pitch, slice; - - h = ALIGN(h, 8); - if (h < 8) - h = 8; - - cb_color_info = CB_FORMAT(format) | - CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | - CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); - pitch = (w / 8) - 1; - slice = ((w * h) / 64) - 1; - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, gpu_addr >> 8); - - if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) { - radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); - radeon_ring_write(ring, 2 << 0); - } - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, (pitch << 0) | (slice << 10)); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, cb_color_info); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, 0); -} - -/* emits 5dw */ -static void -cp_set_surface_sync(struct radeon_device *rdev, - u32 sync_type, u32 size, - u64 mc_addr) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 cp_coher_size; - - if (size == 0xffffffff) - cp_coher_size = 0xffffffff; - else - cp_coher_size = ((size + 255) >> 8); - - radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, sync_type); - radeon_ring_write(ring, cp_coher_size); - radeon_ring_write(ring, mc_addr >> 8); - radeon_ring_write(ring, 10); /* poll interval */ -} - -/* emits 21dw + 1 surface sync = 26dw */ -static void -set_shaders(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u64 gpu_addr; - u32 sq_pgm_resources; - - /* setup shader regs */ - sq_pgm_resources = (1 << 0); - - /* VS */ - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, gpu_addr >> 8); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, sq_pgm_resources); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, 0); - - /* PS */ - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, gpu_addr >> 8); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, sq_pgm_resources | (1 << 28)); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, 2); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); - radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, 0); - - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; - cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); -} - -/* emits 9 + 1 sync (5) = 14*/ -static void -set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 sq_vtx_constant_word2; - - sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | - SQ_VTXC_STRIDE(16); -#ifdef __BIG_ENDIAN - sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); -#endif - - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7)); - radeon_ring_write(ring, 0x460); - radeon_ring_write(ring, gpu_addr & 0xffffffff); - radeon_ring_write(ring, 48 - 1); - radeon_ring_write(ring, sq_vtx_constant_word2); - radeon_ring_write(ring, 1 << 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, SQ_TEX_VTX_VALID_BUFFER << 30); - - if ((rdev->family == CHIP_RV610) || - (rdev->family == CHIP_RV620) || - (rdev->family == CHIP_RS780) || - (rdev->family == CHIP_RS880) || - (rdev->family == CHIP_RV710)) - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, 48, gpu_addr); - else - cp_set_surface_sync(rdev, - PACKET3_VC_ACTION_ENA, 48, gpu_addr); -} - -/* emits 9 */ -static void -set_tex_resource(struct radeon_device *rdev, - int format, int w, int h, int pitch, - u64 gpu_addr, u32 size) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; - - if (h < 1) - h = 1; - - sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) | - S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); - sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) | - S_038000_TEX_WIDTH(w - 1); - - sq_tex_resource_word1 = S_038004_DATA_FORMAT(format); - sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1); - - sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) | - S_038010_DST_SEL_X(SQ_SEL_X) | - S_038010_DST_SEL_Y(SQ_SEL_Y) | - S_038010_DST_SEL_Z(SQ_SEL_Z) | - S_038010_DST_SEL_W(SQ_SEL_W); - - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, size, gpu_addr); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7)); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, sq_tex_resource_word0); - radeon_ring_write(ring, sq_tex_resource_word1); - radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(ring, gpu_addr >> 8); - radeon_ring_write(ring, sq_tex_resource_word4); - radeon_ring_write(ring, 0); - radeon_ring_write(ring, SQ_TEX_VTX_VALID_TEXTURE << 30); -} - -/* emits 12 */ -static void -set_scissors(struct radeon_device *rdev, int x1, int y1, - int x2, int y2) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, (x1 << 0) | (y1 << 16)); - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); - - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); -} - -/* emits 10 */ -static void -draw_auto(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); - radeon_ring_write(ring, DI_PT_RECTLIST); - - radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0)); - radeon_ring_write(ring, -#ifdef __BIG_ENDIAN - (2 << 2) | -#endif - DI_INDEX_SIZE_16_BIT); - - radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0)); - radeon_ring_write(ring, 1); - - radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); - radeon_ring_write(ring, 3); - radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX); - -} - -/* emits 14 */ -static void -set_default_state(struct radeon_device *rdev) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; - u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; - int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; - int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; - int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; - u64 gpu_addr; - int dwords; - - switch (rdev->family) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } - - if ((rdev->family == CHIP_RV610) || - (rdev->family == CHIP_RV620) || - (rdev->family == CHIP_RS780) || - (rdev->family == CHIP_RS880) || - (rdev->family == CHIP_RV710)) - sq_config = 0; - else - sq_config = VC_ENABLE; - - sq_config |= (DX9_CONSTS | - ALU_INST_PREFER_VECTOR | - PS_PRIO(0) | - VS_PRIO(1) | - GS_PRIO(2) | - ES_PRIO(3)); - - sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | - NUM_VS_GPRS(num_vs_gprs) | - NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); - sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | - NUM_ES_GPRS(num_es_gprs)); - sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | - NUM_VS_THREADS(num_vs_threads) | - NUM_GS_THREADS(num_gs_threads) | - NUM_ES_THREADS(num_es_threads)); - sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | - NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); - sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | - NUM_ES_STACK_ENTRIES(num_es_stack_entries)); - - /* emit an IB pointing at default state */ - dwords = ALIGN(rdev->r600_blit.state_len, 0x10); - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; - radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - radeon_ring_write(ring, -#ifdef __BIG_ENDIAN - (2 << 0) | -#endif - (gpu_addr & 0xFFFFFFFC)); - radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF); - radeon_ring_write(ring, dwords); - - /* SQ config */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 6)); - radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); - radeon_ring_write(ring, sq_config); - radeon_ring_write(ring, sq_gpr_resource_mgmt_1); - radeon_ring_write(ring, sq_gpr_resource_mgmt_2); - radeon_ring_write(ring, sq_thread_resource_mgmt); - radeon_ring_write(ring, sq_stack_resource_mgmt_1); - radeon_ring_write(ring, sq_stack_resource_mgmt_2); -} - -int r600_blit_init(struct radeon_device *rdev) -{ - u32 obj_size; - int i, r, dwords; - void *ptr; - u32 packet2s[16]; - int num_packet2s = 0; - - rdev->r600_blit.primitives.set_render_target = set_render_target; - rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; - rdev->r600_blit.primitives.set_shaders = set_shaders; - rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; - rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; - rdev->r600_blit.primitives.set_scissors = set_scissors; - rdev->r600_blit.primitives.draw_auto = draw_auto; - rdev->r600_blit.primitives.set_default_state = set_default_state; - - rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ - rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ - rdev->r600_blit.ring_size_common += 5; /* done copy */ - rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ - - rdev->r600_blit.ring_size_per_loop = 76; - /* set_render_target emits 2 extra dwords on rv6xx */ - if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) - rdev->r600_blit.ring_size_per_loop += 2; - - rdev->r600_blit.max_dim = 8192; - - rdev->r600_blit.state_offset = 0; - - if (rdev->family >= CHIP_RV770) - rdev->r600_blit.state_len = r7xx_default_size; - else - rdev->r600_blit.state_len = r6xx_default_size; - - dwords = rdev->r600_blit.state_len; - while (dwords & 0xf) { - packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0)); - dwords++; - } - - obj_size = dwords * 4; - obj_size = ALIGN(obj_size, 256); - - rdev->r600_blit.vs_offset = obj_size; - obj_size += r6xx_vs_size * 4; - obj_size = ALIGN(obj_size, 256); - - rdev->r600_blit.ps_offset = obj_size; - obj_size += r6xx_ps_size * 4; - obj_size = ALIGN(obj_size, 256); - - /* pin copy shader into vram if not already initialized */ - if (rdev->r600_blit.shader_obj == NULL) { - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, - NULL, &rdev->r600_blit.shader_obj); - if (r) { - DRM_ERROR("r600 failed to allocate shader\n"); - return r; - } - - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); - return r; - } - } - - DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n", - obj_size, - rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); - - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr); - if (r) { - DRM_ERROR("failed to map blit object %d\n", r); - return r; - } - if (rdev->family >= CHIP_RV770) - memcpy_toio((char *)ptr + rdev->r600_blit.state_offset, - r7xx_default_state, rdev->r600_blit.state_len * 4); - else - memcpy_toio((char *)ptr + rdev->r600_blit.state_offset, - r6xx_default_state, rdev->r600_blit.state_len * 4); - if (num_packet2s) - memcpy_toio((char *)ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), - packet2s, num_packet2s * 4); - for (i = 0; i < r6xx_vs_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]); - for (i = 0; i < r6xx_ps_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]); - radeon_bo_kunmap(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - return 0; -} - -void r600_blit_fini(struct radeon_device *rdev) -{ - int r; - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - if (rdev->r600_blit.shader_obj == NULL) - return; - /* If we can't reserve the bo, unref should be enough to destroy - * it when it becomes idle. - */ - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (!r) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } - radeon_bo_unref(&rdev->r600_blit.shader_obj); -} - -static unsigned r600_blit_create_rect(unsigned num_gpu_pages, - int *width, int *height, int max_dim) -{ - unsigned max_pages; - unsigned pages = num_gpu_pages; - int w, h; - - if (num_gpu_pages == 0) { - /* not supposed to be called with no pages, but just in case */ - h = 0; - w = 0; - pages = 0; - WARN_ON(1); - } else { - int rect_order = 2; - h = RECT_UNIT_H; - while (num_gpu_pages / rect_order) { - h *= 2; - rect_order *= 4; - if (h >= max_dim) { - h = max_dim; - break; - } - } - max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H); - if (pages > max_pages) - pages = max_pages; - w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h; - w = (w / RECT_UNIT_W) * RECT_UNIT_W; - pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H); - BUG_ON(pages == 0); - } - - - DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages); - - /* return width and height only of the caller wants it */ - if (height) - *height = h; - if (width) - *width = w; - - return pages; -} - - -int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_fence **fence, struct radeon_sa_bo **vb, - struct radeon_semaphore **sem) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - int r; - int ring_size; - int num_loops = 0; - int dwords_per_loop = rdev->r600_blit.ring_size_per_loop; - - /* num loops */ - while (num_gpu_pages) { - num_gpu_pages -= - r600_blit_create_rect(num_gpu_pages, NULL, NULL, - rdev->r600_blit.max_dim); - num_loops++; - } - - /* 48 bytes for vertex per loop */ - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, vb, - (num_loops*48)+256, 256, true); - if (r) { - return r; - } - - r = radeon_semaphore_create(rdev, sem); - if (r) { - radeon_sa_bo_free(rdev, vb, NULL); - return r; - } - - /* calculate number of loops correctly */ - ring_size = num_loops * dwords_per_loop; - ring_size += rdev->r600_blit.ring_size_common; - r = radeon_ring_lock(rdev, ring, ring_size); - if (r) { - radeon_sa_bo_free(rdev, vb, NULL); - radeon_semaphore_free(rdev, sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { - radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, - RADEON_RING_TYPE_GFX_INDEX); - radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); - } else { - radeon_semaphore_free(rdev, sem, NULL); - } - - rdev->r600_blit.primitives.set_default_state(rdev); - rdev->r600_blit.primitives.set_shaders(rdev); - return 0; -} - -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb, struct radeon_semaphore *sem) -{ - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - int r; - - r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_sa_bo_free(rdev, &vb, *fence); - radeon_semaphore_free(rdev, &sem, *fence); -} - -void r600_kms_blit_copy(struct radeon_device *rdev, - u64 src_gpu_addr, u64 dst_gpu_addr, - unsigned num_gpu_pages, - struct radeon_sa_bo *vb) -{ - u64 vb_gpu_addr; - u32 *vb_cpu_addr; - - DRM_DEBUG("emitting copy %16jx %16jx %d\n", - (uintmax_t)src_gpu_addr, (uintmax_t)dst_gpu_addr, num_gpu_pages); - vb_cpu_addr = (u32 *)radeon_sa_bo_cpu_addr(vb); - vb_gpu_addr = radeon_sa_bo_gpu_addr(vb); - - while (num_gpu_pages) { - int w, h; - unsigned size_in_bytes; - unsigned pages_per_loop = - r600_blit_create_rect(num_gpu_pages, &w, &h, - rdev->r600_blit.max_dim); - - size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE; - DRM_DEBUG("rectangle w=%d h=%d\n", w, h); - - vb_cpu_addr[0] = 0; - vb_cpu_addr[1] = 0; - vb_cpu_addr[2] = 0; - vb_cpu_addr[3] = 0; - - vb_cpu_addr[4] = 0; - vb_cpu_addr[5] = int2float(h); - vb_cpu_addr[6] = 0; - vb_cpu_addr[7] = int2float(h); - - vb_cpu_addr[8] = int2float(w); - vb_cpu_addr[9] = int2float(h); - vb_cpu_addr[10] = int2float(w); - vb_cpu_addr[11] = int2float(h); - - rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8, - w, h, w, src_gpu_addr, size_in_bytes); - rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8, - w, h, dst_gpu_addr); - rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h); - rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr); - rdev->r600_blit.primitives.draw_auto(rdev); - rdev->r600_blit.primitives.cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - size_in_bytes, dst_gpu_addr); - - vb_cpu_addr += 12; - vb_gpu_addr += 4*12; - src_gpu_addr += size_in_bytes; - dst_gpu_addr += size_in_bytes; - num_gpu_pages -= pages_per_loop; - } -} diff --git a/sys/dev/drm/radeon/r600_blit_shaders.h b/sys/dev/drm/radeon/r600_blit_shaders.h index 2f3ce7a759..f437d36dd9 100644 --- a/sys/dev/drm/radeon/r600_blit_shaders.h +++ b/sys/dev/drm/radeon/r600_blit_shaders.h @@ -35,5 +35,4 @@ extern const u32 r6xx_default_state[]; extern const u32 r6xx_ps_size, r6xx_vs_size; extern const u32 r6xx_default_size, r7xx_default_size; -__pure uint32_t int2float(uint32_t x); #endif diff --git a/sys/dev/drm/radeon/r600_cp.c b/sys/dev/drm/radeon/r600_cp.c index c45a9d8b81..78d34505d9 100644 --- a/sys/dev/drm/radeon/r600_cp.c +++ b/sys/dev/drm/radeon/r600_cp.c @@ -2207,13 +2207,13 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle + init->ring_size / sizeof(u32)); dev_priv->ring.size = init->ring_size; - dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); + dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8); dev_priv->ring.rptr_update = /* init->rptr_update */ 4096; - dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8); + dev_priv->ring.rptr_update_l2qw = order_base_2(/* init->rptr_update */ 4096 / 8); dev_priv->ring.fetch_size = /* init->fetch_size */ 32; - dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16); + dev_priv->ring.fetch_size_l2ow = order_base_2(/* init->fetch_size */ 32 / 16); dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1; diff --git a/sys/dev/drm/radeon/r600_cs.c b/sys/dev/drm/radeon/r600_cs.c index 54a1d2976c..9c837adb4e 100644 --- a/sys/dev/drm/radeon/r600_cs.c +++ b/sys/dev/drm/radeon/r600_cs.c @@ -424,7 +424,7 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) } if (!IS_ALIGNED(base_offset, base_align)) { dev_warn(p->dev, "%s offset[%d] 0x%jx 0x%jx, %d not aligned\n", __func__, i, - (uintmax_t)base_offset, (uintmax_t)base_align, array_mode); + base_offset, base_align, array_mode); return -EINVAL; } @@ -453,7 +453,7 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) } else { dev_warn(p->dev, "%s offset[%d] %d %ju %d %lu too big (%d %d) (%d %d %d)\n", __func__, i, array_mode, - (uintmax_t)track->cb_color_bo_offset[i], tmp, + track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]), pitch, height, r600_fmt_get_nblocksx(format, pitch), r600_fmt_get_nblocksy(format, height), @@ -485,7 +485,7 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) dev_warn(p->dev, "%s FMASK_TILE_MAX too large " "(tile_max=%u, bytes=%u, offset=%ju, bo_size=%lu)\n", __func__, tile_max, bytes, - (uintmax_t)track->cb_color_frag_offset[i], + track->cb_color_frag_offset[i], radeon_bo_size(track->cb_color_frag_bo[i])); return -EINVAL; } @@ -503,7 +503,7 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) dev_warn(p->dev, "%s CMASK_BLOCK_MAX too large " "(block_max=%u, bytes=%u, offset=%ju, bo_size=%lu)\n", __func__, block_max, bytes, - (uintmax_t)track->cb_color_tile_offset[i], + track->cb_color_tile_offset[i], radeon_bo_size(track->cb_color_tile_bo[i])); return -EINVAL; } @@ -615,7 +615,7 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) } if (!IS_ALIGNED(base_offset, base_align)) { dev_warn(p->dev, "%s offset 0x%jx, 0x%jx, %d not aligned\n", __func__, - (uintmax_t)base_offset, (uintmax_t)base_align, array_mode); + base_offset, base_align, array_mode); return -EINVAL; } @@ -724,7 +724,7 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) (u64)track->vgt_strmout_size[i]; if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { DRM_ERROR("streamout %d bo too small: 0x%jx, 0x%lx\n", - i, (uintmax_t)offset, + i, offset, radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } @@ -1556,12 +1556,12 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, } if (!IS_ALIGNED(base_offset, base_align)) { dev_warn(p->dev, "%s:%d tex base offset (0x%jx, 0x%jx, %d) invalid\n", - __func__, __LINE__, (uintmax_t)base_offset, (uintmax_t)base_align, G_038000_TILE_MODE(word0)); + __func__, __LINE__, base_offset, base_align, G_038000_TILE_MODE(word0)); return -EINVAL; } if (!IS_ALIGNED(mip_offset, base_align)) { dev_warn(p->dev, "%s:%d tex mip offset (0x%jx, 0x%jx, %d) invalid\n", - __func__, __LINE__, (uintmax_t)mip_offset, (uintmax_t)base_align, G_038000_TILE_MODE(word0)); + __func__, __LINE__, mip_offset, base_align, G_038000_TILE_MODE(word0)); return -EINVAL; } @@ -1584,7 +1584,7 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, w0, h0, pitch_align, height_align, array_check.array_mode, format, word2, l0_size, radeon_bo_size(texture)); - dev_warn(p->dev, "alignments %d %d %d %jd\n", pitch, pitch_align, height_align, (uintmax_t)base_align); + dev_warn(p->dev, "alignments %d %d %d %jd\n", pitch, pitch_align, height_align, base_align); return -EINVAL; } /* using get ib will give us the offset into the mipmap bo */ @@ -1795,7 +1795,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA src buffer too small (%ju %lu)\n", - (uintmax_t)tmp + size, radeon_bo_size(reloc->robj)); + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -1825,7 +1825,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA dst buffer too small (%ju %lu)\n", - (uintmax_t)tmp + size, radeon_bo_size(reloc->robj)); + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -2095,13 +2095,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset = radeon_get_ib_value(p, idx+1) << 8; if (offset != track->vgt_strmout_bo_offset[idx_value]) { DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo offset does not match: 0x%jx, 0x%x\n", - (uintmax_t)offset, track->vgt_strmout_bo_offset[idx_value]); + offset, track->vgt_strmout_bo_offset[idx_value]); return -EINVAL; } if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad STRMOUT_BASE_UPDATE bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); @@ -2134,7 +2134,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2153,7 +2153,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2182,7 +2182,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } if ((offset + 8) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad MEM_WRITE bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 8, radeon_bo_size(reloc->robj)); + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2207,7 +2207,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad COPY_DW src bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2231,7 +2231,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { DRM_ERROR("bad COPY_DW dst bo too small: 0x%jx, 0x%lx\n", - (uintmax_t)offset + 4, radeon_bo_size(reloc->robj)); + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->lobj.gpu_offset; @@ -2307,7 +2307,7 @@ int r600_cs_parse(struct radeon_cs_parser *p) } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); #if 0 for (r = 0; r < p->ib.length_dw; r++) { - DRM_INFO("%05d 0x%08X\n", r, p->ib.ptr[r]); + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); mdelay(1); } #endif @@ -2510,7 +2510,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA write buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; @@ -2577,12 +2577,12 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA copy src buffer too small (%ju %lu)\n", - (uintmax_t)src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA write dst buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; @@ -2600,7 +2600,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA constant fill buffer too small (%ju %lu)\n", - (uintmax_t)dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); @@ -2617,7 +2617,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); #if 0 for (r = 0; r < p->ib->length_dw; r++) { - DRM_INFO("%05d 0x%08X\n", r, p->ib.ptr[r]); + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); mdelay(1); } #endif diff --git a/sys/dev/drm/radeon/r600_dma.c b/sys/dev/drm/radeon/r600_dma.c new file mode 100644 index 0000000000..cdbae22f7b --- /dev/null +++ b/sys/dev/drm/radeon/r600_dma.c @@ -0,0 +1,496 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "r600d.h" + +/* + * DMA + * Starting with R600, the GPU has an asynchronous + * DMA engine. The programming model is very similar + * to the 3D engine (ring buffer, IBs, etc.), but the + * DMA controller has it's own packet format that is + * different form the PM4 format used by the 3D engine. + * It supports copying data, writing embedded data, + * solid fills, and a number of other things. It also + * has support for tiling/detiling of buffers. + */ + +/** + * r600_dma_get_rptr - get the current read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Get the current rptr from the hardware (r6xx+). + */ +uint32_t r600_dma_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2; +} + +/** + * r600_dma_get_wptr - get the current write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Get the current wptr from the hardware (r6xx+). + */ +uint32_t r600_dma_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2; +} + +/** + * r600_dma_set_wptr - commit the write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon ring pointer + * + * Write the wptr back to the hardware (r6xx+). + */ +void r600_dma_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc); +} + +/** + * r600_dma_stop - stop the async dma engine + * + * @rdev: radeon_device pointer + * + * Stop the async dma engine (r6xx-evergreen). + */ +void r600_dma_stop(struct radeon_device *rdev) +{ + u32 rb_cntl = RREG32(DMA_RB_CNTL); + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL, rb_cntl); + + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; +} + +/** + * r600_dma_resume - setup and start the async dma engine + * + * @rdev: radeon_device pointer + * + * Set up the DMA ring buffer and enable it. (r6xx-evergreen). + * Returns 0 for success, error for failure. + */ +int r600_dma_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + u32 rb_cntl, dma_cntl, ib_cntl; + u32 rb_bufsz; + int r; + + /* Reset dma */ + if (rdev->family >= CHIP_RV770) + WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); + else + WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + + WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); + WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(DMA_RB_CNTL, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(DMA_RB_RPTR, 0); + WREG32(DMA_RB_WPTR, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(DMA_RB_RPTR_ADDR_HI, + upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); + WREG32(DMA_RB_RPTR_ADDR_LO, + ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + + WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); + + /* enable DMA IBs */ + ib_cntl = DMA_IB_ENABLE; +#ifdef __BIG_ENDIAN + ib_cntl |= DMA_IB_SWAP_ENABLE; +#endif + WREG32(DMA_IB_CNTL, ib_cntl); + + dma_cntl = RREG32(DMA_CNTL); + dma_cntl &= ~CTXEMPTY_INT_ENABLE; + WREG32(DMA_CNTL, dma_cntl); + + if (rdev->family >= CHIP_RV770) + WREG32(DMA_MODE, 1); + + ring->wptr = 0; + WREG32(DMA_RB_WPTR, ring->wptr << 2); + + ring->rptr = RREG32(DMA_RB_RPTR) >> 2; + + WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); + + ring->ready = true; + + r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * r600_dma_fini - tear down the async dma engine + * + * @rdev: radeon_device pointer + * + * Stop the async dma engine and free the ring (r6xx-evergreen). + */ +void r600_dma_fini(struct radeon_device *rdev) +{ + r600_dma_stop(rdev); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); +} + +/** + * r600_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = r600_gpu_check_soft_reset(rdev); + + if (!(reset_mask & RADEON_RESET_DMA)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + + +/** + * r600_dma_ring_test - simple async dma engine test + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. (r6xx-SI). + * Returns 0 for success, error for failure. + */ +int r600_dma_ring_test(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + unsigned i; + int r; + volatile uint32_t *ptr = rdev->vram_scratch.ptr; + u32 tmp; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + *ptr = tmp; + + r = radeon_ring_lock(rdev, ring, 4); + if (r) { + DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); + return r; + } + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = *ptr; + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/** + * r600_dma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (r6xx-r7xx). + */ +void r600_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + + /* write the fence */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); + radeon_ring_write(ring, lower_32_bits(fence->seq)); + /* generate an interrupt */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); +} + +/** + * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @semaphore: radeon semaphore object + * @emit_wait: wait or signal semaphore + * + * Add a DMA semaphore packet to the ring wait on or signal + * other rings (r6xx-SI). + */ +void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + u64 addr = semaphore->gpu_addr; + u32 s = emit_wait ? 0 : 1; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); +} + +/** + * r600_dma_ib_test - test an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test a simple IB in the DMA ring (r6xx-SI). + * Returns 0 on success, error on failure. + */ +int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_ib ib; + unsigned i; + int r; + volatile uint32_t *ptr = rdev->vram_scratch.ptr; + u32 tmp = 0; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + *ptr = tmp; + + r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); + ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; + ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; + ib.ptr[3] = 0xDEADBEEF; + ib.length_dw = 4; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + return r; + } + r = radeon_fence_wait(ib.fence, false); + if (r) { + radeon_ib_free(rdev, &ib); /* zRJ XXX culprit */ + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = *ptr; + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); + } else { + DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + radeon_ib_free(rdev, &ib); + return r; +} + +/** + * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (r6xx-r7xx). + */ +void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * r600_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (r6xx). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); + r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFE) + cur_size_in_dw = 0xFFFE; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | + (upper_32_bits(src_offset) & 0xff))); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} diff --git a/sys/dev/drm/radeon/r600_dpm.c b/sys/dev/drm/radeon/r600_dpm.c index b972413675..8c9bd4413a 100644 --- a/sys/dev/drm/radeon/r600_dpm.c +++ b/sys/dev/drm/radeon/r600_dpm.c @@ -175,6 +175,24 @@ u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) return vblank_time_us; } +u32 r600_dpm_get_vrefresh(struct radeon_device *rdev) +{ + struct drm_device *dev = rdev->ddev; + struct drm_crtc *crtc; + struct radeon_crtc *radeon_crtc; + u32 vrefresh = 0; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + radeon_crtc = to_radeon_crtc(crtc); + if (crtc->enabled && radeon_crtc->enabled && radeon_crtc->hw_mode.clock) { + vrefresh = radeon_crtc->hw_mode.vrefresh; + break; + } + } + + return vrefresh; +} + void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, u32 *p, u32 *u) { @@ -746,6 +764,8 @@ bool r600_is_internal_thermal_sensor(enum radeon_int_thermal_type sensor) case THERMAL_TYPE_SUMO: case THERMAL_TYPE_NI: case THERMAL_TYPE_SI: + case THERMAL_TYPE_CI: + case THERMAL_TYPE_KV: return true; case THERMAL_TYPE_ADT7473_WITH_INTERNAL: case THERMAL_TYPE_EMC2103_WITH_INTERNAL: @@ -780,15 +800,19 @@ static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependen u32 size = atom_table->ucNumEntries * sizeof(struct radeon_clock_voltage_dependency_entry); int i; + ATOM_PPLIB_Clock_Voltage_Dependency_Record *entry; radeon_table->entries = kzalloc(size, GFP_KERNEL); if (!radeon_table->entries) return -ENOMEM; + entry = &atom_table->entries[0]; for (i = 0; i < atom_table->ucNumEntries; i++) { - radeon_table->entries[i].clk = le16_to_cpu(atom_table->entries[i].usClockLow) | - (atom_table->entries[i].ucClockHigh << 16); - radeon_table->entries[i].v = le16_to_cpu(atom_table->entries[i].usVoltage); + radeon_table->entries[i].clk = le16_to_cpu(entry->usClockLow) | + (entry->ucClockHigh << 16); + radeon_table->entries[i].v = le16_to_cpu(entry->usVoltage); + entry = (ATOM_PPLIB_Clock_Voltage_Dependency_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_Clock_Voltage_Dependency_Record)); } radeon_table->count = atom_table->ucNumEntries; @@ -876,6 +900,19 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) return ret; } } + if (power_info->pplib4.usMvddDependencyOnMCLKOffset) { + dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(power_info->pplib4.usMvddDependencyOnMCLKOffset)); + ret = r600_parse_clk_voltage_dep_table(&rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, + dep_table); + if (ret) { + kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries); + kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries); + kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries); + return ret; + } + } if (power_info->pplib4.usMaxClockVoltageOnDCOffset) { ATOM_PPLIB_Clock_Voltage_Limit_Table *clk_v = (ATOM_PPLIB_Clock_Voltage_Limit_Table *) @@ -899,27 +936,27 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (ATOM_PPLIB_PhaseSheddingLimits_Table *) ((uint8_t*)mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib4.usVddcPhaseShedLimitsTableOffset)); + ATOM_PPLIB_PhaseSheddingLimits_Record *entry; rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries = kzalloc(psl->ucNumEntries * sizeof(struct radeon_phase_shedding_limits_entry), GFP_KERNEL); if (!rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) { - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries); - kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries); - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries); + r600_free_extended_power_table(rdev); return -ENOMEM; } + entry = &psl->entries[0]; for (i = 0; i < psl->ucNumEntries; i++) { rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries[i].sclk = - le16_to_cpu(psl->entries[i].usSclkLow) | - (psl->entries[i].ucSclkHigh << 16); + le16_to_cpu(entry->usSclkLow) | (entry->ucSclkHigh << 16); rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries[i].mclk = - le16_to_cpu(psl->entries[i].usMclkLow) | - (psl->entries[i].ucMclkHigh << 16); + le16_to_cpu(entry->usMclkLow) | (entry->ucMclkHigh << 16); rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries[i].voltage = - le16_to_cpu(psl->entries[i].usVoltage); + le16_to_cpu(entry->usVoltage); + entry = (ATOM_PPLIB_PhaseSheddingLimits_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_PhaseSheddingLimits_Record)); } rdev->pm.dpm.dyn_state.phase_shedding_limits_table.count = psl->ucNumEntries; @@ -946,30 +983,140 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (ATOM_PPLIB_CAC_Leakage_Table *) ((uint8_t*)mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib5.usCACLeakageTableOffset)); + ATOM_PPLIB_CAC_Leakage_Record *entry; u32 size = cac_table->ucNumEntries * sizeof(struct radeon_cac_leakage_table); rdev->pm.dpm.dyn_state.cac_leakage_table.entries = kzalloc(size, GFP_KERNEL); if (!rdev->pm.dpm.dyn_state.cac_leakage_table.entries) { - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries); - kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries); - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries); + r600_free_extended_power_table(rdev); return -ENOMEM; } + entry = &cac_table->entries[0]; for (i = 0; i < cac_table->ucNumEntries; i++) { - rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc = - le16_to_cpu(cac_table->entries[i].usVddc); - rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage = - le32_to_cpu(cac_table->entries[i].ulLeakageValue); + if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) { + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc1 = + le16_to_cpu(entry->usVddc1); + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc2 = + le16_to_cpu(entry->usVddc2); + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc3 = + le16_to_cpu(entry->usVddc3); + } else { + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc = + le16_to_cpu(entry->usVddc); + rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage = + le32_to_cpu(entry->ulLeakageValue); + } + entry = (ATOM_PPLIB_CAC_Leakage_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_CAC_Leakage_Record)); } rdev->pm.dpm.dyn_state.cac_leakage_table.count = cac_table->ucNumEntries; } } - /* ppm table */ + /* ext tables */ if (le16_to_cpu(power_info->pplib.usTableSize) >= sizeof(struct _ATOM_PPLIB_POWERPLAYTABLE3)) { ATOM_PPLIB_EXTENDEDHEADER *ext_hdr = (ATOM_PPLIB_EXTENDEDHEADER *) (mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib3.usExtendendedHeaderOffset)); + if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2) && + ext_hdr->usVCETableOffset) { + VCEClockInfoArray *array = (VCEClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usVCETableOffset) + 1); + ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *limits = + (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + + 1 + array->ucNumEntries * sizeof(VCEClockInfo)); + ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry; + u32 size = limits->numEntries * + sizeof(struct radeon_vce_clock_voltage_dependency_entry); + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = + kzalloc(size, GFP_KERNEL); + if (!rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) { + r600_free_extended_power_table(rdev); + return -ENOMEM; + } + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = + limits->numEntries; + entry = &limits->entries[0]; + for (i = 0; i < limits->numEntries; i++) { + VCEClockInfo *vce_clk = (VCEClockInfo *) + ((u8 *)&array->entries[0] + + (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk = + le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16); + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].ecclk = + le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16); + rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].v = + le16_to_cpu(entry->usVoltage); + entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record)); + } + } + if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) && + ext_hdr->usUVDTableOffset) { + UVDClockInfoArray *array = (UVDClockInfoArray *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usUVDTableOffset) + 1); + ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table *limits = + (ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usUVDTableOffset) + 1 + + 1 + (array->ucNumEntries * sizeof (UVDClockInfo))); + ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record *entry; + u32 size = limits->numEntries * + sizeof(struct radeon_uvd_clock_voltage_dependency_entry); + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries = + kzalloc(size, GFP_KERNEL); + if (!rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) { + r600_free_extended_power_table(rdev); + return -ENOMEM; + } + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count = + limits->numEntries; + entry = &limits->entries[0]; + for (i = 0; i < limits->numEntries; i++) { + UVDClockInfo *uvd_clk = (UVDClockInfo *) + ((u8 *)&array->entries[0] + + (entry->ucUVDClockInfoIndex * sizeof(UVDClockInfo))); + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].vclk = + le16_to_cpu(uvd_clk->usVClkLow) | (uvd_clk->ucVClkHigh << 16); + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].dclk = + le16_to_cpu(uvd_clk->usDClkLow) | (uvd_clk->ucDClkHigh << 16); + rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v = + le16_to_cpu(entry->usVoltage); + entry = (ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record)); + } + } + if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V4) && + ext_hdr->usSAMUTableOffset) { + ATOM_PPLIB_SAMClk_Voltage_Limit_Table *limits = + (ATOM_PPLIB_SAMClk_Voltage_Limit_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usSAMUTableOffset) + 1); + ATOM_PPLIB_SAMClk_Voltage_Limit_Record *entry; + u32 size = limits->numEntries * + sizeof(struct radeon_clock_voltage_dependency_entry); + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries = + kzalloc(size, GFP_KERNEL); + if (!rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) { + r600_free_extended_power_table(rdev); + return -ENOMEM; + } + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count = + limits->numEntries; + entry = &limits->entries[0]; + for (i = 0; i < limits->numEntries; i++) { + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].clk = + le16_to_cpu(entry->usSAMClockLow) | (entry->ucSAMClockHigh << 16); + rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].v = + le16_to_cpu(entry->usVoltage); + entry = (ATOM_PPLIB_SAMClk_Voltage_Limit_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_SAMClk_Voltage_Limit_Record)); + } + } if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V5) && ext_hdr->usPPMTableOffset) { ATOM_PPLIB_PPM_Table *ppm = (ATOM_PPLIB_PPM_Table *) @@ -978,10 +1125,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) rdev->pm.dpm.dyn_state.ppm_table = kzalloc(sizeof(struct radeon_ppm_table), GFP_KERNEL); if (!rdev->pm.dpm.dyn_state.ppm_table) { - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries); - kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries); - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries); - kfree(rdev->pm.dpm.dyn_state.cac_leakage_table.entries); + r600_free_extended_power_table(rdev); return -ENOMEM; } rdev->pm.dpm.dyn_state.ppm_table->ppm_design = ppm->ucPpmDesign; @@ -1004,6 +1148,71 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) rdev->pm.dpm.dyn_state.ppm_table->tj_max = le32_to_cpu(ppm->ulTjmax); } + if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V6) && + ext_hdr->usACPTableOffset) { + ATOM_PPLIB_ACPClk_Voltage_Limit_Table *limits = + (ATOM_PPLIB_ACPClk_Voltage_Limit_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usACPTableOffset) + 1); + ATOM_PPLIB_ACPClk_Voltage_Limit_Record *entry; + u32 size = limits->numEntries * + sizeof(struct radeon_clock_voltage_dependency_entry); + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries = + kzalloc(size, GFP_KERNEL); + if (!rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) { + r600_free_extended_power_table(rdev); + return -ENOMEM; + } + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count = + limits->numEntries; + entry = &limits->entries[0]; + for (i = 0; i < limits->numEntries; i++) { + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].clk = + le16_to_cpu(entry->usACPClockLow) | (entry->ucACPClockHigh << 16); + rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].v = + le16_to_cpu(entry->usVoltage); + entry = (ATOM_PPLIB_ACPClk_Voltage_Limit_Record *) + ((u8 *)entry + sizeof(ATOM_PPLIB_ACPClk_Voltage_Limit_Record)); + } + } + if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V7) && + ext_hdr->usPowerTuneTableOffset) { + u8 rev = *(u8 *)(mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usPowerTuneTableOffset)); + ATOM_PowerTune_Table *pt; + rdev->pm.dpm.dyn_state.cac_tdp_table = + kzalloc(sizeof(struct radeon_cac_tdp_table), GFP_KERNEL); + if (!rdev->pm.dpm.dyn_state.cac_tdp_table) { + r600_free_extended_power_table(rdev); + return -ENOMEM; + } + if (rev > 0) { + ATOM_PPLIB_POWERTUNE_Table_V1 *ppt = (ATOM_PPLIB_POWERTUNE_Table_V1 *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usPowerTuneTableOffset)); + rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit = + ppt->usMaximumPowerDeliveryLimit; + pt = &ppt->power_tune_table; + } else { + ATOM_PPLIB_POWERTUNE_Table *ppt = (ATOM_PPLIB_POWERTUNE_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usPowerTuneTableOffset)); + rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit = 255; + pt = &ppt->power_tune_table; + } + rdev->pm.dpm.dyn_state.cac_tdp_table->tdp = le16_to_cpu(pt->usTDP); + rdev->pm.dpm.dyn_state.cac_tdp_table->configurable_tdp = + le16_to_cpu(pt->usConfigurableTDP); + rdev->pm.dpm.dyn_state.cac_tdp_table->tdc = le16_to_cpu(pt->usTDC); + rdev->pm.dpm.dyn_state.cac_tdp_table->battery_power_limit = + le16_to_cpu(pt->usBatteryPowerLimit); + rdev->pm.dpm.dyn_state.cac_tdp_table->small_power_limit = + le16_to_cpu(pt->usSmallPowerLimit); + rdev->pm.dpm.dyn_state.cac_tdp_table->low_cac_leakage = + le16_to_cpu(pt->usLowCACLeakage); + rdev->pm.dpm.dyn_state.cac_tdp_table->high_cac_leakage = + le16_to_cpu(pt->usHighCACLeakage); + } } return 0; @@ -1011,18 +1220,20 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) void r600_free_extended_power_table(struct radeon_device *rdev) { - if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries) - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries); - if (rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries) - kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries); - if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries) - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries); - if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries) - kfree(rdev->pm.dpm.dyn_state.cac_leakage_table.entries); - if (rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) - kfree(rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries); - if (rdev->pm.dpm.dyn_state.ppm_table) - kfree(rdev->pm.dpm.dyn_state.ppm_table); + struct radeon_dpm_dynamic_state *dyn_state = &rdev->pm.dpm.dyn_state; + + kfree(dyn_state->vddc_dependency_on_sclk.entries); + kfree(dyn_state->vddci_dependency_on_mclk.entries); + kfree(dyn_state->vddc_dependency_on_mclk.entries); + kfree(dyn_state->mvdd_dependency_on_mclk.entries); + kfree(dyn_state->cac_leakage_table.entries); + kfree(dyn_state->phase_shedding_limits_table.entries); + kfree(dyn_state->ppm_table); + kfree(dyn_state->cac_tdp_table); + kfree(dyn_state->vce_clock_voltage_dependency_table.entries); + kfree(dyn_state->uvd_clock_voltage_dependency_table.entries); + kfree(dyn_state->samu_clock_voltage_dependency_table.entries); + kfree(dyn_state->acp_clock_voltage_dependency_table.entries); } enum radeon_pcie_gen r600_get_pcie_gen_support(struct radeon_device *rdev, @@ -1047,3 +1258,36 @@ enum radeon_pcie_gen r600_get_pcie_gen_support(struct radeon_device *rdev, } return RADEON_PCIE_GEN1; } + +u16 r600_get_pcie_lane_support(struct radeon_device *rdev, + u16 asic_lanes, + u16 default_lanes) +{ + switch (asic_lanes) { + case 0: + default: + return default_lanes; + case 1: + return 1; + case 2: + return 2; + case 4: + return 4; + case 8: + return 8; + case 12: + return 12; + case 16: + return 16; + } +} + +u8 r600_encode_pci_lane_width(u32 lanes) +{ + u8 encoded_lanes[] = { 0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6 }; + + if (lanes > 16) + return 0; + + return encoded_lanes[lanes]; +} diff --git a/sys/dev/drm/radeon/r600_dpm.h b/sys/dev/drm/radeon/r600_dpm.h index 7c822d9ae5..1000bf9719 100644 --- a/sys/dev/drm/radeon/r600_dpm.h +++ b/sys/dev/drm/radeon/r600_dpm.h @@ -130,6 +130,7 @@ void r600_dpm_print_cap_info(u32 caps); void r600_dpm_print_ps_status(struct radeon_device *rdev, struct radeon_ps *rps); u32 r600_dpm_get_vblank_time(struct radeon_device *rdev); +u32 r600_dpm_get_vrefresh(struct radeon_device *rdev); bool r600_is_uvd_state(u32 class, u32 class2); void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, u32 *p, u32 *u); @@ -224,4 +225,9 @@ enum radeon_pcie_gen r600_get_pcie_gen_support(struct radeon_device *rdev, enum radeon_pcie_gen asic_gen, enum radeon_pcie_gen default_gen); +u16 r600_get_pcie_lane_support(struct radeon_device *rdev, + u16 asic_lanes, + u16 default_lanes); +u8 r600_encode_pci_lane_width(u32 lanes); + #endif diff --git a/sys/dev/drm/radeon/r600_hdmi.c b/sys/dev/drm/radeon/r600_hdmi.c index e1b1053c34..63809a8cef 100644 --- a/sys/dev/drm/radeon/r600_hdmi.c +++ b/sys/dev/drm/radeon/r600_hdmi.c @@ -57,15 +57,15 @@ enum r600_hdmi_iec_status_bits { static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = { /* 32kHz 44.1kHz 48kHz */ /* Clock N CTS N CTS N CTS */ - { 25174, 4576, 28125, 7007, 31250, 6864, 28125 }, /* 25,20/1.001 MHz */ + { 25175, 4576, 28125, 7007, 31250, 6864, 28125 }, /* 25,20/1.001 MHz */ { 25200, 4096, 25200, 6272, 28000, 6144, 25200 }, /* 25.20 MHz */ { 27000, 4096, 27000, 6272, 30000, 6144, 27000 }, /* 27.00 MHz */ { 27027, 4096, 27027, 6272, 30030, 6144, 27027 }, /* 27.00*1.001 MHz */ { 54000, 4096, 54000, 6272, 60000, 6144, 54000 }, /* 54.00 MHz */ { 54054, 4096, 54054, 6272, 60060, 6144, 54054 }, /* 54.00*1.001 MHz */ - { 74175, 11648, 210937, 17836, 234375, 11648, 140625 }, /* 74.25/1.001 MHz */ + { 74176, 11648, 210937, 17836, 234375, 11648, 140625 }, /* 74.25/1.001 MHz */ { 74250, 4096, 74250, 6272, 82500, 6144, 74250 }, /* 74.25 MHz */ - { 148351, 11648, 421875, 8918, 234375, 5824, 140625 }, /* 148.50/1.001 MHz */ + { 148352, 11648, 421875, 8918, 234375, 5824, 140625 }, /* 148.50/1.001 MHz */ { 148500, 4096, 148500, 6272, 165000, 6144, 148500 }, /* 148.50 MHz */ { 0, 4096, 0, 6272, 0, 6144, 0 } /* Other */ }; @@ -75,8 +75,15 @@ static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = { */ static void r600_hdmi_calc_cts(uint32_t clock, int *CTS, int N, int freq) { - if (*CTS == 0) - *CTS = clock * N / (128 * freq) * 1000; + u64 n; + u32 d; + + if (*CTS == 0) { + n = (u64)clock * (u64)N * 1000ULL; + d = 128 * freq; + do_div(n, d); + *CTS = n; + } DRM_DEBUG("Using ACR timing N=%d CTS=%d for frequency %d\n", N, *CTS, freq); } @@ -257,10 +264,7 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock) * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ - if (ASIC_IS_DCE3(rdev)) { - /* according to the reg specs, this should DCE3.2 only, but in - * practice it seems to cover DCE3.0 as well. - */ + if (ASIC_IS_DCE32(rdev)) { if (dig->dig_encoder == 0) { dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK; dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio); @@ -276,13 +280,130 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock) WREG32(DCCG_AUDIO_DTO1_MODULE, dto_modulo); WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */ } + } else if (ASIC_IS_DCE3(rdev)) { + /* according to the reg specs, this should DCE3.2 only, but in + * practice it seems to cover DCE3.0/3.1 as well. + */ + if (dig->dig_encoder == 0) { + WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100); + WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100); + WREG32(DCCG_AUDIO_DTO_SELECT, 0); /* select DTO0 */ + } else { + WREG32(DCCG_AUDIO_DTO1_PHASE, base_rate * 100); + WREG32(DCCG_AUDIO_DTO1_MODULE, clock * 100); + WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */ + } } else { - /* according to the reg specs, this should be DCE2.0 and DCE3.0 */ + /* according to the reg specs, this should be DCE2.0 and DCE3.0/3.1 */ WREG32(AUDIO_DTO, AUDIO_DTO_PHASE(base_rate / 10) | AUDIO_DTO_MODULE(clock / 10)); } } +static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct drm_connector *connector; + struct radeon_connector *radeon_connector = NULL; + u32 tmp; + u8 *sadb; + int sad_count; + + /* XXX: setting this register causes hangs on some asics */ + return; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) + radeon_connector = to_radeon_connector(connector); + } + + if (!radeon_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb); + if (sad_count < 0) { + DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); + return; + } + + /* program the speaker allocation */ + tmp = RREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER); + tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK); + /* set HDMI mode */ + tmp |= HDMI_CONNECTION; + if (sad_count) + tmp |= SPEAKER_ALLOCATION(sadb[0]); + else + tmp |= SPEAKER_ALLOCATION(5); /* stereo */ + WREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER, tmp); + + kfree(sadb); +} + +static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder) +{ + struct radeon_device *rdev = encoder->dev->dev_private; + struct drm_connector *connector; + struct radeon_connector *radeon_connector = NULL; + struct cea_sad *sads; + int i, sad_count; + + static const u16 eld_reg_to_type[][2] = { + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, + { AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, + }; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) + radeon_connector = to_radeon_connector(connector); + } + + if (!radeon_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); + if (sad_count < 0) { + DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + return; + } + BUG_ON(!sads); + + for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { + u32 value = 0; + int j; + + for (j = 0; j < sad_count; j++) { + struct cea_sad *sad = &sads[j]; + + if (sad->format == eld_reg_to_type[i][1]) { + value = MAX_CHANNELS(sad->channels) | + DESCRIPTOR_BYTE_2(sad->byte2) | + SUPPORTED_FREQUENCIES(sad->freq); + if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) + value |= SUPPORTED_FREQUENCIES_STEREO(sad->freq); + break; + } + } + WREG32(eld_reg_to_type[i][0], value); + } + + kfree(sads); +} + /* * update the info frames with the data from the current display mode */ @@ -327,9 +448,14 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */ } + if (ASIC_IS_DCE32(rdev)) { + dce3_2_afmt_write_speaker_allocation(encoder); + dce3_2_afmt_write_sad_regs(encoder); + } + WREG32(HDMI0_ACR_PACKET_CONTROL + offset, - HDMI0_ACR_AUTO_SEND | /* allow hw to sent ACR packets when required */ - HDMI0_ACR_SOURCE); /* select SW CTS value */ + HDMI0_ACR_SOURCE | /* select SW CTS value - XXX verify that hw CTS works on all families */ + HDMI0_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */ WREG32(HDMI0_VBI_PACKET_CONTROL + offset, HDMI0_NULL_SEND | /* send null packets when required */ @@ -382,7 +508,7 @@ void r600_hdmi_update_audio_settings(struct drm_encoder *encoder) struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - struct r600_audio audio = r600_audio_status(rdev); + struct r600_audio_pin audio = r600_audio_status(rdev); uint8_t buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AUDIO_INFOFRAME_SIZE]; struct hdmi_audio_infoframe frame; uint32_t offset; @@ -491,6 +617,11 @@ void r600_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!enable && !dig->afmt->enabled) return; + if (enable) + dig->afmt->pin = r600_audio_get_pin(rdev); + else + dig->afmt->pin = NULL; + /* Older chipsets require setting HDMI and routing manually */ if (!ASIC_IS_DCE3(rdev)) { if (enable) diff --git a/sys/dev/drm/radeon/r600d.h b/sys/dev/drm/radeon/r600d.h index 7c780839a7..7b3c7b5932 100644 --- a/sys/dev/drm/radeon/r600d.h +++ b/sys/dev/drm/radeon/r600d.h @@ -960,6 +960,42 @@ # define DIG_MODE_SDVO 4 #define DIG1_CNTL 0x79a0 +#define AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER 0x71bc +#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0) +#define SPEAKER_ALLOCATION_MASK (0x7f << 0) +#define SPEAKER_ALLOCATION_SHIFT 0 +#define HDMI_CONNECTION (1 << 16) +#define DP_CONNECTION (1 << 17) + +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x71c8 /* LPCM */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x71cc /* AC3 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x71d0 /* MPEG1 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x71d4 /* MP3 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x71d8 /* MPEG2 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x71dc /* AAC */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x71e0 /* DTS */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x71e4 /* ATRAC */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x71e8 /* one bit audio - leave at 0 (default) */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x71ec /* Dolby Digital */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x71f0 /* DTS-HD */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x71f4 /* MAT-MLP */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x71f8 /* DTS */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x71fc /* WMA Pro */ +# define MAX_CHANNELS(x) (((x) & 0x7) << 0) +/* max channels minus one. 7 = 8 channels */ +# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) +# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) +# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ +/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO + * bit0 = 32 kHz + * bit1 = 44.1 kHz + * bit2 = 48 kHz + * bit3 = 88.2 kHz + * bit4 = 96 kHz + * bit5 = 176.4 kHz + * bit6 = 192 kHz + */ + /* rs6xx/rs740 and r6xx share the same HDMI blocks, however, rs6xx has only one * instance of the blocks while r6xx has 2. DCE 3.0 cards are slightly * different due to the new DIG blocks, but also have 2 instances. @@ -1004,7 +1040,7 @@ # define HDMI0_AVI_INFO_CONT (1 << 1) # define HDMI0_AUDIO_INFO_SEND (1 << 4) # define HDMI0_AUDIO_INFO_CONT (1 << 5) -# define HDMI0_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hmdi regs */ +# define HDMI0_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hdmi regs */ # define HDMI0_AUDIO_INFO_UPDATE (1 << 7) # define HDMI0_MPEG_INFO_SEND (1 << 8) # define HDMI0_MPEG_INFO_CONT (1 << 9) @@ -1487,7 +1523,7 @@ */ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/sys/dev/drm/radeon/radeon.h b/sys/dev/drm/radeon/radeon.h index 6cd33ac1e7..28105451f2 100644 --- a/sys/dev/drm/radeon/radeon.h +++ b/sys/dev/drm/radeon/radeon.h @@ -114,7 +114,7 @@ extern int radeon_aspm; * symbol; */ #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ -#define RADEON_FENCE_JIFFIES_TIMEOUT (DRM_HZ / 2) +#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) /* RADEON_IB_POOL_SIZE must be a power of 2 */ #define RADEON_IB_POOL_SIZE 16 #define RADEON_DEBUGFS_MAX_COMPONENTS 32 @@ -162,6 +162,47 @@ extern int radeon_aspm; #define RADEON_RESET_MC (1 << 10) #define RADEON_RESET_DISPLAY (1 << 11) +/* CG block flags */ +#define RADEON_CG_BLOCK_GFX (1 << 0) +#define RADEON_CG_BLOCK_MC (1 << 1) +#define RADEON_CG_BLOCK_SDMA (1 << 2) +#define RADEON_CG_BLOCK_UVD (1 << 3) +#define RADEON_CG_BLOCK_VCE (1 << 4) +#define RADEON_CG_BLOCK_HDP (1 << 5) +#define RADEON_CG_BLOCK_BIF (1 << 6) + +/* CG flags */ +#define RADEON_CG_SUPPORT_GFX_MGCG (1 << 0) +#define RADEON_CG_SUPPORT_GFX_MGLS (1 << 1) +#define RADEON_CG_SUPPORT_GFX_CGCG (1 << 2) +#define RADEON_CG_SUPPORT_GFX_CGLS (1 << 3) +#define RADEON_CG_SUPPORT_GFX_CGTS (1 << 4) +#define RADEON_CG_SUPPORT_GFX_CGTS_LS (1 << 5) +#define RADEON_CG_SUPPORT_GFX_CP_LS (1 << 6) +#define RADEON_CG_SUPPORT_GFX_RLC_LS (1 << 7) +#define RADEON_CG_SUPPORT_MC_LS (1 << 8) +#define RADEON_CG_SUPPORT_MC_MGCG (1 << 9) +#define RADEON_CG_SUPPORT_SDMA_LS (1 << 10) +#define RADEON_CG_SUPPORT_SDMA_MGCG (1 << 11) +#define RADEON_CG_SUPPORT_BIF_LS (1 << 12) +#define RADEON_CG_SUPPORT_UVD_MGCG (1 << 13) +#define RADEON_CG_SUPPORT_VCE_MGCG (1 << 14) +#define RADEON_CG_SUPPORT_HDP_LS (1 << 15) +#define RADEON_CG_SUPPORT_HDP_MGCG (1 << 16) + +/* PG flags */ +#define RADEON_PG_SUPPORT_GFX_PG (1 << 0) +#define RADEON_PG_SUPPORT_GFX_SMG (1 << 1) +#define RADEON_PG_SUPPORT_GFX_DMG (1 << 2) +#define RADEON_PG_SUPPORT_UVD (1 << 3) +#define RADEON_PG_SUPPORT_VCE (1 << 4) +#define RADEON_PG_SUPPORT_CP (1 << 5) +#define RADEON_PG_SUPPORT_GDS (1 << 6) +#define RADEON_PG_SUPPORT_RLC_SMU_HS (1 << 7) +#define RADEON_PG_SUPPORT_SDMA (1 << 8) +#define RADEON_PG_SUPPORT_ACP (1 << 9) +#define RADEON_PG_SUPPORT_SAMU (1 << 10) + /* max cursor sizes (in pixels) */ #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 @@ -248,6 +289,12 @@ int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type, int radeon_atom_get_leakage_vddc_based_on_leakage_idx(struct radeon_device *rdev, u16 *voltage, u16 leakage_idx); +int radeon_atom_get_leakage_id_from_vbios(struct radeon_device *rdev, + u16 *leakage_id); +int radeon_atom_get_leakage_vddc_based_on_leakage_params(struct radeon_device *rdev, + u16 *vddc, u16 *vddci, + u16 virtual_voltage_id, + u16 vbios_voltage_id); int radeon_atom_round_to_true_voltage(struct radeon_device *rdev, u8 voltage_type, u16 nominal_voltage, @@ -273,6 +320,7 @@ int radeon_atom_get_memory_info(struct radeon_device *rdev, int radeon_atom_get_mclk_range_table(struct radeon_device *rdev, bool gddr5, u8 module_index, struct atom_memory_clock_range_table *mclk_range_table); +void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le); void rs690_pm_info(struct radeon_device *rdev); extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw, unsigned *bankh, unsigned *mtaspect, @@ -501,9 +549,6 @@ int radeon_mode_dumb_create(struct drm_file *file_priv, int radeon_mode_dumb_mmap(struct drm_file *filp, struct drm_device *dev, uint32_t handle, uint64_t *offset_p); -int radeon_mode_dumb_destroy(struct drm_file *file_priv, - struct drm_device *dev, - uint32_t handle); /* * Semaphores. @@ -694,7 +739,7 @@ union radeon_irq_stat_regs { #define RADEON_MAX_HPD_PINS 6 #define RADEON_MAX_CRTCS 6 -#define RADEON_MAX_AFMT_BLOCKS 6 +#define RADEON_MAX_AFMT_BLOCKS 7 struct radeon_irq { bool installed; @@ -758,8 +803,6 @@ struct radeon_ring { uint32_t align_mask; uint32_t ptr_mask; bool ready; - u32 ptr_reg_shift; - u32 ptr_reg_mask; u32 nop; u32 idx; u64 last_semaphore_signal_addr; @@ -856,35 +899,6 @@ struct r600_ih { bool enabled; }; -struct r600_blit_cp_primitives { - void (*set_render_target)(struct radeon_device *rdev, int format, - int w, int h, u64 gpu_addr); - void (*cp_set_surface_sync)(struct radeon_device *rdev, - u32 sync_type, u32 size, - u64 mc_addr); - void (*set_shaders)(struct radeon_device *rdev); - void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr); - void (*set_tex_resource)(struct radeon_device *rdev, - int format, int w, int h, int pitch, - u64 gpu_addr, u32 size); - void (*set_scissors)(struct radeon_device *rdev, int x1, int y1, - int x2, int y2); - void (*draw_auto)(struct radeon_device *rdev); - void (*set_default_state)(struct radeon_device *rdev); -}; - -struct r600_blit { - struct radeon_bo *shader_obj; - struct r600_blit_cp_primitives primitives; - int max_dim; - int ring_size_common; - int ring_size_per_loop; - u64 shader_gpu_addr; - u32 vs_offset, ps_offset; - u32 state_offset; - u32 state_len; -}; - /* * RLC stuff */ @@ -895,13 +909,19 @@ struct radeon_rlc { struct radeon_bo *save_restore_obj; uint64_t save_restore_gpu_addr; volatile uint32_t *sr_ptr; - u32 *reg_list; + const u32 *reg_list; u32 reg_list_size; /* for clear state */ struct radeon_bo *clear_state_obj; uint64_t clear_state_gpu_addr; volatile uint32_t *cs_ptr; - struct cs_section_def *cs_data; + const struct cs_section_def *cs_data; + u32 clear_state_size; + /* for cp tables */ + struct radeon_bo *cp_table_obj; + uint64_t cp_table_gpu_addr; + volatile uint32_t *cp_table_ptr; + u32 cp_table_size; }; int radeon_ib_get(struct radeon_device *rdev, int ring, @@ -939,8 +959,7 @@ unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, unsigned size, uint32_t *data); int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size, - unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, - u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop); + unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop); void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); @@ -1004,6 +1023,7 @@ struct radeon_cs_parser { u32 cs_flags; u32 ring; s32 priority; + struct ww_acquire_ctx ticket; }; extern int radeon_cs_finish_pages(struct radeon_cs_parser *p); @@ -1053,7 +1073,6 @@ struct radeon_wb { #define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_IH_WPTR_OFFSET 2048 #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 -#define R600_WB_UVD_RPTR_OFFSET 2560 #define R600_WB_EVENT_OFFSET 3072 #define CIK_WB_CP1_WPTR_OFFSET 3328 #define CIK_WB_CP2_WPTR_OFFSET 3584 @@ -1164,6 +1183,7 @@ enum radeon_int_thermal_type { THERMAL_TYPE_SI, THERMAL_TYPE_EMC2103_WITH_INTERNAL, THERMAL_TYPE_CI, + THERMAL_TYPE_KV, }; struct radeon_voltage { @@ -1237,6 +1257,9 @@ struct radeon_ps { /* UVD clocks */ u32 vclk; u32 dclk; + /* VCE clocks */ + u32 evclk; + u32 ecclk; /* asic priv */ void *ps_priv; }; @@ -1268,8 +1291,8 @@ struct radeon_blacklist_clocks struct radeon_clock_and_voltage_limits { u32 sclk; u32 mclk; - u32 vddc; - u32 vddci; + u16 vddc; + u16 vddci; }; struct radeon_clock_array { @@ -1287,14 +1310,21 @@ struct radeon_clock_voltage_dependency_table { struct radeon_clock_voltage_dependency_entry *entries; }; -struct radeon_cac_leakage_entry { - u16 vddc; - u32 leakage; +union radeon_cac_leakage_entry { + struct { + u16 vddc; + u32 leakage; + }; + struct { + u16 vddc1; + u16 vddc2; + u16 vddc3; + }; }; struct radeon_cac_leakage_table { u32 count; - struct radeon_cac_leakage_entry *entries; + union radeon_cac_leakage_entry *entries; }; struct radeon_phase_shedding_limits_entry { @@ -1308,6 +1338,28 @@ struct radeon_phase_shedding_limits_table { struct radeon_phase_shedding_limits_entry *entries; }; +struct radeon_uvd_clock_voltage_dependency_entry { + u32 vclk; + u32 dclk; + u16 v; +}; + +struct radeon_uvd_clock_voltage_dependency_table { + u8 count; + struct radeon_uvd_clock_voltage_dependency_entry *entries; +}; + +struct radeon_vce_clock_voltage_dependency_entry { + u32 ecclk; + u32 evclk; + u16 v; +}; + +struct radeon_vce_clock_voltage_dependency_table { + u8 count; + struct radeon_vce_clock_voltage_dependency_entry *entries; +}; + struct radeon_ppm_table { u8 ppm_design; u16 cpu_core_number; @@ -1321,11 +1373,27 @@ struct radeon_ppm_table { u32 tj_max; }; +struct radeon_cac_tdp_table { + u16 tdp; + u16 configurable_tdp; + u16 tdc; + u16 battery_power_limit; + u16 small_power_limit; + u16 low_cac_leakage; + u16 high_cac_leakage; + u16 maximum_power_delivery_limit; +}; + struct radeon_dpm_dynamic_state { struct radeon_clock_voltage_dependency_table vddc_dependency_on_sclk; struct radeon_clock_voltage_dependency_table vddci_dependency_on_mclk; struct radeon_clock_voltage_dependency_table vddc_dependency_on_mclk; + struct radeon_clock_voltage_dependency_table mvdd_dependency_on_mclk; struct radeon_clock_voltage_dependency_table vddc_dependency_on_dispclk; + struct radeon_uvd_clock_voltage_dependency_table uvd_clock_voltage_dependency_table; + struct radeon_vce_clock_voltage_dependency_table vce_clock_voltage_dependency_table; + struct radeon_clock_voltage_dependency_table samu_clock_voltage_dependency_table; + struct radeon_clock_voltage_dependency_table acp_clock_voltage_dependency_table; struct radeon_clock_array valid_sclk_values; struct radeon_clock_array valid_mclk_values; struct radeon_clock_and_voltage_limits max_clock_voltage_on_dc; @@ -1337,6 +1405,7 @@ struct radeon_dpm_dynamic_state { struct radeon_cac_leakage_table cac_leakage_table; struct radeon_phase_shedding_limits_table phase_shedding_limits_table; struct radeon_ppm_table *ppm_table; + struct radeon_cac_tdp_table *cac_tdp_table; }; struct radeon_dpm_fan { @@ -1406,11 +1475,12 @@ struct radeon_dpm { struct radeon_dpm_thermal thermal; /* forced levels */ enum radeon_dpm_forced_level forced_level; + /* track UVD streams */ + unsigned sd; + unsigned hd; }; -void radeon_dpm_enable_power_state(struct radeon_device *rdev, - enum radeon_pm_state_type dpm_state); - +void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable); struct radeon_pm { struct lock mutex; @@ -1490,6 +1560,7 @@ struct radeon_uvd { void *saved_bo; atomic_t handles[RADEON_MAX_UVD_HANDLES]; struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; + unsigned img_size[RADEON_MAX_UVD_HANDLES]; struct delayed_work idle_work; }; @@ -1518,12 +1589,21 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, unsigned cg_upll_func_cntl); -struct r600_audio { +struct r600_audio_pin { int channels; int rate; int bits_per_sample; u8 status_bits; u8 category_code; + u32 offset; + bool connected; + u32 id; +}; + +struct r600_audio { + bool enabled; + struct r600_audio_pin pin[RADEON_MAX_AFMT_BLOCKS]; + int num_pins; }; /* @@ -1555,6 +1635,34 @@ int radeon_debugfs_add_files(struct radeon_device *rdev, unsigned nfiles); int radeon_debugfs_fence_init(struct radeon_device *rdev); +/* + * ASIC ring specific functions. + */ +struct radeon_asic_ring { + /* ring read/write ptr handling */ + u32 (*get_rptr)(struct radeon_device *rdev, struct radeon_ring *ring); + u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); + void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); + + /* validating and patching of IBs */ + int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib); + int (*cs_parse)(struct radeon_cs_parser *p); + + /* command emmit functions */ + void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); + void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); + void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, + struct radeon_semaphore *semaphore, bool emit_wait); + void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + + /* testing functions */ + int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); + int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp); + bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp); + + /* deprecated */ + void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp); +}; /* * ASIC specific functions. @@ -1598,23 +1706,7 @@ struct radeon_asic { uint32_t incr, uint32_t flags); } vm; /* ring specific callbacks */ - struct { - void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); - int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib); - void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); - void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, - struct radeon_semaphore *semaphore, bool emit_wait); - int (*cs_parse)(struct radeon_cs_parser *p); - void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp); - int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); - int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp); - bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp); - void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); - - u32 (*get_rptr)(struct radeon_device *rdev, struct radeon_ring *ring); - u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); - void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); - } ring[RADEON_NUM_RINGS]; + struct radeon_asic_ring *ring[RADEON_NUM_RINGS]; /* irqs */ struct { int (*set)(struct radeon_device *rdev); @@ -1707,6 +1799,8 @@ struct radeon_asic { void (*debugfs_print_current_performance_level)(struct radeon_device *rdev, struct seq_file *m); int (*force_performance_level)(struct radeon_device *rdev, enum radeon_dpm_forced_level level); bool (*vblank_too_short)(struct radeon_device *rdev); + void (*powergate_uvd)(struct radeon_device *rdev, bool gate); + void (*enable_bapm)(struct radeon_device *rdev, bool enable); } dpm; /* pageflipping */ struct { @@ -2087,7 +2181,6 @@ struct radeon_device { const struct firmware *sdma_fw; /* CIK SDMA firmware */ const struct firmware *smc_fw; /* SMC firmware */ const struct firmware *uvd_fw; /* UVD firmware */ - struct r600_blit r600_blit; struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */ @@ -2099,9 +2192,8 @@ struct radeon_device { struct task reset_work; int num_crtc; /* number of crtcs */ struct lock dc_hw_i2c_mutex; /* display controller hw i2c mutex */ - bool audio_enabled; bool has_uvd; - struct r600_audio audio_status; /* audio stuff */ + struct r600_audio audio; /* audio stuff */ struct { ACPI_HANDLE handle; ACPI_NOTIFY_HANDLER notifier_call; @@ -2122,6 +2214,9 @@ struct radeon_device { struct radeon_atcs atcs; /* srbm instance registers */ struct spinlock srbm_mutex; + /* clock, powergating flags */ + u32 cg_flags; + u32 pg_flags; }; int radeon_device_init(struct radeon_device *rdev, @@ -2179,6 +2274,8 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v); #define WREG32_PIF_PHY1(reg, v) eg_pif_phy1_wreg(rdev, (reg), (v)) #define RREG32_UVD_CTX(reg) r600_uvd_ctx_rreg(rdev, (reg)) #define WREG32_UVD_CTX(reg, v) r600_uvd_ctx_wreg(rdev, (reg), (v)) +#define RREG32_DIDT(reg) cik_didt_rreg(rdev, (reg)) +#define WREG32_DIDT(reg, v) cik_didt_wreg(rdev, (reg), (v)) #define WREG32_P(reg, val, mask) \ do { \ uint32_t tmp_ = RREG32(reg); \ @@ -2310,6 +2407,21 @@ static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v) WREG32(R600_UVD_CTX_DATA, (v)); } +static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg) +{ + u32 r; + + WREG32(CIK_DIDT_IND_INDEX, (reg)); + r = RREG32(CIK_DIDT_IND_DATA); + return r; +} + +static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + WREG32(CIK_DIDT_IND_INDEX, (reg)); + WREG32(CIK_DIDT_IND_DATA, (v)); +} + void r100_pll_errata_after_index(struct radeon_device *rdev); @@ -2405,7 +2517,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_fini(rdev) (rdev)->asic->fini((rdev)) #define radeon_resume(rdev) (rdev)->asic->resume((rdev)) #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) -#define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)].cs_parse((p)) +#define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)]->cs_parse((p)) #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state)) #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev)) #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev)) @@ -2413,16 +2525,16 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) #define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags))) -#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) -#define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp)) -#define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp)) -#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib)) -#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib)) -#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp)) -#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)].vm_flush((rdev), (r), (vm)) -#define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx].get_rptr((rdev), (r)) -#define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx].get_wptr((rdev), (r)) -#define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx].set_wptr((rdev), (r)) +#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_start((rdev), (cp)) +#define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_test((rdev), (cp)) +#define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ib_test((rdev), (cp)) +#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib)) +#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib)) +#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp)) +#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm)) +#define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r)) +#define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r)) +#define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r)) #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc)) @@ -2430,8 +2542,8 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_get_backlight_level(rdev, e) (rdev)->asic->display.get_backlight_level((e)) #define radeon_hdmi_enable(rdev, e, b) (rdev)->asic->display.hdmi_enable((e), (b)) #define radeon_hdmi_setmode(rdev, e, m) (rdev)->asic->display.hdmi_setmode((e), (m)) -#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)].emit_fence((rdev), (fence)) -#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)].emit_semaphore((rdev), (cp), (semaphore), (emit_wait)) +#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)]->emit_fence((rdev), (fence)) +#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)]->emit_semaphore((rdev), (cp), (semaphore), (emit_wait)) #define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (f)) #define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (f)) #define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (f)) @@ -2482,6 +2594,8 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_dpm_debugfs_print_current_performance_level(rdev, m) rdev->asic->dpm.debugfs_print_current_performance_level((rdev), (m)) #define radeon_dpm_force_performance_level(rdev, l) rdev->asic->dpm.force_performance_level((rdev), (l)) #define radeon_dpm_vblank_too_short(rdev) rdev->asic->dpm.vblank_too_short((rdev)) +#define radeon_dpm_powergate_uvd(rdev, g) rdev->asic->dpm.powergate_uvd((rdev), (g)) +#define radeon_dpm_enable_bapm(rdev, e) rdev->asic->dpm.enable_bapm((rdev), (e)) /* Common functions */ /* AGP */ @@ -2546,6 +2660,8 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, /* audio */ void r600_audio_update_hdmi(void *arg, int pending); +struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev); +struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev); /* * R600 vram scratch functions diff --git a/sys/dev/drm/radeon/radeon_agp.c b/sys/dev/drm/radeon/radeon_agp.c index d31793537b..039b30f871 100644 --- a/sys/dev/drm/radeon/radeon_agp.c +++ b/sys/dev/drm/radeon/radeon_agp.c @@ -23,10 +23,7 @@ * Authors: * Dave Airlie * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/radeon_agp.c 254885 2013-08-25 19:37:15Z dumbbell $ */ - #include #include "radeon.h" #include @@ -156,7 +153,7 @@ int radeon_agp_init(struct radeon_device *rdev) drm_agp_release(rdev->ddev); dev_warn(rdev->dev, "AGP aperture too small (%juM) " "need at least 32M, disabling AGP\n", - (uintmax_t)rdev->ddev->agp->agp_info.ai_aperture_size); + rdev->ddev->agp->agp_info.ai_aperture_size); return -EINVAL; } @@ -249,7 +246,7 @@ int radeon_agp_init(struct radeon_device *rdev) rdev->mc.gtt_start = rdev->mc.agp_base; rdev->mc.gtt_end = rdev->mc.gtt_start + rdev->mc.gtt_size - 1; dev_info(rdev->dev, "GTT: %juM 0x%08jX - 0x%08jX\n", - (uintmax_t)rdev->mc.gtt_size >> 20, (uintmax_t)rdev->mc.gtt_start, (uintmax_t)rdev->mc.gtt_end); + rdev->mc.gtt_size >> 20, rdev->mc.gtt_start, rdev->mc.gtt_end); /* workaround some hw issues */ if (rdev->family < CHIP_R200) { diff --git a/sys/dev/drm/radeon/radeon_asic.c b/sys/dev/drm/radeon/radeon_asic.c index e2b8e1a052..7beee5cb1c 100644 --- a/sys/dev/drm/radeon/radeon_asic.c +++ b/sys/dev/drm/radeon/radeon_asic.c @@ -171,6 +171,21 @@ void radeon_agp_disable(struct radeon_device *rdev) /* * ASIC */ + +static struct radeon_asic_ring r100_gfx_ring = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r100_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + .cs_parse = &r100_cs_parse, + .ring_start = &r100_ring_start, + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, + .get_rptr = &radeon_ring_generic_get_rptr, + .get_wptr = &radeon_ring_generic_get_wptr, + .set_wptr = &radeon_ring_generic_set_wptr, +}; + static struct radeon_asic r100_asic = { .init = &r100_init, .fini = &r100_fini, @@ -186,19 +201,7 @@ static struct radeon_asic r100_asic = { .set_page = &r100_pci_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r100_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r100_cs_parse, - .ring_start = &r100_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r100_gfx_ring }, .irq = { .set = &r100_irq_set, @@ -265,19 +268,7 @@ static struct radeon_asic r200_asic = { .set_page = &r100_pci_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r100_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r100_cs_parse, - .ring_start = &r100_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r100_gfx_ring }, .irq = { .set = &r100_irq_set, @@ -329,6 +320,20 @@ static struct radeon_asic r200_asic = { }, }; +static struct radeon_asic_ring r300_gfx_ring = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + .cs_parse = &r300_cs_parse, + .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, + .get_rptr = &radeon_ring_generic_get_rptr, + .get_wptr = &radeon_ring_generic_get_wptr, + .set_wptr = &radeon_ring_generic_set_wptr, +}; + static struct radeon_asic r300_asic = { .init = &r300_init, .fini = &r300_fini, @@ -344,19 +349,7 @@ static struct radeon_asic r300_asic = { .set_page = &r100_pci_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &r300_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &r100_irq_set, @@ -423,19 +416,7 @@ static struct radeon_asic r300_asic_pcie = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &r300_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &r100_irq_set, @@ -502,19 +483,7 @@ static struct radeon_asic r420_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &r300_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &r100_irq_set, @@ -581,19 +550,7 @@ static struct radeon_asic rs400_asic = { .set_page = &rs400_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &r300_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &r100_irq_set, @@ -660,19 +617,7 @@ static struct radeon_asic rs600_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &r300_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -741,19 +686,7 @@ static struct radeon_asic rs690_asic = { .set_page = &rs400_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &r300_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -822,19 +755,7 @@ static struct radeon_asic rv515_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &rv515_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -901,19 +822,7 @@ static struct radeon_asic r520_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r100_ring_ib_execute, - .emit_fence = &r300_fence_ring_emit, - .emit_semaphore = &r100_semaphore_ring_emit, - .cs_parse = &r300_cs_parse, - .ring_start = &rv515_ring_start, - .ring_test = &r100_ring_test, - .ib_test = &r100_ib_test, - .is_lockup = &r100_gpu_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -965,6 +874,32 @@ static struct radeon_asic r520_asic = { }, }; +static struct radeon_asic_ring r600_gfx_ring = { + .ib_execute = &r600_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = &r600_cs_parse, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + .is_lockup = &r600_gfx_is_lockup, + .get_rptr = &radeon_ring_generic_get_rptr, + .get_wptr = &radeon_ring_generic_get_wptr, + .set_wptr = &radeon_ring_generic_set_wptr, +}; + +static struct radeon_asic_ring r600_dma_ring = { + .ib_execute = &r600_dma_ring_ib_execute, + .emit_fence = &r600_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = &r600_dma_cs_parse, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &r600_dma_is_lockup, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, +}; + static struct radeon_asic r600_asic = { .init = &r600_init, .fini = &r600_fini, @@ -982,30 +917,8 @@ static struct radeon_asic r600_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r600_ring_ib_execute, - .emit_fence = &r600_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &r600_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &r600_gfx_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &r600_dma_ring_ib_execute, - .emit_fence = &r600_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &r600_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &r600_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, }, .irq = { .set = &r600_irq_set, @@ -1021,7 +934,7 @@ static struct radeon_asic r600_asic = { .hdmi_setmode = &r600_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1077,30 +990,8 @@ static struct radeon_asic rv6xx_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r600_ring_ib_execute, - .emit_fence = &r600_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &r600_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &r600_gfx_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &r600_dma_ring_ib_execute, - .emit_fence = &r600_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &r600_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &r600_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, }, .irq = { .set = &r600_irq_set, @@ -1112,9 +1003,11 @@ static struct radeon_asic rv6xx_asic = { .wait_for_vblank = &avivo_wait_for_vblank, .set_backlight_level = &atombios_set_backlight_level, .get_backlight_level = &atombios_get_backlight_level, + .hdmi_enable = &r600_hdmi_enable, + .hdmi_setmode = &r600_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1145,6 +1038,7 @@ static struct radeon_asic rv6xx_asic = { .set_pcie_lanes = &r600_set_pcie_lanes, .set_clock_gating = NULL, .get_temperature = &rv6xx_get_temp, + .set_uvd_clocks = &r600_set_uvd_clocks, }, .dpm = { .init = &rv6xx_dpm_init, @@ -1186,30 +1080,8 @@ static struct radeon_asic rs780_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r600_ring_ib_execute, - .emit_fence = &r600_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &r600_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &r600_gfx_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &r600_dma_ring_ib_execute, - .emit_fence = &r600_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &r600_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &r600_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, }, .irq = { .set = &r600_irq_set, @@ -1225,7 +1097,7 @@ static struct radeon_asic rs780_asic = { .hdmi_setmode = &r600_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1256,6 +1128,7 @@ static struct radeon_asic rs780_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .get_temperature = &rv6xx_get_temp, + .set_uvd_clocks = &r600_set_uvd_clocks, }, .dpm = { .init = &rs780_dpm_init, @@ -1271,6 +1144,7 @@ static struct radeon_asic rs780_asic = { .get_mclk = &rs780_dpm_get_mclk, .print_power_state = &rs780_dpm_print_power_state, .debugfs_print_current_performance_level = &rs780_dpm_debugfs_print_current_performance_level, + .force_performance_level = &rs780_dpm_force_performance_level, }, .pflip = { .pre_page_flip = &rs600_pre_page_flip, @@ -1279,6 +1153,19 @@ static struct radeon_asic rs780_asic = { }, }; +static struct radeon_asic_ring rv770_uvd_ring = { + .ib_execute = &uvd_v1_0_ib_execute, + .emit_fence = &uvd_v2_2_fence_emit, + .emit_semaphore = &uvd_v1_0_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &uvd_v1_0_ring_test, + .ib_test = &uvd_v1_0_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &uvd_v1_0_get_rptr, + .get_wptr = &uvd_v1_0_get_wptr, + .set_wptr = &uvd_v1_0_set_wptr, +}; + static struct radeon_asic rv770_asic = { .init = &rv770_init, .fini = &rv770_fini, @@ -1296,42 +1183,9 @@ static struct radeon_asic rv770_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &r600_ring_ib_execute, - .emit_fence = &r600_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &r600_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &r600_gfx_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &r600_dma_ring_ib_execute, - .emit_fence = &r600_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &r600_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &r600_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &r600_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring, }, .irq = { .set = &r600_irq_set, @@ -1347,7 +1201,7 @@ static struct radeon_asic rv770_asic = { .hdmi_setmode = &r600_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &rv770_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1404,6 +1258,32 @@ static struct radeon_asic rv770_asic = { }, }; +static struct radeon_asic_ring evergreen_gfx_ring = { + .ib_execute = &evergreen_ring_ib_execute, + .emit_fence = &r600_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = &evergreen_cs_parse, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + .is_lockup = &evergreen_gfx_is_lockup, + .get_rptr = &radeon_ring_generic_get_rptr, + .get_wptr = &radeon_ring_generic_get_wptr, + .set_wptr = &radeon_ring_generic_set_wptr, +}; + +static struct radeon_asic_ring evergreen_dma_ring = { + .ib_execute = &evergreen_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = &evergreen_dma_cs_parse, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &evergreen_dma_is_lockup, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, +}; + static struct radeon_asic evergreen_asic = { .init = &evergreen_init, .fini = &evergreen_fini, @@ -1421,42 +1301,9 @@ static struct radeon_asic evergreen_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &evergreen_ring_ib_execute, - .emit_fence = &r600_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &evergreen_gfx_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &evergreen_dma_ring_ib_execute, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &evergreen_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &evergreen_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &r600_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &evergreen_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &evergreen_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring, }, .irq = { .set = &evergreen_irq_set, @@ -1472,7 +1319,7 @@ static struct radeon_asic evergreen_asic = { .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1546,42 +1393,9 @@ static struct radeon_asic sumo_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &evergreen_ring_ib_execute, - .emit_fence = &r600_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &evergreen_gfx_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &evergreen_dma_ring_ib_execute, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &evergreen_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &evergreen_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &r600_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &evergreen_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &evergreen_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring, }, .irq = { .set = &evergreen_irq_set, @@ -1597,7 +1411,7 @@ static struct radeon_asic sumo_asic = { .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1670,42 +1484,9 @@ static struct radeon_asic btc_asic = { .set_page = &rs600_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &evergreen_ring_ib_execute, - .emit_fence = &r600_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &evergreen_gfx_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &evergreen_dma_ring_ib_execute, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &evergreen_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &evergreen_dma_is_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &r600_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &evergreen_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &evergreen_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring, }, .irq = { .set = &evergreen_irq_set, @@ -1721,7 +1502,7 @@ static struct radeon_asic btc_asic = { .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1778,6 +1559,49 @@ static struct radeon_asic btc_asic = { }, }; +static struct radeon_asic_ring cayman_gfx_ring = { + .ib_execute = &cayman_ring_ib_execute, + .ib_parse = &evergreen_ib_parse, + .emit_fence = &cayman_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = &evergreen_cs_parse, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + .is_lockup = &cayman_gfx_is_lockup, + .vm_flush = &cayman_vm_flush, + .get_rptr = &radeon_ring_generic_get_rptr, + .get_wptr = &radeon_ring_generic_get_wptr, + .set_wptr = &radeon_ring_generic_set_wptr, +}; + +static struct radeon_asic_ring cayman_dma_ring = { + .ib_execute = &cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = &evergreen_dma_cs_parse, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &cayman_dma_vm_flush, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr +}; + +static struct radeon_asic_ring cayman_uvd_ring = { + .ib_execute = &uvd_v1_0_ib_execute, + .emit_fence = &uvd_v2_2_fence_emit, + .emit_semaphore = &uvd_v3_1_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &uvd_v1_0_ring_test, + .ib_test = &uvd_v1_0_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &uvd_v1_0_get_rptr, + .get_wptr = &uvd_v1_0_get_wptr, + .set_wptr = &uvd_v1_0_set_wptr, +}; + static struct radeon_asic cayman_asic = { .init = &cayman_init, .fini = &cayman_fini, @@ -1801,88 +1625,12 @@ static struct radeon_asic cayman_asic = { .set_page = &cayman_vm_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &cayman_ring_ib_execute, - .ib_parse = &evergreen_ib_parse, - .emit_fence = &cayman_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &cayman_gfx_is_lockup, - .vm_flush = &cayman_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP1_INDEX] = { - .ib_execute = &cayman_ring_ib_execute, - .ib_parse = &evergreen_ib_parse, - .emit_fence = &cayman_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &cayman_gfx_is_lockup, - .vm_flush = &cayman_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP2_INDEX] = { - .ib_execute = &cayman_ring_ib_execute, - .ib_parse = &evergreen_ib_parse, - .emit_fence = &cayman_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &cayman_gfx_is_lockup, - .vm_flush = &cayman_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &cayman_dma_ring_ib_execute, - .ib_parse = &evergreen_dma_ib_parse, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &evergreen_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &cayman_dma_is_lockup, - .vm_flush = &cayman_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_DMA1_INDEX] = { - .ib_execute = &cayman_dma_ring_ib_execute, - .ib_parse = &evergreen_dma_ib_parse, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &evergreen_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &cayman_dma_is_lockup, - .vm_flush = &cayman_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &cayman_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring, + [CAYMAN_RING_TYPE_CP1_INDEX] = &cayman_gfx_ring, + [CAYMAN_RING_TYPE_CP2_INDEX] = &cayman_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring, + [CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, }, .irq = { .set = &evergreen_irq_set, @@ -1898,7 +1646,7 @@ static struct radeon_asic cayman_asic = { .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -1978,88 +1726,12 @@ static struct radeon_asic trinity_asic = { .set_page = &cayman_vm_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &cayman_ring_ib_execute, - .ib_parse = &evergreen_ib_parse, - .emit_fence = &cayman_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &cayman_gfx_is_lockup, - .vm_flush = &cayman_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP1_INDEX] = { - .ib_execute = &cayman_ring_ib_execute, - .ib_parse = &evergreen_ib_parse, - .emit_fence = &cayman_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &cayman_gfx_is_lockup, - .vm_flush = &cayman_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP2_INDEX] = { - .ib_execute = &cayman_ring_ib_execute, - .ib_parse = &evergreen_ib_parse, - .emit_fence = &cayman_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = &evergreen_cs_parse, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &cayman_gfx_is_lockup, - .vm_flush = &cayman_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &cayman_dma_ring_ib_execute, - .ib_parse = &evergreen_dma_ib_parse, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &evergreen_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &cayman_dma_is_lockup, - .vm_flush = &cayman_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_DMA1_INDEX] = { - .ib_execute = &cayman_dma_ring_ib_execute, - .ib_parse = &evergreen_dma_ib_parse, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = &evergreen_dma_cs_parse, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &cayman_dma_is_lockup, - .vm_flush = &cayman_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &cayman_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring, + [CAYMAN_RING_TYPE_CP1_INDEX] = &cayman_gfx_ring, + [CAYMAN_RING_TYPE_CP2_INDEX] = &cayman_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring, + [CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, }, .irq = { .set = &evergreen_irq_set, @@ -2071,9 +1743,11 @@ static struct radeon_asic trinity_asic = { .wait_for_vblank = &dce4_wait_for_vblank, .set_backlight_level = &atombios_set_backlight_level, .get_backlight_level = &atombios_get_backlight_level, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { - .blit = &r600_copy_blit, + .blit = &r600_copy_cpdma, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, @@ -2121,6 +1795,7 @@ static struct radeon_asic trinity_asic = { .print_power_state = &trinity_dpm_print_power_state, .debugfs_print_current_performance_level = &trinity_dpm_debugfs_print_current_performance_level, .force_performance_level = &trinity_dpm_force_performance_level, + .enable_bapm = &trinity_dpm_enable_bapm, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, @@ -2129,6 +1804,36 @@ static struct radeon_asic trinity_asic = { }, }; +static struct radeon_asic_ring si_gfx_ring = { + .ib_execute = &si_ring_ib_execute, + .ib_parse = &si_ib_parse, + .emit_fence = &si_fence_ring_emit, + .emit_semaphore = &r600_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_ring_test, + .ib_test = &r600_ib_test, + .is_lockup = &si_gfx_is_lockup, + .vm_flush = &si_vm_flush, + .get_rptr = &radeon_ring_generic_get_rptr, + .get_wptr = &radeon_ring_generic_get_wptr, + .set_wptr = &radeon_ring_generic_set_wptr, +}; + +static struct radeon_asic_ring si_dma_ring = { + .ib_execute = &cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &si_dma_is_lockup, + .vm_flush = &si_dma_vm_flush, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, +}; + static struct radeon_asic si_asic = { .init = &si_init, .fini = &si_fini, @@ -2152,88 +1857,12 @@ static struct radeon_asic si_asic = { .set_page = &si_vm_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &si_ring_ib_execute, - .ib_parse = &si_ib_parse, - .emit_fence = &si_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &si_gfx_is_lockup, - .vm_flush = &si_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP1_INDEX] = { - .ib_execute = &si_ring_ib_execute, - .ib_parse = &si_ib_parse, - .emit_fence = &si_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &si_gfx_is_lockup, - .vm_flush = &si_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP2_INDEX] = { - .ib_execute = &si_ring_ib_execute, - .ib_parse = &si_ib_parse, - .emit_fence = &si_fence_ring_emit, - .emit_semaphore = &r600_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &r600_ring_test, - .ib_test = &r600_ib_test, - .is_lockup = &si_gfx_is_lockup, - .vm_flush = &si_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &cayman_dma_ring_ib_execute, - .ib_parse = &evergreen_dma_ib_parse, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &si_dma_is_lockup, - .vm_flush = &si_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_DMA1_INDEX] = { - .ib_execute = &cayman_dma_ring_ib_execute, - .ib_parse = &evergreen_dma_ib_parse, - .emit_fence = &evergreen_dma_fence_ring_emit, - .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &r600_dma_ring_test, - .ib_test = &r600_dma_ib_test, - .is_lockup = &si_dma_is_lockup, - .vm_flush = &si_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &cayman_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &si_gfx_ring, + [CAYMAN_RING_TYPE_CP1_INDEX] = &si_gfx_ring, + [CAYMAN_RING_TYPE_CP2_INDEX] = &si_gfx_ring, + [R600_RING_TYPE_DMA_INDEX] = &si_dma_ring, + [CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, }, .irq = { .set = &si_irq_set, @@ -2245,6 +1874,8 @@ static struct radeon_asic si_asic = { .wait_for_vblank = &dce4_wait_for_vblank, .set_backlight_level = &atombios_set_backlight_level, .get_backlight_level = &atombios_get_backlight_level, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { .blit = NULL, @@ -2304,6 +1935,51 @@ static struct radeon_asic si_asic = { }, }; +static struct radeon_asic_ring ci_gfx_ring = { + .ib_execute = &cik_ring_ib_execute, + .ib_parse = &cik_ib_parse, + .emit_fence = &cik_fence_gfx_ring_emit, + .emit_semaphore = &cik_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &cik_ring_test, + .ib_test = &cik_ib_test, + .is_lockup = &cik_gfx_is_lockup, + .vm_flush = &cik_vm_flush, + .get_rptr = &radeon_ring_generic_get_rptr, + .get_wptr = &radeon_ring_generic_get_wptr, + .set_wptr = &radeon_ring_generic_set_wptr, +}; + +static struct radeon_asic_ring ci_cp_ring = { + .ib_execute = &cik_ring_ib_execute, + .ib_parse = &cik_ib_parse, + .emit_fence = &cik_fence_compute_ring_emit, + .emit_semaphore = &cik_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &cik_ring_test, + .ib_test = &cik_ib_test, + .is_lockup = &cik_gfx_is_lockup, + .vm_flush = &cik_vm_flush, + .get_rptr = &cik_compute_ring_get_rptr, + .get_wptr = &cik_compute_ring_get_wptr, + .set_wptr = &cik_compute_ring_set_wptr, +}; + +static struct radeon_asic_ring ci_dma_ring = { + .ib_execute = &cik_sdma_ring_ib_execute, + .ib_parse = &cik_ib_parse, + .emit_fence = &cik_sdma_fence_ring_emit, + .emit_semaphore = &cik_sdma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &cik_sdma_ring_test, + .ib_test = &cik_sdma_ib_test, + .is_lockup = &cik_sdma_is_lockup, + .vm_flush = &cik_dma_vm_flush, + .get_rptr = &r600_dma_get_rptr, + .get_wptr = &r600_dma_get_wptr, + .set_wptr = &r600_dma_set_wptr, +}; + static struct radeon_asic ci_asic = { .init = &cik_init, .fini = &cik_fini, @@ -2327,88 +2003,12 @@ static struct radeon_asic ci_asic = { .set_page = &cik_vm_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &cik_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_fence_gfx_ring_emit, - .emit_semaphore = &cik_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_ring_test, - .ib_test = &cik_ib_test, - .is_lockup = &cik_gfx_is_lockup, - .vm_flush = &cik_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP1_INDEX] = { - .ib_execute = &cik_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_fence_compute_ring_emit, - .emit_semaphore = &cik_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_ring_test, - .ib_test = &cik_ib_test, - .is_lockup = &cik_gfx_is_lockup, - .vm_flush = &cik_vm_flush, - .get_rptr = &cik_compute_ring_get_rptr, - .get_wptr = &cik_compute_ring_get_wptr, - .set_wptr = &cik_compute_ring_set_wptr, - }, - [CAYMAN_RING_TYPE_CP2_INDEX] = { - .ib_execute = &cik_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_fence_compute_ring_emit, - .emit_semaphore = &cik_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_ring_test, - .ib_test = &cik_ib_test, - .is_lockup = &cik_gfx_is_lockup, - .vm_flush = &cik_vm_flush, - .get_rptr = &cik_compute_ring_get_rptr, - .get_wptr = &cik_compute_ring_get_wptr, - .set_wptr = &cik_compute_ring_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &cik_sdma_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_sdma_fence_ring_emit, - .emit_semaphore = &cik_sdma_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_sdma_ring_test, - .ib_test = &cik_sdma_ib_test, - .is_lockup = &cik_sdma_is_lockup, - .vm_flush = &cik_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_DMA1_INDEX] = { - .ib_execute = &cik_sdma_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_sdma_fence_ring_emit, - .emit_semaphore = &cik_sdma_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_sdma_ring_test, - .ib_test = &cik_sdma_ib_test, - .is_lockup = &cik_sdma_is_lockup, - .vm_flush = &cik_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &cayman_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring, + [CAYMAN_RING_TYPE_CP1_INDEX] = &ci_cp_ring, + [CAYMAN_RING_TYPE_CP2_INDEX] = &ci_cp_ring, + [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, + [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, }, .irq = { .set = &cik_irq_set, @@ -2418,6 +2018,8 @@ static struct radeon_asic ci_asic = { .bandwidth_update = &dce8_bandwidth_update, .get_vblank_counter = &evergreen_get_vblank_counter, .wait_for_vblank = &dce4_wait_for_vblank, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { .blit = NULL, @@ -2451,6 +2053,25 @@ static struct radeon_asic ci_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .get_temperature = &ci_get_temp, + }, + .dpm = { + .init = &ci_dpm_init, + .setup_asic = &ci_dpm_setup_asic, + .enable = &ci_dpm_enable, + .disable = &ci_dpm_disable, + .pre_set_power_state = &ci_dpm_pre_set_power_state, + .set_power_state = &ci_dpm_set_power_state, + .post_set_power_state = &ci_dpm_post_set_power_state, + .display_configuration_changed = &ci_dpm_display_configuration_changed, + .fini = &ci_dpm_fini, + .get_sclk = &ci_dpm_get_sclk, + .get_mclk = &ci_dpm_get_mclk, + .print_power_state = &ci_dpm_print_power_state, + .debugfs_print_current_performance_level = &ci_dpm_debugfs_print_current_performance_level, + .force_performance_level = &ci_dpm_force_performance_level, + .vblank_too_short = &ci_dpm_vblank_too_short, + .powergate_uvd = &ci_dpm_powergate_uvd, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, @@ -2482,88 +2103,12 @@ static struct radeon_asic kv_asic = { .set_page = &cik_vm_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = { - .ib_execute = &cik_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_fence_gfx_ring_emit, - .emit_semaphore = &cik_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_ring_test, - .ib_test = &cik_ib_test, - .is_lockup = &cik_gfx_is_lockup, - .vm_flush = &cik_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_CP1_INDEX] = { - .ib_execute = &cik_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_fence_compute_ring_emit, - .emit_semaphore = &cik_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_ring_test, - .ib_test = &cik_ib_test, - .is_lockup = &cik_gfx_is_lockup, - .vm_flush = &cik_vm_flush, - .get_rptr = &cik_compute_ring_get_rptr, - .get_wptr = &cik_compute_ring_get_wptr, - .set_wptr = &cik_compute_ring_set_wptr, - }, - [CAYMAN_RING_TYPE_CP2_INDEX] = { - .ib_execute = &cik_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_fence_compute_ring_emit, - .emit_semaphore = &cik_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_ring_test, - .ib_test = &cik_ib_test, - .is_lockup = &cik_gfx_is_lockup, - .vm_flush = &cik_vm_flush, - .get_rptr = &cik_compute_ring_get_rptr, - .get_wptr = &cik_compute_ring_get_wptr, - .set_wptr = &cik_compute_ring_set_wptr, - }, - [R600_RING_TYPE_DMA_INDEX] = { - .ib_execute = &cik_sdma_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_sdma_fence_ring_emit, - .emit_semaphore = &cik_sdma_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_sdma_ring_test, - .ib_test = &cik_sdma_ib_test, - .is_lockup = &cik_sdma_is_lockup, - .vm_flush = &cik_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [CAYMAN_RING_TYPE_DMA1_INDEX] = { - .ib_execute = &cik_sdma_ring_ib_execute, - .ib_parse = &cik_ib_parse, - .emit_fence = &cik_sdma_fence_ring_emit, - .emit_semaphore = &cik_sdma_semaphore_ring_emit, - .cs_parse = NULL, - .ring_test = &cik_sdma_ring_test, - .ib_test = &cik_sdma_ib_test, - .is_lockup = &cik_sdma_is_lockup, - .vm_flush = &cik_dma_vm_flush, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - }, - [R600_RING_TYPE_UVD_INDEX] = { - .ib_execute = &r600_uvd_ib_execute, - .emit_fence = &r600_uvd_fence_emit, - .emit_semaphore = &cayman_uvd_semaphore_emit, - .cs_parse = &radeon_uvd_cs_parse, - .ring_test = &r600_uvd_ring_test, - .ib_test = &r600_uvd_ib_test, - .is_lockup = &radeon_ring_test_lockup, - .get_rptr = &radeon_ring_generic_get_rptr, - .get_wptr = &radeon_ring_generic_get_wptr, - .set_wptr = &radeon_ring_generic_set_wptr, - } + [RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring, + [CAYMAN_RING_TYPE_CP1_INDEX] = &ci_cp_ring, + [CAYMAN_RING_TYPE_CP2_INDEX] = &ci_cp_ring, + [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, + [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, }, .irq = { .set = &cik_irq_set, @@ -2573,6 +2118,8 @@ static struct radeon_asic kv_asic = { .bandwidth_update = &dce8_bandwidth_update, .get_vblank_counter = &evergreen_get_vblank_counter, .wait_for_vblank = &dce4_wait_for_vblank, + .hdmi_enable = &evergreen_hdmi_enable, + .hdmi_setmode = &evergreen_hdmi_setmode, }, .copy = { .blit = NULL, @@ -2606,6 +2153,25 @@ static struct radeon_asic kv_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .get_temperature = &kv_get_temp, + }, + .dpm = { + .init = &kv_dpm_init, + .setup_asic = &kv_dpm_setup_asic, + .enable = &kv_dpm_enable, + .disable = &kv_dpm_disable, + .pre_set_power_state = &kv_dpm_pre_set_power_state, + .set_power_state = &kv_dpm_set_power_state, + .post_set_power_state = &kv_dpm_post_set_power_state, + .display_configuration_changed = &kv_dpm_display_configuration_changed, + .fini = &kv_dpm_fini, + .get_sclk = &kv_dpm_get_sclk, + .get_mclk = &kv_dpm_get_mclk, + .print_power_state = &kv_dpm_print_power_state, + .debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level, + .force_performance_level = &kv_dpm_force_performance_level, + .powergate_uvd = &kv_dpm_powergate_uvd, + .enable_bapm = &kv_dpm_enable_bapm, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, @@ -2775,19 +2341,188 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->has_uvd = false; else rdev->has_uvd = true; + switch (rdev->family) { + case CHIP_TAHITI: + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_MC_MGCG | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + break; + case CHIP_PITCAIRN: + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_GFX_RLC_LS | + RADEON_CG_SUPPORT_MC_LS | + RADEON_CG_SUPPORT_MC_MGCG | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + break; + case CHIP_VERDE: + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_GFX_RLC_LS | + RADEON_CG_SUPPORT_MC_LS | + RADEON_CG_SUPPORT_MC_MGCG | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0 | + /*RADEON_PG_SUPPORT_GFX_PG | */ + RADEON_PG_SUPPORT_SDMA; + break; + case CHIP_OLAND: + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_GFX_RLC_LS | + RADEON_CG_SUPPORT_MC_LS | + RADEON_CG_SUPPORT_MC_MGCG | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + break; + case CHIP_HAINAN: + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_GFX_RLC_LS | + RADEON_CG_SUPPORT_MC_LS | + RADEON_CG_SUPPORT_MC_MGCG | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + break; + default: + rdev->cg_flags = 0; + rdev->pg_flags = 0; + break; + } break; case CHIP_BONAIRE: rdev->asic = &ci_asic; rdev->num_crtc = 6; + rdev->has_uvd = true; + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CGTS_LS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_MC_LS | + RADEON_CG_SUPPORT_MC_MGCG | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_SDMA_LS | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; break; case CHIP_KAVERI: case CHIP_KABINI: rdev->asic = &kv_asic; /* set num crtcs */ - if (rdev->family == CHIP_KAVERI) + if (rdev->family == CHIP_KAVERI) { rdev->num_crtc = 4; - else + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CGTS_LS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_SDMA_LS | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + /*RADEON_PG_SUPPORT_GFX_PG | + RADEON_PG_SUPPORT_GFX_SMG | + RADEON_PG_SUPPORT_GFX_DMG | + RADEON_PG_SUPPORT_UVD | + RADEON_PG_SUPPORT_VCE | + RADEON_PG_SUPPORT_CP | + RADEON_PG_SUPPORT_GDS | + RADEON_PG_SUPPORT_RLC_SMU_HS | + RADEON_PG_SUPPORT_ACP | + RADEON_PG_SUPPORT_SAMU;*/ + } else { rdev->num_crtc = 2; + rdev->cg_flags = + RADEON_CG_SUPPORT_GFX_MGCG | + RADEON_CG_SUPPORT_GFX_MGLS | + /*RADEON_CG_SUPPORT_GFX_CGCG |*/ + RADEON_CG_SUPPORT_GFX_CGLS | + RADEON_CG_SUPPORT_GFX_CGTS | + RADEON_CG_SUPPORT_GFX_CGTS_LS | + RADEON_CG_SUPPORT_GFX_CP_LS | + RADEON_CG_SUPPORT_SDMA_MGCG | + RADEON_CG_SUPPORT_SDMA_LS | + RADEON_CG_SUPPORT_BIF_LS | + RADEON_CG_SUPPORT_VCE_MGCG | + RADEON_CG_SUPPORT_UVD_MGCG | + RADEON_CG_SUPPORT_HDP_LS | + RADEON_CG_SUPPORT_HDP_MGCG; + rdev->pg_flags = 0; + /*RADEON_PG_SUPPORT_GFX_PG | + RADEON_PG_SUPPORT_GFX_SMG | + RADEON_PG_SUPPORT_UVD | + RADEON_PG_SUPPORT_VCE | + RADEON_PG_SUPPORT_CP | + RADEON_PG_SUPPORT_GDS | + RADEON_PG_SUPPORT_RLC_SMU_HS | + RADEON_PG_SUPPORT_SAMU;*/ + } + rdev->has_uvd = true; break; default: /* FIXME: not supported yet */ diff --git a/sys/dev/drm/radeon/radeon_asic.h b/sys/dev/drm/radeon/radeon_asic.h index ad65a2f6b4..869cd86c5d 100644 --- a/sys/dev/drm/radeon/radeon_asic.h +++ b/sys/dev/drm/radeon/radeon_asic.h @@ -305,6 +305,7 @@ int r600_cs_parse(struct radeon_cs_parser *p); int r600_dma_cs_parse(struct radeon_cs_parser *p); int r600_dma_cs_next_reloc(struct radeon_cs_parser *p, struct radeon_cs_reloc **cs_reloc); +u32 r600_gpu_check_soft_reset(struct radeon_device *rdev); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); void r600_semaphore_ring_emit(struct radeon_device *rdev, @@ -330,10 +331,6 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); -int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); -int r600_copy_blit(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, struct radeon_fence **fence); int r600_copy_cpdma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence **fence); @@ -365,8 +362,6 @@ int r600_count_pipe_bits(uint32_t val); int r600_mc_wait_for_idle(struct radeon_device *rdev); int r600_pcie_gart_init(struct radeon_device *rdev); void r600_scratch_init(struct radeon_device *rdev); -int r600_blit_init(struct radeon_device *rdev); -void r600_blit_fini(struct radeon_device *rdev); int r600_init_microcode(struct radeon_device *rdev); void r600_fini_microcode(struct radeon_device *rdev); /* r600 irq */ @@ -380,28 +375,26 @@ void r600_disable_interrupts(struct radeon_device *rdev); void r600_rlc_stop(struct radeon_device *rdev); /* r600 audio */ int r600_audio_init(struct radeon_device *rdev); -struct r600_audio r600_audio_status(struct radeon_device *rdev); +struct r600_audio_pin r600_audio_status(struct radeon_device *rdev); void r600_audio_fini(struct radeon_device *rdev); void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock); int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); void r600_hdmi_enable(struct drm_encoder *encoder, bool enable); void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode); -/* r600 blit */ -int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_fence **fence, struct radeon_sa_bo **vb, - struct radeon_semaphore **sem); -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb, struct radeon_semaphore *sem); -void r600_kms_blit_copy(struct radeon_device *rdev, - u64 src_gpu_addr, u64 dst_gpu_addr, - unsigned num_gpu_pages, - struct radeon_sa_bo *vb); u32 r600_get_xclk(struct radeon_device *rdev); uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); int rv6xx_get_temp(struct radeon_device *rdev); +int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); int r600_dpm_pre_set_power_state(struct radeon_device *rdev); void r600_dpm_post_set_power_state(struct radeon_device *rdev); +/* r600 dma */ +uint32_t r600_dma_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t r600_dma_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void r600_dma_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); /* rv6xx dpm */ int rv6xx_dpm_init(struct radeon_device *rdev); int rv6xx_dpm_enable(struct radeon_device *rdev); @@ -432,19 +425,8 @@ void rs780_dpm_print_power_state(struct radeon_device *rdev, struct radeon_ps *ps); void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m); - -/* uvd */ -int r600_uvd_init(struct radeon_device *rdev); -int r600_uvd_rbc_start(struct radeon_device *rdev); -void r600_uvd_stop(struct radeon_device *rdev); -int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); -void r600_uvd_fence_emit(struct radeon_device *rdev, - struct radeon_fence *fence); -void r600_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait); -void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int rs780_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level); /* * rv770,rv730,rv710,rv740 @@ -463,7 +445,6 @@ int rv770_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence **fence); u32 rv770_get_xclk(struct radeon_device *rdev); -int rv770_uvd_resume(struct radeon_device *rdev); int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); int rv770_get_temp(struct radeon_device *rdev); /* rv7xx pm */ @@ -523,7 +504,6 @@ extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_ba extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc); extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc); void evergreen_disable_interrupt_state(struct radeon_device *rdev); -int evergreen_blit_init(struct radeon_device *rdev); int evergreen_mc_wait_for_idle(struct radeon_device *rdev); void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); @@ -533,6 +513,7 @@ int evergreen_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence **fence); +u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev); void evergreen_pcie_gen2_enable(struct radeon_device *rdev); void evergreen_print_gpu_status_regs(struct radeon_device *rdev); void evergreen_program_aspm(struct radeon_device *rdev); @@ -616,6 +597,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cayman_vm_decode_fault(struct radeon_device *rdev, u32 status, u32 addr); +u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev); +void cayman_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); void cayman_dma_ring_ib_execute(struct radeon_device *rdev, @@ -660,9 +647,16 @@ void trinity_dpm_debugfs_print_current_performance_level(struct radeon_device *r struct seq_file *m); int trinity_dpm_force_performance_level(struct radeon_device *rdev, enum radeon_dpm_forced_level level); +void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable); /* DCE6 - SI */ void dce6_bandwidth_update(struct radeon_device *rdev); +int dce6_audio_init(struct radeon_device *rdev); +void dce6_audio_fini(struct radeon_device *rdev); + +void dce6_afmt_write_sad_regs(struct drm_encoder *encoder); +void dce6_afmt_select_pin(struct drm_encoder *encoder); +void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder); /* * si @@ -695,13 +689,30 @@ int si_copy_dma(struct radeon_device *rdev, struct radeon_fence **fence); void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); u32 si_get_xclk(struct radeon_device *rdev); +u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode); +u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock); +u32 si_get_csb_size(struct radeon_device *rdev); +void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer); +void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, + u32 max_voltage_steps, + struct atom_voltage_table *voltage_table); +void si_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags); +u32 si_gpu_check_soft_reset(struct radeon_device *rdev); uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); int si_get_temp(struct radeon_device *rdev); void si_rlc_fini(struct radeon_device *rdev); int si_rlc_init(struct radeon_device *rdev); +void si_rlc_reset(struct radeon_device *rdev); void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); +void si_init_uvd_internal_cg(struct radeon_device *rdev); +void si_update_cg(struct radeon_device *rdev, + u32 block, bool enable); int si_dpm_init(struct radeon_device *rdev); void si_dpm_setup_asic(struct radeon_device *rdev); int si_dpm_enable(struct radeon_device *rdev); @@ -722,12 +733,18 @@ void dce8_bandwidth_update(struct radeon_device *rdev); /* * cik */ +u32 cik_get_csb_size(struct radeon_device *rdev); +void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer); uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev); u32 cik_get_xclk(struct radeon_device *rdev); uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); -int cik_uvd_resume(struct radeon_device *rdev); +void cik_init_cp_pg_table(struct radeon_device *rdev); +void cik_update_cg(struct radeon_device *rdev, + u32 block, bool enable); +void cik_enter_rlc_safe_mode(struct radeon_device *rdev); +void cik_exit_rlc_safe_mode(struct radeon_device *rdev); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, @@ -781,4 +798,91 @@ void cik_compute_ring_set_wptr(struct radeon_device *rdev, bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); void cik_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); +u32 cik_gpu_check_soft_reset(struct radeon_device *rdev); +void cik_sdma_enable(struct radeon_device *rdev, bool enable); +int cik_sdma_resume(struct radeon_device *rdev); +void cik_sdma_fini(struct radeon_device *rdev); +void cik_sdma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags); +int ci_get_temp(struct radeon_device *rdev); +int kv_get_temp(struct radeon_device *rdev); + +int ci_dpm_init(struct radeon_device *rdev); +int ci_dpm_enable(struct radeon_device *rdev); +void ci_dpm_disable(struct radeon_device *rdev); +int ci_dpm_pre_set_power_state(struct radeon_device *rdev); +int ci_dpm_set_power_state(struct radeon_device *rdev); +void ci_dpm_post_set_power_state(struct radeon_device *rdev); +void ci_dpm_setup_asic(struct radeon_device *rdev); +void ci_dpm_display_configuration_changed(struct radeon_device *rdev); +void ci_dpm_fini(struct radeon_device *rdev); +u32 ci_dpm_get_sclk(struct radeon_device *rdev, bool low); +u32 ci_dpm_get_mclk(struct radeon_device *rdev, bool low); +void ci_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *ps); +void ci_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, + struct seq_file *m); +int ci_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level); +bool ci_dpm_vblank_too_short(struct radeon_device *rdev); +void ci_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); + +int kv_dpm_init(struct radeon_device *rdev); +int kv_dpm_enable(struct radeon_device *rdev); +void kv_dpm_disable(struct radeon_device *rdev); +int kv_dpm_pre_set_power_state(struct radeon_device *rdev); +int kv_dpm_set_power_state(struct radeon_device *rdev); +void kv_dpm_post_set_power_state(struct radeon_device *rdev); +void kv_dpm_setup_asic(struct radeon_device *rdev); +void kv_dpm_display_configuration_changed(struct radeon_device *rdev); +void kv_dpm_fini(struct radeon_device *rdev); +u32 kv_dpm_get_sclk(struct radeon_device *rdev, bool low); +u32 kv_dpm_get_mclk(struct radeon_device *rdev, bool low); +void kv_dpm_print_power_state(struct radeon_device *rdev, + struct radeon_ps *ps); +void kv_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, + struct seq_file *m); +int kv_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level); +void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); +void kv_dpm_enable_bapm(struct radeon_device *rdev, bool enable); + +/* uvd v1.0 */ +uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void uvd_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); + +int uvd_v1_0_init(struct radeon_device *rdev); +void uvd_v1_0_fini(struct radeon_device *rdev); +int uvd_v1_0_start(struct radeon_device *rdev); +void uvd_v1_0_stop(struct radeon_device *rdev); + +int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); +void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); + +/* uvd v2.2 */ +int uvd_v2_2_resume(struct radeon_device *rdev); +void uvd_v2_2_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); + +/* uvd v3.1 */ +void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); + +/* uvd v4.2 */ +int uvd_v4_2_resume(struct radeon_device *rdev); + #endif diff --git a/sys/dev/drm/radeon/radeon_atombios.c b/sys/dev/drm/radeon/radeon_atombios.c index 49578fa5f5..40886b3344 100644 --- a/sys/dev/drm/radeon/radeon_atombios.c +++ b/sys/dev/drm/radeon/radeon_atombios.c @@ -29,7 +29,7 @@ #include #include #include "radeon.h" -#include "radeon_asic.h" /* Declares several prototypes; clang is pleased. */ +#include "radeon_asic.h" #include "atom.h" #include "atom-bits.h" @@ -141,8 +141,8 @@ static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rd num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_GPIO_I2C_ASSIGMENT); + gpio = &i2c_info->asGPIO_Info[0]; for (i = 0; i < num_indices; i++) { - gpio = &i2c_info->asGPIO_Info[i]; radeon_lookup_i2c_gpio_quirks(rdev, gpio, i); @@ -150,6 +150,8 @@ static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rd i2c = radeon_get_bus_rec_for_i2c_gpio(gpio); break; } + gpio = (ATOM_GPIO_I2C_ASSIGMENT *) + ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); } } @@ -173,9 +175,8 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_GPIO_I2C_ASSIGMENT); + gpio = &i2c_info->asGPIO_Info[0]; for (i = 0; i < num_indices; i++) { - gpio = &i2c_info->asGPIO_Info[i]; - radeon_lookup_i2c_gpio_quirks(rdev, gpio, i); i2c = radeon_get_bus_rec_for_i2c_gpio(gpio); @@ -184,12 +185,14 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) ksprintf(stmp, "0x%x", i2c.i2c_id); rdev->i2c_bus[i] = radeon_i2c_create(rdev->ddev, &i2c, stmp); } + gpio = (ATOM_GPIO_I2C_ASSIGMENT *) + ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); } } } static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, - u8 id) + u8 id) { struct atom_context *ctx = rdev->mode_info.atom_context; struct radeon_gpio_rec gpio; @@ -208,8 +211,8 @@ static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_GPIO_PIN_ASSIGNMENT); + pin = gpio_info->asGPIO_Pin; for (i = 0; i < num_indices; i++) { - pin = &gpio_info->asGPIO_Pin[i]; if (id == pin->ucGPIO_ID) { gpio.id = pin->ucGPIO_ID; gpio.reg = le16_to_cpu(pin->usGpioPin_AIndex) * 4; @@ -217,6 +220,8 @@ static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, gpio.valid = true; break; } + pin = (ATOM_GPIO_PIN_ASSIGNMENT *) + ((u8 *)pin + sizeof(ATOM_GPIO_PIN_ASSIGNMENT)); } } @@ -689,13 +694,16 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev) (ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT *) ((char *)ctx->bios + data_offset + le16_to_cpu(router_obj->asObjects[k].usSrcDstTableOffset)); + u8 *num_dst_objs = (u8 *) + ((u8 *)router_src_dst_table + 1 + + (router_src_dst_table->ucNumberOfSrc * 2)); + u16 *dst_objs = (u16 *)(num_dst_objs + 1); int enum_id; router.router_id = router_obj_id; - for (enum_id = 0; enum_id < router_src_dst_table->ucNumberOfDst; - enum_id++) { + for (enum_id = 0; enum_id < (*num_dst_objs); enum_id++) { if (le16_to_cpu(path->usConnObjectId) == - le16_to_cpu(router_src_dst_table->usDstObjectID[enum_id])) + le16_to_cpu(dst_objs[enum_id])) break; } @@ -1337,6 +1345,7 @@ bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev, int index = GetIndexIntoMasterTable(DATA, PPLL_SS_Info); uint16_t data_offset, size; struct _ATOM_SPREAD_SPECTRUM_INFO *ss_info; + struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT *ss_assign; uint8_t frev, crev; int i, num_indices; @@ -1348,18 +1357,21 @@ bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev, num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_SPREAD_SPECTRUM_ASSIGNMENT); - + ss_assign = (struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT*) + ((u8 *)&ss_info->asSS_Info[0]); for (i = 0; i < num_indices; i++) { - if (ss_info->asSS_Info[i].ucSS_Id == id) { + if (ss_assign->ucSS_Id == id) { ss->percentage = - le16_to_cpu(ss_info->asSS_Info[i].usSpreadSpectrumPercentage); - ss->type = ss_info->asSS_Info[i].ucSpreadSpectrumType; - ss->step = ss_info->asSS_Info[i].ucSS_Step; - ss->delay = ss_info->asSS_Info[i].ucSS_Delay; - ss->range = ss_info->asSS_Info[i].ucSS_Range; - ss->refdiv = ss_info->asSS_Info[i].ucRecommendedRef_Div; + le16_to_cpu(ss_assign->usSpreadSpectrumPercentage); + ss->type = ss_assign->ucSpreadSpectrumType; + ss->step = ss_assign->ucSS_Step; + ss->delay = ss_assign->ucSS_Delay; + ss->range = ss_assign->ucSS_Range; + ss->refdiv = ss_assign->ucRecommendedRef_Div; return true; } + ss_assign = (struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT*) + ((u8 *)ss_assign + sizeof(struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT)); } } return false; @@ -1447,6 +1459,12 @@ union asic_ss_info { struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 info_3; }; +union asic_ss_assignment { + struct _ATOM_ASIC_SS_ASSIGNMENT v1; + struct _ATOM_ASIC_SS_ASSIGNMENT_V2 v2; + struct _ATOM_ASIC_SS_ASSIGNMENT_V3 v3; +}; + bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, struct radeon_atom_ss *ss, int id, u32 clock) @@ -1455,9 +1473,19 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info); uint16_t data_offset, size; union asic_ss_info *ss_info; + union asic_ss_assignment *ss_assign; uint8_t frev, crev; int i, num_indices; + if (id == ASIC_INTERNAL_MEMORY_SS) { + if (!(rdev->mode_info.firmware_flags & ATOM_BIOS_INFO_MEMORY_CLOCK_SS_SUPPORT)) + return false; + } + if (id == ASIC_INTERNAL_ENGINE_SS) { + if (!(rdev->mode_info.firmware_flags & ATOM_BIOS_INFO_ENGINE_CLOCK_SS_SUPPORT)) + return false; + } + memset(ss, 0, sizeof(struct radeon_atom_ss)); if (atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { @@ -1470,45 +1498,52 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_ASIC_SS_ASSIGNMENT); + ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info.asSpreadSpectrum[0]); for (i = 0; i < num_indices; i++) { - if ((ss_info->info.asSpreadSpectrum[i].ucClockIndication == id) && - (clock <= le32_to_cpu(ss_info->info.asSpreadSpectrum[i].ulTargetClockRange))) { + if ((ss_assign->v1.ucClockIndication == id) && + (clock <= le32_to_cpu(ss_assign->v1.ulTargetClockRange))) { ss->percentage = - le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadSpectrumPercentage); - ss->type = ss_info->info.asSpreadSpectrum[i].ucSpreadSpectrumMode; - ss->rate = le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadRateInKhz); + le16_to_cpu(ss_assign->v1.usSpreadSpectrumPercentage); + ss->type = ss_assign->v1.ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_assign->v1.usSpreadRateInKhz); return true; } + ss_assign = (union asic_ss_assignment *) + ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT)); } break; case 2: num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2); + ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_2.asSpreadSpectrum[0]); for (i = 0; i < num_indices; i++) { - if ((ss_info->info_2.asSpreadSpectrum[i].ucClockIndication == id) && - (clock <= le32_to_cpu(ss_info->info_2.asSpreadSpectrum[i].ulTargetClockRange))) { + if ((ss_assign->v2.ucClockIndication == id) && + (clock <= le32_to_cpu(ss_assign->v2.ulTargetClockRange))) { ss->percentage = - le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadSpectrumPercentage); - ss->type = ss_info->info_2.asSpreadSpectrum[i].ucSpreadSpectrumMode; - ss->rate = le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadRateIn10Hz); + le16_to_cpu(ss_assign->v2.usSpreadSpectrumPercentage); + ss->type = ss_assign->v2.ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_assign->v2.usSpreadRateIn10Hz); if ((crev == 2) && ((id == ASIC_INTERNAL_ENGINE_SS) || (id == ASIC_INTERNAL_MEMORY_SS))) ss->rate /= 100; return true; } + ss_assign = (union asic_ss_assignment *) + ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2)); } break; case 3: num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3); + ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_3.asSpreadSpectrum[0]); for (i = 0; i < num_indices; i++) { - if ((ss_info->info_3.asSpreadSpectrum[i].ucClockIndication == id) && - (clock <= le32_to_cpu(ss_info->info_3.asSpreadSpectrum[i].ulTargetClockRange))) { + if ((ss_assign->v3.ucClockIndication == id) && + (clock <= le32_to_cpu(ss_assign->v3.ulTargetClockRange))) { ss->percentage = - le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadSpectrumPercentage); - ss->type = ss_info->info_3.asSpreadSpectrum[i].ucSpreadSpectrumMode; - ss->rate = le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadRateIn10Hz); + le16_to_cpu(ss_assign->v3.usSpreadSpectrumPercentage); + ss->type = ss_assign->v3.ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_assign->v3.usSpreadRateIn10Hz); if ((id == ASIC_INTERNAL_ENGINE_SS) || (id == ASIC_INTERNAL_MEMORY_SS)) ss->rate /= 100; @@ -1516,6 +1551,8 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, radeon_atombios_get_igp_ss_overrides(rdev, ss, id); return true; } + ss_assign = (union asic_ss_assignment *) + ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3)); } break; default: @@ -1650,7 +1687,9 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct kfree(edid); } } - record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD); + record += fake_edid_record->ucFakeEDIDLength ? + fake_edid_record->ucFakeEDIDLength + 2 : + sizeof(ATOM_FAKE_EDID_PATCH_RECORD); break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; @@ -2217,6 +2256,11 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); rdev->pm.int_thermal_type = THERMAL_TYPE_CI; + } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_KAVERI) { + DRM_INFO("Internal thermal controller %s fan control\n", + (controller->ucFanParameters & + ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); + rdev->pm.int_thermal_type = THERMAL_TYPE_KV; } else if ((controller->ucType == ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) || (controller->ucType == @@ -3059,6 +3103,121 @@ int radeon_atom_get_leakage_vddc_based_on_leakage_idx(struct radeon_device *rdev return radeon_atom_get_max_vddc(rdev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage); } +int radeon_atom_get_leakage_id_from_vbios(struct radeon_device *rdev, + u16 *leakage_id) +{ + union set_voltage args; + int index = GetIndexIntoMasterTable(COMMAND, SetVoltage); + u8 frev, crev; + + if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev)) + return -EINVAL; + + switch (crev) { + case 3: + case 4: + args.v3.ucVoltageType = 0; + args.v3.ucVoltageMode = ATOM_GET_LEAKAGE_ID; + args.v3.usVoltageLevel = 0; + + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + + *leakage_id = le16_to_cpu(args.v3.usVoltageLevel); + break; + default: + DRM_ERROR("Unknown table version %d, %d\n", frev, crev); + return -EINVAL; + } + + return 0; +} + +int radeon_atom_get_leakage_vddc_based_on_leakage_params(struct radeon_device *rdev, + u16 *vddc, u16 *vddci, + u16 virtual_voltage_id, + u16 vbios_voltage_id) +{ + int index = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo); + u8 frev, crev; + u16 data_offset, size; + int i, j; + ATOM_ASIC_PROFILING_INFO_V2_1 *profile; + u16 *leakage_bin, *vddc_id_buf, *vddc_buf, *vddci_id_buf, *vddci_buf; + + *vddc = 0; + *vddci = 0; + + if (!atom_parse_data_header(rdev->mode_info.atom_context, index, &size, + &frev, &crev, &data_offset)) + return -EINVAL; + + profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *) + (rdev->mode_info.atom_context->bios + data_offset); + + switch (frev) { + case 1: + return -EINVAL; + case 2: + switch (crev) { + case 1: + if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1)) + return -EINVAL; + leakage_bin = (u16 *) + (rdev->mode_info.atom_context->bios + data_offset + + le16_to_cpu(profile->usLeakageBinArrayOffset)); + vddc_id_buf = (u16 *) + (rdev->mode_info.atom_context->bios + data_offset + + le16_to_cpu(profile->usElbVDDC_IdArrayOffset)); + vddc_buf = (u16 *) + (rdev->mode_info.atom_context->bios + data_offset + + le16_to_cpu(profile->usElbVDDC_LevelArrayOffset)); + vddci_id_buf = (u16 *) + (rdev->mode_info.atom_context->bios + data_offset + + le16_to_cpu(profile->usElbVDDCI_IdArrayOffset)); + vddci_buf = (u16 *) + (rdev->mode_info.atom_context->bios + data_offset + + le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset)); + + if (profile->ucElbVDDC_Num > 0) { + for (i = 0; i < profile->ucElbVDDC_Num; i++) { + if (vddc_id_buf[i] == virtual_voltage_id) { + for (j = 0; j < profile->ucLeakageBinNum; j++) { + if (vbios_voltage_id <= leakage_bin[j]) { + *vddc = vddc_buf[j * profile->ucElbVDDC_Num + i]; + break; + } + } + break; + } + } + } + if (profile->ucElbVDDCI_Num > 0) { + for (i = 0; i < profile->ucElbVDDCI_Num; i++) { + if (vddci_id_buf[i] == virtual_voltage_id) { + for (j = 0; j < profile->ucLeakageBinNum; j++) { + if (vbios_voltage_id <= leakage_bin[j]) { + *vddci = vddci_buf[j * profile->ucElbVDDCI_Num + i]; + break; + } + } + break; + } + } + } + break; + default: + DRM_ERROR("Unknown table version %d, %d\n", frev, crev); + return -EINVAL; + } + break; + default: + DRM_ERROR("Unknown table version %d, %d\n", frev, crev); + return -EINVAL; + } + + return 0; +} + int radeon_atom_get_voltage_gpio_settings(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type, u32 *gpio_value, u32 *gpio_mask) @@ -3261,10 +3420,11 @@ int radeon_atom_get_max_voltage(struct radeon_device *rdev, ATOM_VOLTAGE_FORMULA_V2 *formula = &voltage_object->v2.asFormula; if (formula->ucNumOfVoltageEntries) { + VOLTAGE_LUT_ENTRY *lut = (VOLTAGE_LUT_ENTRY *) + ((u8 *)&formula->asVIDAdjustEntries[0] + + (sizeof(VOLTAGE_LUT_ENTRY) * (formula->ucNumOfVoltageEntries - 1))); *max_voltage = - le16_to_cpu(formula->asVIDAdjustEntries[ - formula->ucNumOfVoltageEntries - 1 - ].usVoltageValue); + le16_to_cpu(lut->usVoltageValue); return 0; } } @@ -3424,11 +3584,13 @@ int radeon_atom_get_voltage_table(struct radeon_device *rdev, if (voltage_object) { ATOM_VOLTAGE_FORMULA_V2 *formula = &voltage_object->v2.asFormula; + VOLTAGE_LUT_ENTRY *lut; if (formula->ucNumOfVoltageEntries > MAX_VOLTAGE_ENTRIES) return -EINVAL; + lut = &formula->asVIDAdjustEntries[0]; for (i = 0; i < formula->ucNumOfVoltageEntries; i++) { voltage_table->entries[i].value = - le16_to_cpu(formula->asVIDAdjustEntries[i].usVoltageValue); + le16_to_cpu(lut->usVoltageValue); ret = radeon_atom_get_voltage_gpio_settings(rdev, voltage_table->entries[i].value, voltage_type, @@ -3436,6 +3598,8 @@ int radeon_atom_get_voltage_table(struct radeon_device *rdev, &voltage_table->mask_low); if (ret) return ret; + lut = (VOLTAGE_LUT_ENTRY *) + ((u8 *)lut + sizeof(VOLTAGE_LUT_ENTRY)); } voltage_table->count = formula->ucNumOfVoltageEntries; return 0; @@ -3455,13 +3619,17 @@ int radeon_atom_get_voltage_table(struct radeon_device *rdev, if (voltage_object) { ATOM_GPIO_VOLTAGE_OBJECT_V3 *gpio = &voltage_object->v3.asGpioVoltageObj; + VOLTAGE_LUT_ENTRY_V2 *lut; if (gpio->ucGpioEntryNum > MAX_VOLTAGE_ENTRIES) return -EINVAL; + lut = &gpio->asVolGpioLut[0]; for (i = 0; i < gpio->ucGpioEntryNum; i++) { voltage_table->entries[i].value = - le16_to_cpu(gpio->asVolGpioLut[i].usVoltageValue); + le16_to_cpu(lut->usVoltageValue); voltage_table->entries[i].smio_low = - le32_to_cpu(gpio->asVolGpioLut[i].ulVoltageId); + le32_to_cpu(lut->ulVoltageId); + lut = (VOLTAGE_LUT_ENTRY_V2 *) + ((u8 *)lut + sizeof(VOLTAGE_LUT_ENTRY_V2)); } voltage_table->mask_low = le32_to_cpu(gpio->ulGpioMaskVal); voltage_table->count = gpio->ucGpioEntryNum; @@ -3587,7 +3755,6 @@ int radeon_atom_get_mclk_range_table(struct radeon_device *rdev, union vram_info *vram_info; u32 mem_timing_size = gddr5 ? sizeof(ATOM_MEMORY_TIMING_FORMAT_V2) : sizeof(ATOM_MEMORY_TIMING_FORMAT); - u8 *p; memset(mclk_range_table, 0, sizeof(struct atom_memory_clock_range_table)); @@ -3606,6 +3773,7 @@ int radeon_atom_get_mclk_range_table(struct radeon_device *rdev, if (module_index < vram_info->v1_4.ucNumOfVRAMModule) { ATOM_VRAM_MODULE_V4 *vram_module = (ATOM_VRAM_MODULE_V4 *)vram_info->v1_4.aVramInfo; + ATOM_MEMORY_TIMING_FORMAT *format; for (i = 0; i < module_index; i++) { if (le16_to_cpu(vram_module->usModuleSize) == 0) @@ -3616,11 +3784,11 @@ int radeon_atom_get_mclk_range_table(struct radeon_device *rdev, mclk_range_table->num_entries = (u8) ((le16_to_cpu(vram_module->usModuleSize) - offsetof(ATOM_VRAM_MODULE_V4, asMemTiming)) / mem_timing_size); - p = (u8 *)&vram_module->asMemTiming[0]; + format = &vram_module->asMemTiming[0]; for (i = 0; i < mclk_range_table->num_entries; i++) { - ATOM_MEMORY_TIMING_FORMAT *format = (ATOM_MEMORY_TIMING_FORMAT *)p; mclk_range_table->mclk[i] = le32_to_cpu(format->ulClkRange); - p += mem_timing_size; + format = (ATOM_MEMORY_TIMING_FORMAT *) + ((u8 *)format + mem_timing_size); } } else return -EINVAL; diff --git a/sys/dev/drm/radeon/radeon_blit_common.h b/sys/dev/drm/radeon/radeon_blit_common.h deleted file mode 100644 index 4ecbe72c9d..0000000000 --- a/sys/dev/drm/radeon/radeon_blit_common.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2009 Advanced Micro Devices, Inc. - * Copyright 2009 Red Hat Inc. - * Copyright 2012 Alcatel-Lucent, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_BLIT_COMMON_H__ - -#define DI_PT_RECTLIST 0x11 -#define DI_INDEX_SIZE_16_BIT 0x0 -#define DI_SRC_SEL_AUTO_INDEX 0x2 - -#define FMT_8 0x1 -#define FMT_5_6_5 0x8 -#define FMT_8_8_8_8 0x1a -#define COLOR_8 0x1 -#define COLOR_5_6_5 0x8 -#define COLOR_8_8_8_8 0x1a - -#define RECT_UNIT_H 32 -#define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H) - -#define __RADEON_BLIT_COMMON_H__ -#endif diff --git a/sys/dev/drm/radeon/radeon_combios.c b/sys/dev/drm/radeon/radeon_combios.c index c89c83e8b1..adaeca9172 100644 --- a/sys/dev/drm/radeon/radeon_combios.c +++ b/sys/dev/drm/radeon/radeon_combios.c @@ -895,8 +895,8 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct /* quirks */ /* Radeon 7000 (RV100) */ if (((rdev->ddev->pci_device == 0x5159) && -+ (rdev->ddev->pci_subvendor == 0x174B) && -+ (rdev->ddev->pci_subdevice == 0x7c28)) || + (rdev->ddev->pci_subvendor == 0x174B) && + (rdev->ddev->pci_subdevice == 0x7c28)) || /* Radeon 9100 (R200) */ ((rdev->ddev->pci_device == 0x514D) && (rdev->ddev->pci_subvendor == 0x174B) && diff --git a/sys/dev/drm/radeon/radeon_connectors.c b/sys/dev/drm/radeon/radeon_connectors.c index 5a909b97c1..694f0c0e08 100644 --- a/sys/dev/drm/radeon/radeon_connectors.c +++ b/sys/dev/drm/radeon/radeon_connectors.c @@ -394,6 +394,21 @@ static int radeon_connector_set_property(struct drm_connector *connector, struct } } + if (property == rdev->mode_info.audio_property) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + /* need to find digital encoder on connector */ + encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TMDS); + if (!encoder) + return 0; + + radeon_encoder = to_radeon_encoder(encoder); + + if (radeon_connector->audio != val) { + radeon_connector->audio = val; + radeon_property_change_mode(&radeon_encoder->base); + } + } + if (property == rdev->mode_info.underscan_property) { /* need to find digital encoder on connector */ encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TMDS); @@ -1423,7 +1438,7 @@ radeon_dp_detect(struct drm_connector *connector, bool force) if (radeon_dp_getdpcd(radeon_connector)) ret = connector_status_connected; } else { - /* try non-aux ddc (DP to DVI/HMDI/etc. adapter) */ + /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */ if (radeon_ddc_probe(radeon_connector, false)) ret = connector_status_connected; } @@ -1492,6 +1507,24 @@ static const struct drm_connector_funcs radeon_dp_connector_funcs = { .force = radeon_dvi_force, }; +static const struct drm_connector_funcs radeon_edp_connector_funcs = { + .dpms = drm_helper_connector_dpms, + .detect = radeon_dp_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .set_property = radeon_lvds_set_property, + .destroy = radeon_dp_connector_destroy, + .force = radeon_dvi_force, +}; + +static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = { + .dpms = drm_helper_connector_dpms, + .detect = radeon_dp_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .set_property = radeon_lvds_set_property, + .destroy = radeon_dp_connector_destroy, + .force = radeon_dvi_force, +}; + void radeon_add_atom_connector(struct drm_device *dev, uint32_t connector_id, @@ -1583,8 +1616,6 @@ radeon_add_atom_connector(struct drm_device *dev, goto failed; radeon_dig_connector->igp_lane_info = igp_lane_info; radeon_connector->con_priv = radeon_dig_connector; - drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type); - drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs); if (i2c_bus->valid) { /* add DP i2c bus */ if (connector_type == DRM_MODE_CONNECTOR_eDP) @@ -1601,6 +1632,10 @@ radeon_add_atom_connector(struct drm_device *dev, case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_DVIA: default: + drm_connector_init(dev, &radeon_connector->base, + &radeon_dp_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, + &radeon_dp_connector_helper_funcs); connector->interlace_allowed = true; connector->doublescan_allowed = true; radeon_connector->dac_load_detect = true; @@ -1613,6 +1648,10 @@ radeon_add_atom_connector(struct drm_device *dev, case DRM_MODE_CONNECTOR_HDMIA: case DRM_MODE_CONNECTOR_HDMIB: case DRM_MODE_CONNECTOR_DisplayPort: + drm_connector_init(dev, &radeon_connector->base, + &radeon_dp_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, + &radeon_dp_connector_helper_funcs); drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_property, UNDERSCAN_OFF); @@ -1622,6 +1661,12 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_vborder_property, 0); + if (radeon_audio != 0) + drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + (radeon_audio == 1) ? + RADEON_AUDIO_AUTO : + RADEON_AUDIO_DISABLE); subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; if (connector_type == DRM_MODE_CONNECTOR_HDMIB) @@ -1637,6 +1682,10 @@ radeon_add_atom_connector(struct drm_device *dev, break; case DRM_MODE_CONNECTOR_LVDS: case DRM_MODE_CONNECTOR_eDP: + drm_connector_init(dev, &radeon_connector->base, + &radeon_lvds_bridge_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, + &radeon_dp_connector_helper_funcs); drm_object_attach_property(&radeon_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); @@ -1711,6 +1760,13 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.underscan_vborder_property, 0); } + if (ASIC_IS_DCE2(rdev) && (radeon_audio != 0)) { + drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + (radeon_audio == 1) ? + RADEON_AUDIO_AUTO : + RADEON_AUDIO_DISABLE); + } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; drm_object_attach_property(&radeon_connector->base.base, @@ -1751,6 +1807,13 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.underscan_vborder_property, 0); } + if (ASIC_IS_DCE2(rdev) && (radeon_audio != 0)) { + drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + (radeon_audio == 1) ? + RADEON_AUDIO_AUTO : + RADEON_AUDIO_DISABLE); + } subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; if (connector_type == DRM_MODE_CONNECTOR_HDMIB) @@ -1790,6 +1853,13 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.underscan_vborder_property, 0); } + if (ASIC_IS_DCE2(rdev) && (radeon_audio != 0)) { + drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + (radeon_audio == 1) ? + RADEON_AUDIO_AUTO : + RADEON_AUDIO_DISABLE); + } connector->interlace_allowed = true; /* in theory with a DP to VGA converter... */ connector->doublescan_allowed = false; @@ -1800,7 +1870,7 @@ radeon_add_atom_connector(struct drm_device *dev, goto failed; radeon_dig_connector->igp_lane_info = igp_lane_info; radeon_connector->con_priv = radeon_dig_connector; - drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type); + drm_connector_init(dev, &radeon_connector->base, &radeon_edp_connector_funcs, connector_type); drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs); if (i2c_bus->valid) { /* add DP i2c bus */ diff --git a/sys/dev/drm/radeon/radeon_cp.c b/sys/dev/drm/radeon/radeon_cp.c index e86f95a2eb..dfdf405e67 100644 --- a/sys/dev/drm/radeon/radeon_cp.c +++ b/sys/dev/drm/radeon/radeon_cp.c @@ -1442,13 +1442,13 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle + init->ring_size / sizeof(u32)); dev_priv->ring.size = init->ring_size; - dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); + dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8); dev_priv->ring.rptr_update = /* init->rptr_update */ 4096; - dev_priv->ring.rptr_update_l2qw = drm_order( /* init->rptr_update */ 4096 / 8); + dev_priv->ring.rptr_update_l2qw = order_base_2( /* init->rptr_update */ 4096 / 8); dev_priv->ring.fetch_size = /* init->fetch_size */ 32; - dev_priv->ring.fetch_size_l2ow = drm_order( /* init->fetch_size */ 32 / 16); + dev_priv->ring.fetch_size_l2ow = order_base_2( /* init->fetch_size */ 32 / 16); dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1; dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK; diff --git a/sys/dev/drm/radeon/radeon_cs.c b/sys/dev/drm/radeon/radeon_cs.c index 54dab4bb27..508aef6ba4 100644 --- a/sys/dev/drm/radeon/radeon_cs.c +++ b/sys/dev/drm/radeon/radeon_cs.c @@ -23,10 +23,7 @@ * * Authors: * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/radeon_cs.c 254885 2013-08-25 19:37:15Z dumbbell $ */ - #include #include #include "radeon_reg.h" @@ -83,9 +80,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].lobj.bo = p->relocs[i].robj; p->relocs[i].lobj.written = !!r->write_domain; - /* the first reloc of an UVD job is the - msg and that must be in VRAM */ - if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) { + /* the first reloc of an UVD job is the msg and that must be in + VRAM, also but everything into VRAM on AGP cards to avoid + image corruptions */ + if (p->ring == R600_RING_TYPE_UVD_INDEX && + (i == 0 || p->rdev->flags & RADEON_IS_AGP)) { /* TODO: is this still needed for NI+ ? */ p->relocs[i].lobj.domain = RADEON_GEM_DOMAIN_VRAM; @@ -109,7 +108,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) radeon_bo_list_add_object(&p->relocs[i].lobj, &p->validated); } - return radeon_bo_list_validate(&p->validated, p->ring); + return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); } static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) @@ -272,7 +271,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return -EINVAL; /* we only support VM on some SI+ rings */ - if ((p->rdev->asic->ring[p->ring].cs_parse == NULL) && + if ((p->rdev->asic->ring[p->ring]->cs_parse == NULL) && ((p->cs_flags & RADEON_CS_USE_VM) == 0)) { DRM_ERROR("Ring %d requires VM!\n", p->ring); return -EINVAL; @@ -322,15 +321,17 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. **/ -static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) +static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff) { unsigned i; if (!error) { - ttm_eu_fence_buffer_objects(&parser->validated, + ttm_eu_fence_buffer_objects(&parser->ticket, + &parser->validated, parser->ib.fence); - } else { - ttm_eu_backoff_reservation(&parser->validated); + } else if (backoff) { + ttm_eu_backoff_reservation(&parser->ticket, + &parser->validated); } if (parser->relocs != NULL) { @@ -389,6 +390,10 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Invalid command stream !\n"); return r; } + + if (parser->ring == R600_RING_TYPE_UVD_INDEX) + radeon_uvd_note_usage(rdev); + radeon_cs_sync_rings(parser); r = radeon_ib_schedule(rdev, &parser->ib, NULL); if (r) { @@ -480,6 +485,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, return r; } + if (parser->ring == R600_RING_TYPE_UVD_INDEX) + radeon_uvd_note_usage(rdev); + lockmgr(&rdev->vm_manager.lock, LK_EXCLUSIVE); lockmgr(&vm->mutex, LK_EXCLUSIVE); r = radeon_vm_alloc_pt(rdev, vm); @@ -543,7 +551,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = radeon_cs_parser_init(&parser, data); if (r) { DRM_ERROR("Failed to initialize parser !\n"); - radeon_cs_parser_fini(&parser, r); + radeon_cs_parser_fini(&parser, r, false); lockmgr(&rdev->exclusive_lock, LK_RELEASE); r = radeon_cs_handle_lockup(rdev, r); return r; @@ -552,15 +560,15 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { if (r != -ERESTARTSYS) DRM_ERROR("Failed to parse relocation %d!\n", r); - radeon_cs_parser_fini(&parser, r); + radeon_cs_parser_fini(&parser, r, false); lockmgr(&rdev->exclusive_lock, LK_RELEASE); r = radeon_cs_handle_lockup(rdev, r); return r; } - /* XXX pick SD/HD/MVC */ - if (parser.ring == R600_RING_TYPE_UVD_INDEX) - radeon_uvd_note_usage(rdev); +#if 0 + trace_radeon_cs(&parser); +#endif r = radeon_cs_ib_chunk(rdev, &parser); if (r) { @@ -571,7 +579,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } out: - radeon_cs_parser_fini(&parser, r); + radeon_cs_parser_fini(&parser, r, true); lockmgr(&rdev->exclusive_lock, LK_RELEASE); r = radeon_cs_handle_lockup(rdev, r); return r; diff --git a/sys/dev/drm/radeon/radeon_device.c b/sys/dev/drm/radeon/radeon_device.c index 5d928fbfb7..f0b3921eb9 100644 --- a/sys/dev/drm/radeon/radeon_device.c +++ b/sys/dev/drm/radeon/radeon_device.c @@ -497,8 +497,8 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 if (limit && limit < mc->real_vram_size) mc->real_vram_size = limit; dev_info(rdev->dev, "VRAM: %juM 0x%016jX - 0x%016jX (%juM used)\n", - (uintmax_t)mc->mc_vram_size >> 20, (uintmax_t)mc->vram_start, - (uintmax_t)mc->vram_end, (uintmax_t)mc->real_vram_size >> 20); + mc->mc_vram_size >> 20, mc->vram_start, + mc->vram_end, mc->real_vram_size >> 20); } /** @@ -534,7 +534,7 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) } mc->gtt_end = mc->gtt_start + mc->gtt_size - 1; dev_info(rdev->dev, "GTT: %juM 0x%016jX - 0x%016jX\n", - (uintmax_t)mc->gtt_size >> 20, (uintmax_t)mc->gtt_start, (uintmax_t)mc->gtt_end); + mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end); } /* @@ -1002,16 +1002,28 @@ static void radeon_check_arguments(struct radeon_device *rdev) radeon_vram_limit = 0; } + if (radeon_gart_size == -1) { + /* default to a larger gart size on newer asics */ + if (rdev->family >= CHIP_RV770) + radeon_gart_size = 1024; + else + radeon_gart_size = 512; + } /* gtt size must be power of two and greater or equal to 32M */ if (radeon_gart_size < 32) { - dev_warn(rdev->dev, "gart size (%d) too small forcing to 512M\n", + dev_warn(rdev->dev, "gart size (%d) too small\n", radeon_gart_size); - radeon_gart_size = 512; - + if (rdev->family >= CHIP_RV770) + radeon_gart_size = 1024; + else + radeon_gart_size = 512; } else if (!radeon_check_pot_argument(radeon_gart_size)) { dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n", radeon_gart_size); - radeon_gart_size = 512; + if (rdev->family >= CHIP_RV770) + radeon_gart_size = 1024; + else + radeon_gart_size = 512; } rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20; @@ -1146,7 +1158,7 @@ int radeon_device_init(struct radeon_device *rdev, rdev->family = flags & RADEON_FAMILY_MASK; rdev->is_atom_bios = false; rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT; - rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + rdev->mc.gtt_size = 512 * 1024 * 1024; rdev->accel_working = false; rdev->fictitious_range_registered = false; /* set up ring ids */ @@ -1292,7 +1304,7 @@ int radeon_device_init(struct radeon_device *rdev, /* this will fail for cards that aren't VGA class devices, just * ignore it */ vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode); - vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops); + vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, false); #endif /* DUMBBELL_WIP */ r = radeon_init(rdev); @@ -1336,13 +1348,22 @@ int radeon_device_init(struct radeon_device *rdev, rdev->fictitious_range_registered = true; if ((radeon_testing & 1)) { - radeon_test_moves(rdev); + if (rdev->accel_working) + radeon_test_moves(rdev); + else + DRM_INFO("radeon: acceleration disabled, skipping move tests\n"); } if ((radeon_testing & 2)) { - radeon_test_syncing(rdev); + if (rdev->accel_working) + radeon_test_syncing(rdev); + else + DRM_INFO("radeon: acceleration disabled, skipping sync tests\n"); } if (radeon_benchmarking) { - radeon_benchmark(rdev, radeon_benchmarking); + if (rdev->accel_working) + radeon_benchmark(rdev, radeon_benchmarking); + else + DRM_INFO("radeon: acceleration disabled, skipping benchmarks\n"); } return 0; } diff --git a/sys/dev/drm/radeon/radeon_display.c b/sys/dev/drm/radeon/radeon_display.c index 8b3db825d6..92b484eebc 100644 --- a/sys/dev/drm/radeon/radeon_display.c +++ b/sys/dev/drm/radeon/radeon_display.c @@ -909,7 +909,7 @@ void radeon_compute_pll_legacy(struct radeon_pll *pll, uint32_t post_div; u32 pll_out_min, pll_out_max; - DRM_DEBUG_KMS("PLL freq %ju %u %u\n", (uintmax_t)freq, pll->min_ref_div, pll->max_ref_div); + DRM_DEBUG_KMS("PLL freq %ju %u %u\n", freq, pll->min_ref_div, pll->max_ref_div); freq = freq * 1000; if (pll->flags & RADEON_PLL_IS_LCD) { @@ -1176,6 +1176,12 @@ static struct drm_prop_enum_list radeon_underscan_enum_list[] = { UNDERSCAN_AUTO, "auto" }, }; +static struct drm_prop_enum_list radeon_audio_enum_list[] = +{ { RADEON_AUDIO_DISABLE, "off" }, + { RADEON_AUDIO_ENABLE, "on" }, + { RADEON_AUDIO_AUTO, "auto" }, +}; + static int radeon_modeset_create_props(struct radeon_device *rdev) { int sz; @@ -1226,6 +1232,12 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (!rdev->mode_info.underscan_vborder_property) return -ENOMEM; + sz = ARRAY_SIZE(radeon_audio_enum_list); + rdev->mode_info.audio_property = + drm_property_create_enum(rdev->ddev, 0, + "audio", + radeon_audio_enum_list, sz); + return 0; } @@ -1259,41 +1271,41 @@ static void radeon_afmt_init(struct radeon_device *rdev) for (i = 0; i < RADEON_MAX_AFMT_BLOCKS; i++) rdev->mode_info.afmt[i] = NULL; - if (ASIC_IS_DCE6(rdev)) { - /* todo */ + if (ASIC_IS_NODCE(rdev)) { + /* nothing to do */ } else if (ASIC_IS_DCE4(rdev)) { + static uint32_t eg_offsets[] = { + EVERGREEN_CRTC0_REGISTER_OFFSET, + EVERGREEN_CRTC1_REGISTER_OFFSET, + EVERGREEN_CRTC2_REGISTER_OFFSET, + EVERGREEN_CRTC3_REGISTER_OFFSET, + EVERGREEN_CRTC4_REGISTER_OFFSET, + EVERGREEN_CRTC5_REGISTER_OFFSET, + 0x13830 - 0x7030, + }; + int num_afmt; + + /* DCE8 has 7 audio blocks tied to DIG encoders */ + /* DCE6 has 6 audio blocks tied to DIG encoders */ /* DCE4/5 has 6 audio blocks tied to DIG encoders */ /* DCE4.1 has 2 audio blocks tied to DIG encoders */ - rdev->mode_info.afmt[0] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL); - if (rdev->mode_info.afmt[0]) { - rdev->mode_info.afmt[0]->offset = EVERGREEN_CRTC0_REGISTER_OFFSET; - rdev->mode_info.afmt[0]->id = 0; - } - rdev->mode_info.afmt[1] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL); - if (rdev->mode_info.afmt[1]) { - rdev->mode_info.afmt[1]->offset = EVERGREEN_CRTC1_REGISTER_OFFSET; - rdev->mode_info.afmt[1]->id = 1; - } - if (!ASIC_IS_DCE41(rdev)) { - rdev->mode_info.afmt[2] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL); - if (rdev->mode_info.afmt[2]) { - rdev->mode_info.afmt[2]->offset = EVERGREEN_CRTC2_REGISTER_OFFSET; - rdev->mode_info.afmt[2]->id = 2; - } - rdev->mode_info.afmt[3] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL); - if (rdev->mode_info.afmt[3]) { - rdev->mode_info.afmt[3]->offset = EVERGREEN_CRTC3_REGISTER_OFFSET; - rdev->mode_info.afmt[3]->id = 3; - } - rdev->mode_info.afmt[4] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL); - if (rdev->mode_info.afmt[4]) { - rdev->mode_info.afmt[4]->offset = EVERGREEN_CRTC4_REGISTER_OFFSET; - rdev->mode_info.afmt[4]->id = 4; - } - rdev->mode_info.afmt[5] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL); - if (rdev->mode_info.afmt[5]) { - rdev->mode_info.afmt[5]->offset = EVERGREEN_CRTC5_REGISTER_OFFSET; - rdev->mode_info.afmt[5]->id = 5; + if (ASIC_IS_DCE8(rdev)) + num_afmt = 7; + else if (ASIC_IS_DCE6(rdev)) + num_afmt = 6; + else if (ASIC_IS_DCE5(rdev)) + num_afmt = 6; + else if (ASIC_IS_DCE41(rdev)) + num_afmt = 2; + else /* DCE4 */ + num_afmt = 6; + + BUG_ON(num_afmt > ARRAY_SIZE(eg_offsets)); + for (i = 0; i < num_afmt; i++) { + rdev->mode_info.afmt[i] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL); + if (rdev->mode_info.afmt[i]) { + rdev->mode_info.afmt[i]->offset = eg_offsets[i]; + rdev->mode_info.afmt[i]->id = i; } } } else if (ASIC_IS_DCE3(rdev)) { diff --git a/sys/dev/drm/radeon/radeon_drv.c b/sys/dev/drm/radeon/radeon_drv.c index 1aad0c876d..9cb9433457 100644 --- a/sys/dev/drm/radeon/radeon_drv.c +++ b/sys/dev/drm/radeon/radeon_drv.c @@ -99,9 +99,6 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, int radeon_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -int radeon_mode_dumb_destroy(struct drm_file *file_priv, - struct drm_device *dev, - uint32_t handle); struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, size_t size, @@ -131,12 +128,12 @@ int radeon_dynclks = -1; int radeon_r4xx_atom = 0; int radeon_agpmode = 0; int radeon_vram_limit = 0; -int radeon_gart_size = 512; /* default gart size */ +int radeon_gart_size = -1; /* auto */ int radeon_benchmarking = 0; int radeon_testing = 0; int radeon_connector_table = 0; int radeon_tv = 1; -int radeon_audio = 0; +int radeon_audio = -1; int radeon_disp_priority = 0; int radeon_hw_i2c = 0; int radeon_pcie_gen2 = -1; @@ -170,7 +167,7 @@ MODULE_PARM_DESC(agpmode, "AGP Mode (-1 == PCI)"); module_param_named(agpmode, radeon_agpmode, int, 0444); TUNABLE_INT("drm.radeon.gart_size", &radeon_gart_size); -MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc)"); +MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)"); module_param_named(gartsize, radeon_gart_size, int, 0600); TUNABLE_INT("drm.radeon.benchmarking", &radeon_benchmarking); @@ -190,7 +187,7 @@ MODULE_PARM_DESC(tv, "TV enable (0 = disable)"); module_param_named(tv, radeon_tv, int, 0444); TUNABLE_INT("drm.radeon.audio", &radeon_audio); -MODULE_PARM_DESC(audio, "Audio enable (1 = enable)"); +MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)"); module_param_named(audio, radeon_audio, int, 0444); TUNABLE_INT("drm.radeon.disp_priority", &radeon_disp_priority); @@ -270,7 +267,6 @@ static const struct file_operations radeon_driver_old_fops = { .unlocked_ioctl = drm_ioctl, .mmap = drm_mmap, .poll = drm_poll, - .fasync = drm_fasync, .read = drm_read, #ifdef CONFIG_COMPAT .compat_ioctl = radeon_compat_ioctl, @@ -383,7 +379,6 @@ static const struct file_operations radeon_driver_kms_fops = { .unlocked_ioctl = drm_ioctl, .mmap = radeon_mmap, .poll = drm_poll, - .fasync = drm_fasync, .read = drm_read, #ifdef CONFIG_COMPAT .compat_ioctl = radeon_kms_compat_ioctl, @@ -393,15 +388,14 @@ static const struct file_operations radeon_driver_kms_fops = { static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG | - DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED | DRIVER_GEM | - DRIVER_PRIME /* | DRIVE_MODESET */, + DRIVER_USE_AGP | + DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | + DRIVER_PRIME | DRIVER_RENDER, #ifdef DUMBBELL_WIP .dev_priv_size = 0, #endif /* DUMBBELL_WIP */ .load = radeon_driver_load_kms, .use_msi = radeon_msi_ok, - .firstopen = radeon_driver_firstopen_kms, .open = radeon_driver_open_kms, .preclose = radeon_driver_preclose_kms, .postclose = radeon_driver_postclose_kms, @@ -424,10 +418,9 @@ static struct drm_driver kms_driver = { .gem_free_object = radeon_gem_object_free, .gem_open_object = radeon_gem_object_open, .gem_close_object = radeon_gem_object_close, - .dma_ioctl = radeon_dma_ioctl_kms, .dumb_create = radeon_mode_dumb_create, .dumb_map_offset = radeon_mode_dumb_mmap, - .dumb_destroy = radeon_mode_dumb_destroy, + .dumb_destroy = drm_gem_dumb_destroy, #ifdef DUMBBELL_WIP .fops = &radeon_driver_kms_fops, #endif /* DUMBBELL_WIP */ diff --git a/sys/dev/drm/radeon/radeon_fence.c b/sys/dev/drm/radeon/radeon_fence.c index 0e7f213795..be846e4cba 100644 --- a/sys/dev/drm/radeon/radeon_fence.c +++ b/sys/dev/drm/radeon/radeon_fence.c @@ -796,7 +796,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016jx and cpu addr 0x%p\n", - ring, (uintmax_t)rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); + ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); return 0; } diff --git a/sys/dev/drm/radeon/radeon_gem.c b/sys/dev/drm/radeon/radeon_gem.c index c14ccfd8cd..6712a4e03a 100644 --- a/sys/dev/drm/radeon/radeon_gem.c +++ b/sys/dev/drm/radeon/radeon_gem.c @@ -574,13 +574,6 @@ int radeon_mode_dumb_create(struct drm_file *file_priv, return 0; } -int radeon_mode_dumb_destroy(struct drm_file *file_priv, - struct drm_device *dev, - uint32_t handle) -{ - return drm_gem_handle_delete(file_priv, handle); -} - #if defined(CONFIG_DEBUG_FS) static int radeon_debugfs_gem_info(struct seq_file *m, void *data) { diff --git a/sys/dev/drm/radeon/radeon_irq_kms.c b/sys/dev/drm/radeon/radeon_irq_kms.c index a6b71608b9..d57487b39c 100644 --- a/sys/dev/drm/radeon/radeon_irq_kms.c +++ b/sys/dev/drm/radeon/radeon_irq_kms.c @@ -24,10 +24,7 @@ * Authors: Dave Airlie * Alex Deucher * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/radeon_irq_kms.c 254885 2013-08-25 19:37:15Z dumbbell $ */ - #include #include #include @@ -264,7 +261,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev) { int r = 0; - lockinit(&rdev->irq.lock, "drm__radeon_device__irq__lock", 0, LK_CANRECURSE); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { @@ -273,19 +269,20 @@ int radeon_irq_kms_init(struct radeon_device *rdev) /* enable msi */ rdev->msi_enabled = (rdev->ddev->irq_type == PCI_INTR_TYPE_MSI); + TASK_INIT(&rdev->hotplug_work, 0, radeon_hotplug_work_func, rdev); + TASK_INIT(&rdev->audio_work, 0, r600_audio_update_hdmi, rdev); + TASK_INIT(&rdev->reset_work, 0, radeon_irq_reset_work_func, rdev); + rdev->irq.installed = true; DRM_UNLOCK(rdev->ddev); r = drm_irq_install(rdev->ddev, rdev->ddev->irq); DRM_LOCK(rdev->ddev); if (r) { rdev->irq.installed = false; + taskqueue_drain(rdev->tq, &rdev->hotplug_work); return r; } - TASK_INIT(&rdev->hotplug_work, 0, radeon_hotplug_work_func, rdev); - TASK_INIT(&rdev->audio_work, 0, r600_audio_update_hdmi, rdev); - TASK_INIT(&rdev->reset_work, 0, radeon_irq_reset_work_func, rdev); - DRM_INFO("radeon: irq initialized.\n"); return 0; } @@ -303,8 +300,8 @@ void radeon_irq_kms_fini(struct radeon_device *rdev) if (rdev->irq.installed) { drm_irq_uninstall(rdev->ddev); rdev->irq.installed = false; + taskqueue_drain(rdev->tq, &rdev->hotplug_work); } - taskqueue_drain(rdev->tq, &rdev->hotplug_work); } /** diff --git a/sys/dev/drm/radeon/radeon_kms.c b/sys/dev/drm/radeon/radeon_kms.c index d8a6323537..b321487ef9 100644 --- a/sys/dev/drm/radeon/radeon_kms.c +++ b/sys/dev/drm/radeon/radeon_kms.c @@ -24,10 +24,7 @@ * Authors: Dave Airlie * Alex Deucher * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/radeon_kms.c 254885 2013-08-25 19:37:15Z dumbbell $ */ - #include #include "radeon.h" #include @@ -439,6 +436,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -EINVAL; } break; + case RADEON_INFO_SI_CP_DMA_COMPUTE: + *value = 1; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; @@ -455,19 +455,6 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file * Outdated mess for old drm with Xorg being in charge (void function now). */ /** - * radeon_driver_firstopen_kms - drm callback for first open - * - * @dev: drm dev pointer - * - * Nothing to be done for KMS (all asics). - * Returns 0 on success. - */ -int radeon_driver_firstopen_kms(struct drm_device *dev) -{ - return 0; -} - -/** * radeon_driver_firstopen_kms - drm callback for last close * * @dev: drm dev pointer @@ -689,16 +676,6 @@ int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, drmcrtc, &drmcrtc->hwmode); } -/* - * IOCTL. - */ -int radeon_dma_ioctl_kms(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - /* Not valid in KMS. */ - return -EINVAL; -} - #define KMS_INVALID_IOCTL(name) \ static int \ name(struct drm_device *dev, void *data, struct drm_file *file_priv) \ @@ -768,18 +745,18 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH), DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH), /* KMS */ - DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/sys/dev/drm/radeon/radeon_kms.h b/sys/dev/drm/radeon/radeon_kms.h index 8999e299df..66a0ecc202 100644 --- a/sys/dev/drm/radeon/radeon_kms.h +++ b/sys/dev/drm/radeon/radeon_kms.h @@ -8,7 +8,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); -int radeon_driver_firstopen_kms(struct drm_device *dev); void radeon_driver_lastclose_kms(struct drm_device *dev); int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv); void radeon_driver_postclose_kms(struct drm_device *dev, @@ -22,7 +21,5 @@ int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, int *max_error, struct timeval *vblank_time, unsigned flags); -int radeon_dma_ioctl_kms(struct drm_device *dev, void *data, - struct drm_file *file_priv); #endif /* !defined(__RADEON_KMS_H__) */ diff --git a/sys/dev/drm/radeon/radeon_mode.h b/sys/dev/drm/radeon/radeon_mode.h index 88c2ab916c..3cfdbc3708 100644 --- a/sys/dev/drm/radeon/radeon_mode.h +++ b/sys/dev/drm/radeon/radeon_mode.h @@ -25,8 +25,6 @@ * Kevin E. Martin, Rickard E. Faith, Alan Hourihane * * Kernel port Author: Dave Airlie - * - * $FreeBSD: head/sys/dev/drm2/radeon/radeon_mode.h 254885 2013-08-25 19:37:15Z dumbbell $ */ #ifndef RADEON_MODE_H @@ -223,6 +221,7 @@ struct radeon_afmt { int offset; bool last_buffer_filled_status; int id; + struct r600_audio_pin *pin; }; struct radeon_mode_info { @@ -231,7 +230,7 @@ struct radeon_mode_info { enum radeon_connector_table connector_table; bool mode_config_initialized; struct radeon_crtc *crtcs[6]; - struct radeon_afmt *afmt[6]; + struct radeon_afmt *afmt[7]; /* DVI-I properties */ struct drm_property *coherent_mode_property; /* DAC enable load detect */ @@ -244,6 +243,8 @@ struct radeon_mode_info { struct drm_property *underscan_property; struct drm_property *underscan_hborder_property; struct drm_property *underscan_vborder_property; + /* audio */ + struct drm_property *audio_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; @@ -468,6 +469,12 @@ struct radeon_router { u8 cd_mux_state; }; +enum radeon_connector_audio { + RADEON_AUDIO_DISABLE = 0, + RADEON_AUDIO_ENABLE = 1, + RADEON_AUDIO_AUTO = 2 +}; + struct radeon_connector { struct drm_connector base; uint32_t connector_id; @@ -486,6 +493,7 @@ struct radeon_connector { struct radeon_hpd hpd; struct radeon_router router; struct radeon_i2c_chan *router_bus; + enum radeon_connector_audio audio; }; struct radeon_framebuffer { diff --git a/sys/dev/drm/radeon/radeon_object.c b/sys/dev/drm/radeon/radeon_object.c index a07ccb9b64..28f1000035 100644 --- a/sys/dev/drm/radeon/radeon_object.c +++ b/sys/dev/drm/radeon/radeon_object.c @@ -330,7 +330,7 @@ int radeon_bo_init(struct radeon_device *rdev) rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base, rdev->mc.aper_size); } DRM_INFO("Detected VRAM RAM=%juM, BAR=%juM\n", - (uintmax_t)rdev->mc.mc_vram_size >> 20, + rdev->mc.mc_vram_size >> 20, (uintmax_t)rdev->mc.aper_size >> 20); DRM_INFO("RAM width %dbits %cDR\n", rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S'); @@ -353,14 +353,15 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj, } } -int radeon_bo_list_validate(struct list_head *head, int ring) +int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, + struct list_head *head, int ring) { struct radeon_bo_list *lobj; struct radeon_bo *bo; u32 domain; int r; - r = ttm_eu_reserve_buffers(head); + r = ttm_eu_reserve_buffers(ticket, head); if (unlikely(r != 0)) { return r; } diff --git a/sys/dev/drm/radeon/radeon_object.h b/sys/dev/drm/radeon/radeon_object.h index cb04135c83..87892bbd91 100644 --- a/sys/dev/drm/radeon/radeon_object.h +++ b/sys/dev/drm/radeon/radeon_object.h @@ -137,7 +137,8 @@ extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head); -extern int radeon_bo_list_validate(struct list_head *head, int ring); +extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, + struct list_head *head, int ring); #ifdef DUMBBELL_WIP extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma); diff --git a/sys/dev/drm/radeon/radeon_pm.c b/sys/dev/drm/radeon/radeon_pm.c index 813716f065..0b49720c79 100644 --- a/sys/dev/drm/radeon/radeon_pm.c +++ b/sys/dev/drm/radeon/radeon_pm.c @@ -71,7 +71,18 @@ int radeon_pm_get_type_index(struct radeon_device *rdev, void radeon_pm_acpi_event_handler(struct radeon_device *rdev) { - if (rdev->pm.pm_method == PM_METHOD_PROFILE) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + lockmgr(&rdev->pm.mutex, LK_EXCLUSIVE); +#if zRJ_TODO + if (power_supply_is_system_supplied() > 0) + rdev->pm.dpm.ac_power = true; + else +#endif + rdev->pm.dpm.ac_power = false; + if (rdev->asic->dpm.enable_bapm) + radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power); + lockmgr(&rdev->pm.mutex, LK_RELEASE); + } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) { if (rdev->pm.profile == PM_PROFILE_AUTO) { lockmgr(&rdev->pm.mutex, LK_EXCLUSIVE); radeon_pm_update_profile(rdev); @@ -595,6 +606,8 @@ static int radeon_hwmon_init(struct radeon_device *rdev) case THERMAL_TYPE_NI: case THERMAL_TYPE_SUMO: case THERMAL_TYPE_SI: + case THERMAL_TYPE_CI: + case THERMAL_TYPE_KV: if (rdev->asic->pm.get_temperature == NULL) return err; @@ -650,7 +663,15 @@ static void radeon_dpm_thermal_work_handler(void *arg, int pending) /* switch back the user state */ dpm_state = rdev->pm.dpm.user_state; } - radeon_dpm_enable_power_state(rdev, dpm_state); + mutex_lock(&rdev->pm.mutex); + if (dpm_state == POWER_STATE_TYPE_INTERNAL_THERMAL) + rdev->pm.dpm.thermal_active = true; + else + rdev->pm.dpm.thermal_active = false; + rdev->pm.dpm.state = dpm_state; + mutex_unlock(&rdev->pm.mutex); + + radeon_pm_compute_clocks(rdev); } static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev, @@ -713,7 +734,10 @@ restart_search: break; /* internal states */ case POWER_STATE_TYPE_INTERNAL_UVD: - return rdev->pm.dpm.uvd_ps; + if (rdev->pm.dpm.uvd_ps) + return rdev->pm.dpm.uvd_ps; + else + break; case POWER_STATE_TYPE_INTERNAL_UVD_SD: if (ps->class & ATOM_PPLIB_CLASSIFICATION_SDSTATE) return ps; @@ -755,10 +779,17 @@ restart_search: /* use a fallback state if we didn't match */ switch (dpm_state) { case POWER_STATE_TYPE_INTERNAL_UVD_SD: + dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD; + goto restart_search; case POWER_STATE_TYPE_INTERNAL_UVD_HD: case POWER_STATE_TYPE_INTERNAL_UVD_HD2: case POWER_STATE_TYPE_INTERNAL_UVD_MVC: - return rdev->pm.dpm.uvd_ps; + if (rdev->pm.dpm.uvd_ps) { + return rdev->pm.dpm.uvd_ps; + } else { + dpm_state = POWER_STATE_TYPE_PERFORMANCE; + goto restart_search; + } case POWER_STATE_TYPE_INTERNAL_THERMAL: dpm_state = POWER_STATE_TYPE_INTERNAL_ACPI; goto restart_search; @@ -876,38 +907,57 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) radeon_dpm_post_set_power_state(rdev); + if (rdev->asic->dpm.force_performance_level) { + if (rdev->pm.dpm.thermal_active) + /* force low perf level for thermal */ + radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_LOW); + else + /* otherwise, enable auto */ + radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); + } + done: lockmgr(&rdev->ring_lock, LK_RELEASE); lockmgr(&rdev->pm.mclk_lock, LK_RELEASE); // up_write lockmgr(&rdev->ddev->struct_mutex, LK_RELEASE); } -void radeon_dpm_enable_power_state(struct radeon_device *rdev, - enum radeon_pm_state_type dpm_state) +void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) { - if (!rdev->pm.dpm_enabled) - return; + enum radeon_pm_state_type dpm_state; - lockmgr(&rdev->pm.mutex, LK_EXCLUSIVE); - switch (dpm_state) { - case POWER_STATE_TYPE_INTERNAL_THERMAL: - rdev->pm.dpm.thermal_active = true; - break; - case POWER_STATE_TYPE_INTERNAL_UVD: - case POWER_STATE_TYPE_INTERNAL_UVD_SD: - case POWER_STATE_TYPE_INTERNAL_UVD_HD: - case POWER_STATE_TYPE_INTERNAL_UVD_HD2: - case POWER_STATE_TYPE_INTERNAL_UVD_MVC: - rdev->pm.dpm.uvd_active = true; - break; - default: - rdev->pm.dpm.thermal_active = false; - rdev->pm.dpm.uvd_active = false; - break; + if (rdev->asic->dpm.powergate_uvd) { + mutex_lock(&rdev->pm.mutex); + /* enable/disable UVD */ + radeon_dpm_powergate_uvd(rdev, !enable); + mutex_unlock(&rdev->pm.mutex); + } else { + if (enable) { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.uvd_active = true; + /* disable this for now */ +#if 0 + if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0)) + dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD; + else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0)) + dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD; + else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 1)) + dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD; + else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2)) + dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2; + else +#endif + dpm_state = POWER_STATE_TYPE_INTERNAL_UVD; + rdev->pm.dpm.state = dpm_state; + mutex_unlock(&rdev->pm.mutex); + } else { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.uvd_active = false; + mutex_unlock(&rdev->pm.mutex); + } + + radeon_pm_compute_clocks(rdev); } - rdev->pm.dpm.state = dpm_state; - lockmgr(&rdev->pm.mutex, LK_RELEASE); - radeon_pm_compute_clocks(rdev); } static void radeon_pm_suspend_old(struct radeon_device *rdev) @@ -947,7 +997,7 @@ static void radeon_pm_resume_old(struct radeon_device *rdev) { /* set up the default clocks if the MC ucode is loaded */ if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, @@ -993,7 +1043,7 @@ static void radeon_pm_resume_dpm(struct radeon_device *rdev) if (ret) { DRM_ERROR("radeon: dpm resume failed\n"); if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, @@ -1044,7 +1094,7 @@ static int radeon_pm_init_old(struct radeon_device *rdev) radeon_pm_init_profile(rdev); /* set up the default clocks if the MC ucode is loaded */ if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, @@ -1105,9 +1155,10 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev) { int ret; - /* default to performance state */ + /* default to balanced state */ rdev->pm.dpm.state = POWER_STATE_TYPE_BALANCED; rdev->pm.dpm.user_state = POWER_STATE_TYPE_BALANCED; + rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; rdev->pm.default_sclk = rdev->clock.default_sclk; rdev->pm.default_mclk = rdev->clock.default_mclk; rdev->pm.current_sclk = rdev->clock.default_sclk; @@ -1135,7 +1186,7 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev) if (ret) { rdev->pm.dpm_enabled = false; if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, @@ -1214,6 +1265,9 @@ int radeon_pm_init(struct radeon_device *rdev) case CHIP_VERDE: case CHIP_OLAND: case CHIP_HAINAN: + case CHIP_BONAIRE: + case CHIP_KABINI: + case CHIP_KAVERI: /* DPM requires the RLC, RV770+ dGPU requires SMC */ if (!rdev->rlc_fw) rdev->pm.pm_method = PM_METHOD_PROFILE; diff --git a/sys/dev/drm/radeon/radeon_prime.c b/sys/dev/drm/radeon/radeon_prime.c index 65b9eabd5a..20074560fc 100644 --- a/sys/dev/drm/radeon/radeon_prime.c +++ b/sys/dev/drm/radeon/radeon_prime.c @@ -68,7 +68,6 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, RADEON_GEM_DOMAIN_GTT, sg, &bo); if (ret) return ERR_PTR(ret); - bo->gem_base.driver_private = bo; mutex_lock(&rdev->gem.mutex); list_add_tail(&bo->list, &rdev->gem.objects); diff --git a/sys/dev/drm/radeon/radeon_ring.c b/sys/dev/drm/radeon/radeon_ring.c index 0daf0171bc..07cd0e4f14 100644 --- a/sys/dev/drm/radeon/radeon_ring.c +++ b/sys/dev/drm/radeon/radeon_ring.c @@ -370,11 +370,10 @@ u32 radeon_ring_generic_get_rptr(struct radeon_device *rdev, { u32 rptr; - if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX]) + if (rdev->wb.enabled) rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); else rptr = RREG32(ring->rptr_reg); - rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return rptr; } @@ -385,7 +384,6 @@ u32 radeon_ring_generic_get_wptr(struct radeon_device *rdev, u32 wptr; wptr = RREG32(ring->wptr_reg); - wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift; return wptr; } @@ -393,7 +391,7 @@ u32 radeon_ring_generic_get_wptr(struct radeon_device *rdev, void radeon_ring_generic_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { - WREG32(ring->wptr_reg, (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask); + WREG32(ring->wptr_reg, ring->wptr); (void)RREG32(ring->wptr_reg); } @@ -726,16 +724,13 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, * @rptr_offs: offset of the rptr writeback location in the WB buffer * @rptr_reg: MMIO offset of the rptr register * @wptr_reg: MMIO offset of the wptr register - * @ptr_reg_shift: bit offset of the rptr/wptr values - * @ptr_reg_mask: bit mask of the rptr/wptr values * @nop: nop packet for this ring * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. */ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size, - unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, - u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop) + unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop) { int r; void *ring_ptr; @@ -744,8 +739,6 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig ring->rptr_offs = rptr_offs; ring->rptr_reg = rptr_reg; ring->wptr_reg = wptr_reg; - ring->ptr_reg_shift = ptr_reg_shift; - ring->ptr_reg_mask = ptr_reg_mask; ring->nop = nop; /* Allocate ring buffer */ if (ring->ring_obj == NULL) { @@ -861,9 +854,11 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) * packet that is the root issue */ i = (ring->rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; - for (j = 0; j <= (count + 32); j++) { - seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]); - i = (i + 1) & ring->ptr_mask; + if (ring->ready) { + for (j = 0; j <= (count + 32); j++) { + seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]); + i = (i + 1) & ring->ptr_mask; + } } return 0; } diff --git a/sys/dev/drm/radeon/radeon_test.c b/sys/dev/drm/radeon/radeon_test.c index 18d99039d9..69cebd9584 100644 --- a/sys/dev/drm/radeon/radeon_test.c +++ b/sys/dev/drm/radeon/radeon_test.c @@ -40,8 +40,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) struct radeon_bo **gtt_obj = NULL; struct radeon_fence *fence = NULL; uint64_t gtt_addr, vram_addr; - unsigned i, n, size; - int r, ring; + unsigned n, size; + int i, r, ring; switch (flag) { case RADEON_TEST_COPY_DMA: diff --git a/sys/dev/drm/radeon/radeon_trace.h b/sys/dev/drm/radeon/radeon_trace.h index eafd8160a1..f7e3678159 100644 --- a/sys/dev/drm/radeon/radeon_trace.h +++ b/sys/dev/drm/radeon/radeon_trace.h @@ -27,6 +27,26 @@ TRACE_EVENT(radeon_bo_create, TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages) ); +TRACE_EVENT(radeon_cs, + TP_PROTO(struct radeon_cs_parser *p), + TP_ARGS(p), + TP_STRUCT__entry( + __field(u32, ring) + __field(u32, dw) + __field(u32, fences) + ), + + TP_fast_assign( + __entry->ring = p->ring; + __entry->dw = p->chunks[p->chunk_ib_idx].length_dw; + __entry->fences = radeon_fence_count_emitted( + p->rdev, p->ring); + ), + TP_printk("ring=%u, dw=%u, fences=%u", + __entry->ring, __entry->dw, + __entry->fences) +); + DECLARE_EVENT_CLASS(radeon_fence_request, TP_PROTO(struct drm_device *dev, u32 seqno), @@ -53,13 +73,6 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_emit, TP_ARGS(dev, seqno) ); -DEFINE_EVENT(radeon_fence_request, radeon_fence_retire, - - TP_PROTO(struct drm_device *dev, u32 seqno), - - TP_ARGS(dev, seqno) -); - DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_begin, TP_PROTO(struct drm_device *dev, u32 seqno), diff --git a/sys/dev/drm/radeon/radeon_ucode.h b/sys/dev/drm/radeon/radeon_ucode.h index d8b05f7bcf..33858364fe 100644 --- a/sys/dev/drm/radeon/radeon_ucode.h +++ b/sys/dev/drm/radeon/radeon_ucode.h @@ -35,6 +35,12 @@ #define SI_PFP_UCODE_SIZE 2144 #define SI_PM4_UCODE_SIZE 2144 #define SI_CE_UCODE_SIZE 2144 +#define CIK_PFP_UCODE_SIZE 2144 +#define CIK_ME_UCODE_SIZE 2144 +#define CIK_CE_UCODE_SIZE 2144 + +/* MEC */ +#define CIK_MEC_UCODE_SIZE 4192 /* RLC */ #define R600_RLC_UCODE_SIZE 768 @@ -43,12 +49,20 @@ #define CAYMAN_RLC_UCODE_SIZE 1024 #define ARUBA_RLC_UCODE_SIZE 1536 #define SI_RLC_UCODE_SIZE 2048 +#define BONAIRE_RLC_UCODE_SIZE 2048 +#define KB_RLC_UCODE_SIZE 2560 +#define KV_RLC_UCODE_SIZE 2560 /* MC */ #define BTC_MC_UCODE_SIZE 6024 #define CAYMAN_MC_UCODE_SIZE 6037 #define SI_MC_UCODE_SIZE 7769 #define OLAND_MC_UCODE_SIZE 7863 +#define CIK_MC_UCODE_SIZE 7866 + +/* SDMA */ +#define CIK_SDMA_UCODE_SIZE 1050 +#define CIK_SDMA_UCODE_VERSION 64 /* SMC */ #define RV770_SMC_UCODE_START 0x0100 @@ -126,4 +140,7 @@ #define HAINAN_SMC_UCODE_START 0x10000 #define HAINAN_SMC_UCODE_SIZE 0xe67C +#define BONAIRE_SMC_UCODE_START 0x20000 +#define BONAIRE_SMC_UCODE_SIZE 0x1FDEC + #endif diff --git a/sys/dev/drm/radeon/radeon_uvd.c b/sys/dev/drm/radeon/radeon_uvd.c index 116546a051..b98607dd91 100644 --- a/sys/dev/drm/radeon/radeon_uvd.c +++ b/sys/dev/drm/radeon/radeon_uvd.c @@ -146,6 +146,7 @@ int radeon_uvd_init(struct radeon_device *rdev) for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { atomic_set(&rdev->uvd.handles[i], 0); rdev->uvd.filp[i] = NULL; + rdev->uvd.img_size[i] = 0; } return 0; @@ -346,6 +347,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, unsigned offset, unsigned buf_sizes[]) { int32_t *msg, msg_type, handle; + unsigned img_size = 0; void *ptr; int i, r; @@ -382,6 +384,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, if (msg_type == 1) { /* it's a decode msg, calc buffer sizes */ r = radeon_uvd_cs_msg_decode(msg, buf_sizes); + /* calc image size (width * height) */ + img_size = msg[6] * msg[7]; radeon_bo_kunmap(bo); if (r) return r; @@ -393,6 +397,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, radeon_bo_kunmap(bo); return 0; } else { + /* it's a create msg, calc image size (width * height) */ + img_size = msg[7] * msg[8]; radeon_bo_kunmap(bo); if (msg_type != 0) { @@ -416,6 +422,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, #endif if (atomic_cmpset(&p->rdev->uvd.handles[i], 0, handle) == 1) { p->rdev->uvd.filp[i] = p->filp; + p->rdev->uvd.img_size[i] = img_size; return 0; } } @@ -592,6 +599,7 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, struct radeon_fence **fence) { struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; struct list_head head; struct radeon_ib ib; uint64_t addr; @@ -603,7 +611,7 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, INIT_LIST_HEAD(&head); list_add(&tv.head, &head); - r = ttm_eu_reserve_buffers(&head); + r = ttm_eu_reserve_buffers(&ticket, &head); if (r) return r; @@ -611,16 +619,12 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, radeon_uvd_force_into_uvd_segment(bo); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - if (r) { - ttm_eu_backoff_reservation(&head); - return r; - } + if (r) + goto err; r = radeon_ib_get(rdev, ring, &ib, NULL, 16); - if (r) { - ttm_eu_backoff_reservation(&head); - return r; - } + if (r) + goto err; addr = radeon_bo_gpu_offset(bo); ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); @@ -634,11 +638,9 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, ib.length_dw = 16; r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - ttm_eu_backoff_reservation(&head); - return r; - } - ttm_eu_fence_buffer_objects(&head, ib.fence); + if (r) + goto err; + ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); if (fence) *fence = radeon_fence_ref(ib.fence); @@ -646,6 +648,10 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); radeon_bo_unref(&bo); return 0; + +err: + ttm_eu_backoff_reservation(&ticket, &head); + return r; } /* multiple fence commands without any stream commands in between can @@ -736,6 +742,34 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, return radeon_uvd_send_msg(rdev, ring, bo, fence); } +/** + * radeon_uvd_count_handles - count number of open streams + * + * @rdev: radeon_device pointer + * @sd: number of SD streams + * @hd: number of HD streams + * + * Count the number of open SD/HD streams as a hint for power mangement + */ +static void radeon_uvd_count_handles(struct radeon_device *rdev, + unsigned *sd, unsigned *hd) +{ + unsigned i; + + *sd = 0; + *hd = 0; + + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { + if (!atomic_read(&rdev->uvd.handles[i])) + continue; + + if (rdev->uvd.img_size[i] >= 720*576) + ++(*hd); + else + ++(*sd); + } +} + static void radeon_uvd_idle_work_handler(struct work_struct *work) { struct radeon_device *rdev = @@ -743,10 +777,7 @@ static void radeon_uvd_idle_work_handler(struct work_struct *work) if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { - lockmgr(&rdev->pm.mutex, LK_EXCLUSIVE); - rdev->pm.dpm.uvd_active = false; - lockmgr(&rdev->pm.mutex, LK_RELEASE); - radeon_pm_compute_clocks(rdev); + radeon_dpm_enable_uvd(rdev, false); } else { radeon_set_uvd_clocks(rdev, 0, 0); } @@ -758,13 +789,26 @@ static void radeon_uvd_idle_work_handler(struct work_struct *work) void radeon_uvd_note_usage(struct radeon_device *rdev) { + bool streams_changed = false; bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); - if (set_clocks) { + + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + unsigned hd = 0, sd = 0; + radeon_uvd_count_handles(rdev, &sd, &hd); + if ((rdev->pm.dpm.sd != sd) || + (rdev->pm.dpm.hd != hd)) { + rdev->pm.dpm.sd = sd; + rdev->pm.dpm.hd = hd; + /* disable this for now */ + /*streams_changed = true;*/ + } + } + + if (set_clocks || streams_changed) { if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { - /* XXX pick SD/HD/MVC */ - radeon_dpm_enable_power_state(rdev, POWER_STATE_TYPE_INTERNAL_UVD); + radeon_dpm_enable_uvd(rdev, true); } else { radeon_set_uvd_clocks(rdev, 53300, 40000); } diff --git a/sys/dev/drm/radeon/rs400.c b/sys/dev/drm/radeon/rs400.c index 8161b57b93..2df55e52ae 100644 --- a/sys/dev/drm/radeon/rs400.c +++ b/sys/dev/drm/radeon/rs400.c @@ -24,8 +24,6 @@ * Authors: Dave Airlie * Alex Deucher * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/rs400.c 254885 2013-08-25 19:37:15Z dumbbell $ */ #include #include "radeon.h" @@ -177,10 +175,14 @@ int rs400_gart_enable(struct radeon_device *rdev) RS690_AGPMODE30 | RS690_AGP30ENHANCED); WREG32_MC(RS690_MC_NB_CNTL, tmp); - WREG32_MC(RS480_MC_MISC_CNTL, - (RS480_GART_INDEX_REG_EN | RS690_BLOCK_GFX_D3_EN)); + + tmp = RREG32_MC(RS480_MC_MISC_CNTL); + tmp |= RS480_GART_INDEX_REG_EN | RS690_BLOCK_GFX_D3_EN; + WREG32_MC(RS480_MC_MISC_CNTL, tmp); } else { - WREG32_MC(RS480_MC_MISC_CNTL, RS480_GART_INDEX_REG_EN); + tmp = RREG32_MC(RS480_MC_MISC_CNTL); + tmp |= RS480_GART_INDEX_REG_EN; + WREG32_MC(RS480_MC_MISC_CNTL, tmp); } /* Enable gart */ WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | size_reg)); diff --git a/sys/dev/drm/radeon/rs780_dpm.c b/sys/dev/drm/radeon/rs780_dpm.c index e6e030a2a3..51499f0125 100644 --- a/sys/dev/drm/radeon/rs780_dpm.c +++ b/sys/dev/drm/radeon/rs780_dpm.c @@ -63,9 +63,7 @@ static void rs780_get_pm_mode_parameters(struct radeon_device *rdev) radeon_crtc = to_radeon_crtc(crtc); pi->crtc_id = radeon_crtc->crtc_id; if (crtc->mode.htotal && crtc->mode.vtotal) - pi->refresh_rate = - (crtc->mode.clock * 1000) / - (crtc->mode.htotal * crtc->mode.vtotal); + pi->refresh_rate = drm_mode_vrefresh(&crtc->mode); break; } } @@ -377,9 +375,8 @@ static void rs780_disable_vbios_powersaving(struct radeon_device *rdev) WREG32_P(CG_INTGFX_MISC, 0, ~0xFFF00000); } -static void rs780_force_voltage_to_high(struct radeon_device *rdev) +static void rs780_force_voltage(struct radeon_device *rdev, u16 voltage) { - struct igp_power_info *pi = rs780_get_pi(rdev); struct igp_ps *current_state = rs780_get_ps(rdev->pm.dpm.current_ps); if ((current_state->max_voltage == RS780_VDDC_LEVEL_HIGH) && @@ -391,7 +388,7 @@ static void rs780_force_voltage_to_high(struct radeon_device *rdev) udelay(1); WREG32_P(FVTHROT_PWM_CTRL_REG0, - STARTING_PWM_HIGHTIME(pi->max_voltage), + STARTING_PWM_HIGHTIME(voltage), ~STARTING_PWM_HIGHTIME_MASK); WREG32_P(FVTHROT_PWM_CTRL_REG0, @@ -405,6 +402,26 @@ static void rs780_force_voltage_to_high(struct radeon_device *rdev) WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~SPLL_BYPASS_CNTL); } +static void rs780_force_fbdiv(struct radeon_device *rdev, u32 fb_div) +{ + struct igp_ps *current_state = rs780_get_ps(rdev->pm.dpm.current_ps); + + if (current_state->sclk_low == current_state->sclk_high) + return; + + WREG32_P(GFX_MACRO_BYPASS_CNTL, SPLL_BYPASS_CNTL, ~SPLL_BYPASS_CNTL); + + WREG32_P(FVTHROT_FBDIV_REG2, FORCED_FEEDBACK_DIV(fb_div), + ~FORCED_FEEDBACK_DIV_MASK); + WREG32_P(FVTHROT_FBDIV_REG1, STARTING_FEEDBACK_DIV(fb_div), + ~STARTING_FEEDBACK_DIV_MASK); + WREG32_P(FVTHROT_FBDIV_REG1, FORCE_FEEDBACK_DIV, ~FORCE_FEEDBACK_DIV); + + udelay(100); + + WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~SPLL_BYPASS_CNTL); +} + static int rs780_set_engine_clock_scaling(struct radeon_device *rdev, struct radeon_ps *new_ps, struct radeon_ps *old_ps) @@ -433,17 +450,13 @@ static int rs780_set_engine_clock_scaling(struct radeon_device *rdev, if (ret) return ret; - WREG32_P(GFX_MACRO_BYPASS_CNTL, SPLL_BYPASS_CNTL, ~SPLL_BYPASS_CNTL); - - WREG32_P(FVTHROT_FBDIV_REG2, FORCED_FEEDBACK_DIV(max_dividers.fb_div), - ~FORCED_FEEDBACK_DIV_MASK); - WREG32_P(FVTHROT_FBDIV_REG1, STARTING_FEEDBACK_DIV(max_dividers.fb_div), - ~STARTING_FEEDBACK_DIV_MASK); - WREG32_P(FVTHROT_FBDIV_REG1, FORCE_FEEDBACK_DIV, ~FORCE_FEEDBACK_DIV); - - udelay(100); + if ((min_dividers.ref_div != max_dividers.ref_div) || + (min_dividers.post_div != max_dividers.post_div) || + (max_dividers.ref_div != current_max_dividers.ref_div) || + (max_dividers.post_div != current_max_dividers.post_div)) + return -EINVAL; - WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~SPLL_BYPASS_CNTL); + rs780_force_fbdiv(rdev, max_dividers.fb_div); if (max_dividers.fb_div > min_dividers.fb_div) { WREG32_P(FVTHROT_FBDIV_REG0, @@ -487,6 +500,9 @@ static void rs780_activate_engine_clk_scaling(struct radeon_device *rdev, (new_state->sclk_low == old_state->sclk_low)) return; + if (new_state->sclk_high == new_state->sclk_low) + return; + rs780_clk_scaling_enable(rdev, true); } @@ -650,7 +666,7 @@ int rs780_dpm_set_power_state(struct radeon_device *rdev) rs780_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps); if (pi->voltage_control) { - rs780_force_voltage_to_high(rdev); + rs780_force_voltage(rdev, pi->max_voltage); mdelay(5); } @@ -718,14 +734,18 @@ static void rs780_parse_pplib_non_clock_info(struct radeon_device *rdev, if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); - } else if (r600_is_uvd_state(rps->class, rps->class2)) { - rps->vclk = RS780_DEFAULT_VCLK_FREQ; - rps->dclk = RS780_DEFAULT_DCLK_FREQ; } else { rps->vclk = 0; rps->dclk = 0; } + if (r600_is_uvd_state(rps->class, rps->class2)) { + if ((rps->vclk == 0) || (rps->dclk == 0)) { + rps->vclk = RS780_DEFAULT_VCLK_FREQ; + rps->dclk = RS780_DEFAULT_DCLK_FREQ; + } + } + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) rdev->pm.dpm.boot_ps = rps; if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) @@ -987,3 +1007,55 @@ void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rde seq_printf(m, "power level 1 sclk: %u vddc_index: %d\n", ps->sclk_high, ps->max_voltage); } + +int rs780_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level) +{ + struct igp_power_info *pi = rs780_get_pi(rdev); + struct radeon_ps *rps = rdev->pm.dpm.current_ps; + struct igp_ps *ps = rs780_get_ps(rps); + struct atom_clock_dividers dividers; + int ret; + + rs780_clk_scaling_enable(rdev, false); + rs780_voltage_scaling_enable(rdev, false); + + if (level == RADEON_DPM_FORCED_LEVEL_HIGH) { + if (pi->voltage_control) + rs780_force_voltage(rdev, pi->max_voltage); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + ps->sclk_high, false, ÷rs); + if (ret) + return ret; + + rs780_force_fbdiv(rdev, dividers.fb_div); + } else if (level == RADEON_DPM_FORCED_LEVEL_LOW) { + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + ps->sclk_low, false, ÷rs); + if (ret) + return ret; + + rs780_force_fbdiv(rdev, dividers.fb_div); + + if (pi->voltage_control) + rs780_force_voltage(rdev, pi->min_voltage); + } else { + if (pi->voltage_control) + rs780_force_voltage(rdev, pi->max_voltage); + + if (ps->sclk_high != ps->sclk_low) { + WREG32_P(FVTHROT_FBDIV_REG1, 0, ~FORCE_FEEDBACK_DIV); + rs780_clk_scaling_enable(rdev, true); + } + + if (pi->voltage_control) { + rs780_voltage_scaling_enable(rdev, true); + rs780_enable_voltage_scaling(rdev, rps); + } + } + + rdev->pm.dpm.forced_level = level; + + return 0; +} diff --git a/sys/dev/drm/radeon/rv6xx_dpm.c b/sys/dev/drm/radeon/rv6xx_dpm.c index 0f17311049..85d4274d15 100644 --- a/sys/dev/drm/radeon/rv6xx_dpm.c +++ b/sys/dev/drm/radeon/rv6xx_dpm.c @@ -1759,8 +1759,6 @@ int rv6xx_dpm_set_power_state(struct radeon_device *rdev) rv6xx_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; - return 0; } @@ -1919,6 +1917,7 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev) (power_state->v1.ucNonClockStateIndex * power_info->pplib.ucNonClockSize)); if (power_info->pplib.ucStateEntrySize - 1) { + u8 *idx; ps = kzalloc(sizeof(struct rv6xx_ps), GFP_KERNEL); if (ps == NULL) { kfree(rdev->pm.dpm.ps); @@ -1927,12 +1926,12 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev) rdev->pm.dpm.ps[i].ps_priv = ps; rv6xx_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], non_clock_info); + idx = (u8 *)&power_state->v1.ucClockStateIndices[0]; for (j = 0; j < (power_info->pplib.ucStateEntrySize - 1); j++) { clock_info = (union pplib_clock_info *) ((uint8_t*)mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset) + - (power_state->v1.ucClockStateIndices[j] * - power_info->pplib.ucClockInfoSize)); + (idx[j] * power_info->pplib.ucClockInfoSize)); rv6xx_parse_pplib_clock_info(rdev, &rdev->pm.dpm.ps[i], j, clock_info); diff --git a/sys/dev/drm/radeon/rv770.c b/sys/dev/drm/radeon/rv770.c index 51f558a5eb..741a768c30 100644 --- a/sys/dev/drm/radeon/rv770.c +++ b/sys/dev/drm/radeon/rv770.c @@ -24,8 +24,6 @@ * Authors: Dave Airlie * Alex Deucher * Jerome Glisse - * - * $FreeBSD: head/sys/dev/drm2/radeon/rv770.c 254885 2013-08-25 19:37:15Z dumbbell $ */ #include #include @@ -799,103 +797,6 @@ u32 rv770_get_xclk(struct radeon_device *rdev) return reference_clock; } -int rv770_uvd_resume(struct radeon_device *rdev) -{ - uint64_t addr; - uint32_t chip_id, size; - int r; - - r = radeon_uvd_resume(rdev); - if (r) - return r; - - /* programm the VCPU memory controller bits 0-27 */ - addr = rdev->uvd.gpu_addr >> 3; - size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET0, addr); - WREG32(UVD_VCPU_CACHE_SIZE0, size); - - addr += size; - size = RADEON_UVD_STACK_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET1, addr); - WREG32(UVD_VCPU_CACHE_SIZE1, size); - - addr += size; - size = RADEON_UVD_HEAP_SIZE >> 3; - WREG32(UVD_VCPU_CACHE_OFFSET2, addr); - WREG32(UVD_VCPU_CACHE_SIZE2, size); - - /* bits 28-31 */ - addr = (rdev->uvd.gpu_addr >> 28) & 0xF; - WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); - - /* bits 32-39 */ - addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; - WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); - - /* tell firmware which hardware it is running on */ - switch (rdev->family) { - default: - return -EINVAL; - case CHIP_RV710: - chip_id = 0x01000005; - break; - case CHIP_RV730: - chip_id = 0x01000006; - break; - case CHIP_RV740: - chip_id = 0x01000007; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - chip_id = 0x01000008; - break; - case CHIP_JUNIPER: - chip_id = 0x01000009; - break; - case CHIP_REDWOOD: - chip_id = 0x0100000a; - break; - case CHIP_CEDAR: - chip_id = 0x0100000b; - break; - case CHIP_SUMO: - case CHIP_SUMO2: - chip_id = 0x0100000c; - break; - case CHIP_PALM: - chip_id = 0x0100000e; - break; - case CHIP_CAYMAN: - chip_id = 0x0100000f; - break; - case CHIP_BARTS: - chip_id = 0x01000010; - break; - case CHIP_TURKS: - chip_id = 0x01000011; - break; - case CHIP_CAICOS: - chip_id = 0x01000012; - break; - case CHIP_TAHITI: - chip_id = 0x01000014; - break; - case CHIP_VERDE: - chip_id = 0x01000015; - break; - case CHIP_PITCAIRN: - chip_id = 0x01000016; - break; - case CHIP_ARUBA: - chip_id = 0x01000017; - break; - } - WREG32(UVD_VCPU_CHIP_ID, chip_id); - - return 0; -} - u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; @@ -1691,8 +1592,8 @@ void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) } mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; dev_info(rdev->dev, "VRAM: %juM 0x%08jX - 0x%08jX (%juM used)\n", - (uintmax_t)mc->mc_vram_size >> 20, (uintmax_t)mc->vram_start, - (uintmax_t)mc->vram_end, (uintmax_t)mc->real_vram_size >> 20); + mc->mc_vram_size >> 20, mc->vram_start, + mc->vram_end, mc->real_vram_size >> 20); } else { radeon_vram_location(rdev, &rdev->mc, 0); rdev->mc.gtt_base_align = 0; @@ -1745,80 +1646,6 @@ static int rv770_mc_init(struct radeon_device *rdev) return 0; } -/** - * rv770_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (r7xx). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int rv770_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_dw, cur_size_in_dw; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; - num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); - r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_dw = size_in_dw; - if (cur_size_in_dw > 0xFFFF) - cur_size_in_dw = 0xFFFF; - size_in_dw -= cur_size_in_dw; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); - radeon_ring_write(ring, dst_offset & 0xfffffffc); - radeon_ring_write(ring, src_offset & 0xfffffffc); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); - src_offset += cur_size_in_dw * 4; - dst_offset += cur_size_in_dw * 4; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - static int rv770_startup(struct radeon_device *rdev) { struct radeon_ring *ring; @@ -1827,6 +1654,11 @@ static int rv770_startup(struct radeon_device *rdev) /* enable pcie gen2 link */ rv770_pcie_gen2_enable(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + rv770_mc_program(rdev); if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { @@ -1837,10 +1669,6 @@ static int rv770_startup(struct radeon_device *rdev) } } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - if (rdev->flags & RADEON_IS_AGP) { rv770_agp_enable(rdev); } else { @@ -1850,12 +1678,6 @@ static int rv770_startup(struct radeon_device *rdev) } rv770_gpu_init(rdev); - r = r600_blit_init(rdev); - if (r) { - r600_blit_fini(rdev); - rdev->asic->copy.copy = NULL; - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); - } /* allocate wb buffer */ r = radeon_wb_init(rdev); @@ -1874,7 +1696,7 @@ static int rv770_startup(struct radeon_device *rdev) return r; } - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -1903,14 +1725,14 @@ static int rv770_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR, DMA_RB_WPTR, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; @@ -1927,12 +1749,11 @@ static int rv770_startup(struct radeon_device *rdev) ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); @@ -1982,7 +1803,7 @@ int rv770_resume(struct radeon_device *rdev) int rv770_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); @@ -2090,7 +1911,6 @@ int rv770_init(struct radeon_device *rdev) void rv770_fini(struct radeon_device *rdev) { - r600_blit_fini(rdev); r700_cp_fini(rdev); r600_dma_fini(rdev); r600_irq_fini(rdev); @@ -2098,7 +1918,7 @@ void rv770_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/sys/dev/drm/radeon/rv770_dma.c b/sys/dev/drm/radeon/rv770_dma.c new file mode 100644 index 0000000000..f9b02e3d68 --- /dev/null +++ b/sys/dev/drm/radeon/rv770_dma.c @@ -0,0 +1,101 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "rv770d.h" + +/** + * rv770_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (r7xx). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int rv770_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFF) + cur_size_in_dw = 0xFFFF; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} diff --git a/sys/dev/drm/radeon/rv770_dpm.c b/sys/dev/drm/radeon/rv770_dpm.c index 51b3afa710..0f003e2e39 100644 --- a/sys/dev/drm/radeon/rv770_dpm.c +++ b/sys/dev/drm/radeon/rv770_dpm.c @@ -2070,12 +2070,6 @@ int rv770_dpm_set_power_state(struct radeon_device *rdev) rv770_program_dcodt_after_state_switch(rdev, new_ps, old_ps); rv770_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - ret = rv770_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("rv770_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } @@ -2153,14 +2147,18 @@ static void rv7xx_parse_pplib_non_clock_info(struct radeon_device *rdev, if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); - } else if (r600_is_uvd_state(rps->class, rps->class2)) { - rps->vclk = RV770_DEFAULT_VCLK_FREQ; - rps->dclk = RV770_DEFAULT_DCLK_FREQ; } else { rps->vclk = 0; rps->dclk = 0; } + if (r600_is_uvd_state(rps->class, rps->class2)) { + if ((rps->vclk == 0) || (rps->dclk == 0)) { + rps->vclk = RV770_DEFAULT_VCLK_FREQ; + rps->dclk = RV770_DEFAULT_DCLK_FREQ; + } + } + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) rdev->pm.dpm.boot_ps = rps; if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) @@ -2300,6 +2298,7 @@ int rv7xx_parse_power_table(struct radeon_device *rdev) (power_state->v1.ucNonClockStateIndex * power_info->pplib.ucNonClockSize)); if (power_info->pplib.ucStateEntrySize - 1) { + u8 *idx; ps = kzalloc(sizeof(struct rv7xx_ps), GFP_KERNEL); if (ps == NULL) { kfree(rdev->pm.dpm.ps); @@ -2309,12 +2308,12 @@ int rv7xx_parse_power_table(struct radeon_device *rdev) rv7xx_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], non_clock_info, power_info->pplib.ucNonClockSize); + idx = (u8 *)&power_state->v1.ucClockStateIndices[0]; for (j = 0; j < (power_info->pplib.ucStateEntrySize - 1); j++) { clock_info = (union pplib_clock_info *) ((uint8_t*)mode_info->atom_context->bios + data_offset + le16_to_cpu(power_info->pplib.usClockInfoArrayOffset) + - (power_state->v1.ucClockStateIndices[j] * - power_info->pplib.ucClockInfoSize)); + (idx[j] * power_info->pplib.ucClockInfoSize)); rv7xx_parse_pplib_clock_info(rdev, &rdev->pm.dpm.ps[i], j, clock_info); @@ -2523,8 +2522,18 @@ u32 rv770_dpm_get_mclk(struct radeon_device *rdev, bool low) bool rv770_dpm_vblank_too_short(struct radeon_device *rdev) { u32 vblank_time = r600_dpm_get_vblank_time(rdev); + u32 switch_limit = 300; + + /* quirks */ + /* ASUS K70AF */ +#if zRJ_TODO + if ((rdev->pdev->device == 0x9553) && + (rdev->pdev->subsystem_vendor == 0x1043) && + (rdev->pdev->subsystem_device == 0x1c42)) + switch_limit = 200; +#endif - if (vblank_time < 300) + if (vblank_time < switch_limit) return true; else return false; diff --git a/sys/dev/drm/radeon/rv770d.h b/sys/dev/drm/radeon/rv770d.h index 6bef2b7d60..1ae277152c 100644 --- a/sys/dev/drm/radeon/rv770d.h +++ b/sys/dev/drm/radeon/rv770d.h @@ -852,7 +852,7 @@ #define AFMT_VBI_PACKET_CONTROL 0x7608 # define AFMT_GENERIC0_UPDATE (1 << 2) #define AFMT_INFOFRAME_CONTROL0 0x760c -# define AFMT_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hmdi regs */ +# define AFMT_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hdmi regs */ # define AFMT_AUDIO_INFO_UPDATE (1 << 7) # define AFMT_MPEG_INFO_UPDATE (1 << 10) #define AFMT_GENERIC0_7 0x7610 @@ -971,7 +971,21 @@ # define TARGET_LINK_SPEED_MASK (0xf << 0) # define SELECTABLE_DEEMPHASIS (1 << 6) +/* + * PM4 + */ +#define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ + (((reg) >> 2) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + /* UVD */ +#define UVD_GPCOM_VCPU_CMD 0xef0c +#define UVD_GPCOM_VCPU_DATA0 0xef10 +#define UVD_GPCOM_VCPU_DATA1 0xef14 + #define UVD_LMI_EXT40_ADDR 0xf498 #define UVD_VCPU_CHIP_ID 0xf4d4 #define UVD_VCPU_CACHE_OFFSET0 0xf4d8 @@ -985,4 +999,6 @@ #define UVD_RBC_RB_RPTR 0xf690 #define UVD_RBC_RB_WPTR 0xf694 +#define UVD_CONTEXT_ID 0xf6f4 + #endif diff --git a/sys/dev/drm/radeon/si.c b/sys/dev/drm/radeon/si.c index 4cf1d37a7e..e0dd660e41 100644 --- a/sys/dev/drm/radeon/si.c +++ b/sys/dev/drm/radeon/si.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * * Authors: Alex Deucher - * - * $FreeBSD: head/sys/dev/drm2/radeon/si.c 254885 2013-08-25 19:37:15Z dumbbell $ */ #include #include @@ -76,6 +74,11 @@ MODULE_FIRMWARE("radeon/HAINAN_smc.bin"); static void si_pcie_gen3_enable(struct radeon_device *rdev); static void si_program_aspm(struct radeon_device *rdev); +static void si_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable); +static void si_fini_pg(struct radeon_device *rdev); +static void si_fini_cg(struct radeon_device *rdev); +static void si_rlc_stop(struct radeon_device *rdev); static const u32 verde_rlc_save_restore_register_list[] = { @@ -1669,6 +1672,7 @@ static int si_init_microcode(struct radeon_device *rdev) fw_name); release_firmware(rdev->smc_fw); rdev->smc_fw = NULL; + err = 0; } else if (rdev->smc_fw->datasize != smc_req_size) { printk(KERN_ERR "si_smc: Bogus length %zu in firmware \"%s\"\n", @@ -1728,7 +1732,8 @@ static u32 dce6_line_buffer_adjust(struct radeon_device *rdev, struct drm_display_mode *mode, struct drm_display_mode *other_mode) { - u32 tmp; + u32 tmp, buffer_alloc, i; + u32 pipe_offset = radeon_crtc->crtc_id * 0x20; /* * Line Buffer Setup * There are 3 line buffers, each one shared by 2 display controllers. @@ -1743,16 +1748,30 @@ static u32 dce6_line_buffer_adjust(struct radeon_device *rdev, * non-linked crtcs for maximum line buffer allocation. */ if (radeon_crtc->base.enabled && mode) { - if (other_mode) + if (other_mode) { tmp = 0; /* 1/2 */ - else + buffer_alloc = 1; + } else { tmp = 2; /* whole */ - } else + buffer_alloc = 2; + } + } else { tmp = 0; + buffer_alloc = 0; + } WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset, DC_LB_MEMORY_CONFIG(tmp)); + WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset, + DMIF_BUFFERS_ALLOCATED(buffer_alloc)); + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) & + DMIF_BUFFERS_ALLOCATED_COMPLETED) + break; + udelay(1); + } + if (radeon_crtc->base.enabled && mode) { switch (tmp) { case 0: @@ -3388,16 +3407,7 @@ static int si_cp_resume(struct radeon_device *rdev) u32 rb_bufsz; int r; - /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */ - WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP | - SOFT_RESET_PA | - SOFT_RESET_VGT | - SOFT_RESET_SPI | - SOFT_RESET_SX)); - RREG32(GRBM_SOFT_RESET); - mdelay(15); - WREG32(GRBM_SOFT_RESET, 0); - RREG32(GRBM_SOFT_RESET); + si_enable_gui_idle_interrupt(rdev, false); WREG32(CP_SEM_WAIT_TIMER, 0x0); WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); @@ -3411,8 +3421,8 @@ static int si_cp_resume(struct radeon_device *rdev) /* ring 0 - compute and gfx */ /* Set ring buffer size */ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -3444,8 +3454,8 @@ static int si_cp_resume(struct radeon_device *rdev) /* ring1 - compute only */ /* Set ring buffer size */ ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -3470,8 +3480,8 @@ static int si_cp_resume(struct radeon_device *rdev) /* ring2 - compute only */ /* Set ring buffer size */ ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -3514,10 +3524,12 @@ static int si_cp_resume(struct radeon_device *rdev) rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; } + si_enable_gui_idle_interrupt(rdev, true); + return 0; } -static u32 si_gpu_check_soft_reset(struct radeon_device *rdev) +u32 si_gpu_check_soft_reset(struct radeon_device *rdev) { u32 reset_mask = 0; u32 tmp; @@ -3615,6 +3627,13 @@ static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); + /* disable PG/CG */ + si_fini_pg(rdev); + si_fini_cg(rdev); + + /* stop the rlc */ + si_rlc_stop(rdev); + /* Disable CP parsing/prefetching */ WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); @@ -3766,34 +3785,6 @@ bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } -/** - * si_dma_is_lockup - Check if the DMA engine is locked up - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Check if the async DMA engine is locked up. - * Returns true if the engine appears to be locked up, false if not. - */ -bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 reset_mask = si_gpu_check_soft_reset(rdev); - u32 mask; - - if (ring->idx == R600_RING_TYPE_DMA_INDEX) - mask = RADEON_RESET_DMA; - else - mask = RADEON_RESET_DMA1; - - if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - /* MC */ static void si_mc_program(struct radeon_device *rdev) { @@ -4107,13 +4098,64 @@ static int si_vm_packet3_ce_check(struct radeon_device *rdev, return 0; } +static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx) +{ + u32 start_reg, reg, i; + u32 command = ib[idx + 4]; + u32 info = ib[idx + 1]; + u32 idx_value = ib[idx]; + if (command & PACKET3_CP_DMA_CMD_SAS) { + /* src address space is register */ + if (((info & 0x60000000) >> 29) == 0) { + start_reg = idx_value << 2; + if (command & PACKET3_CP_DMA_CMD_SAIC) { + reg = start_reg; + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad SRC register\n"); + return -EINVAL; + } + } else { + for (i = 0; i < (command & 0x1fffff); i++) { + reg = start_reg + (4 * i); + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad SRC register\n"); + return -EINVAL; + } + } + } + } + } + if (command & PACKET3_CP_DMA_CMD_DAS) { + /* dst address space is register */ + if (((info & 0x00300000) >> 20) == 0) { + start_reg = ib[idx + 2]; + if (command & PACKET3_CP_DMA_CMD_DAIC) { + reg = start_reg; + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad DST register\n"); + return -EINVAL; + } + } else { + for (i = 0; i < (command & 0x1fffff); i++) { + reg = start_reg + (4 * i); + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad DST register\n"); + return -EINVAL; + } + } + } + } + } + return 0; +} + static int si_vm_packet3_gfx_check(struct radeon_device *rdev, u32 *ib, struct radeon_cs_packet *pkt) { + int r; u32 idx = pkt->idx + 1; u32 idx_value = ib[idx]; u32 start_reg, end_reg, reg, i; - u32 command, info; switch (pkt->opcode) { case PACKET3_NOP: @@ -4214,50 +4256,9 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev, } break; case PACKET3_CP_DMA: - command = ib[idx + 4]; - info = ib[idx + 1]; - if (command & PACKET3_CP_DMA_CMD_SAS) { - /* src address space is register */ - if (((info & 0x60000000) >> 29) == 0) { - start_reg = idx_value << 2; - if (command & PACKET3_CP_DMA_CMD_SAIC) { - reg = start_reg; - if (!si_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); - return -EINVAL; - } - } else { - for (i = 0; i < (command & 0x1fffff); i++) { - reg = start_reg + (4 * i); - if (!si_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); - return -EINVAL; - } - } - } - } - } - if (command & PACKET3_CP_DMA_CMD_DAS) { - /* dst address space is register */ - if (((info & 0x00300000) >> 20) == 0) { - start_reg = ib[idx + 2]; - if (command & PACKET3_CP_DMA_CMD_DAIC) { - reg = start_reg; - if (!si_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); - return -EINVAL; - } - } else { - for (i = 0; i < (command & 0x1fffff); i++) { - reg = start_reg + (4 * i); - if (!si_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); - return -EINVAL; - } - } - } - } - } + r = si_vm_packet3_cp_dma_check(ib, idx); + if (r) + return r; break; default: DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode); @@ -4269,6 +4270,7 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev, static int si_vm_packet3_compute_check(struct radeon_device *rdev, u32 *ib, struct radeon_cs_packet *pkt) { + int r; u32 idx = pkt->idx + 1; u32 idx_value = ib[idx]; u32 start_reg, reg, i; @@ -4341,6 +4343,11 @@ static int si_vm_packet3_compute_check(struct radeon_device *rdev, return -EINVAL; } break; + case PACKET3_CP_DMA: + r = si_vm_packet3_cp_dma_check(ib, idx); + if (r) + return r; + break; default: DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode); return -EINVAL; @@ -4732,58 +4739,7 @@ void si_vm_set_page(struct radeon_device *rdev, } } else { /* DMA */ - if (flags & RADEON_VM_PAGE_SYSTEM) { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - /* for non-physically contiguous pages (system) */ - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - } else { - value = 0; - } - addr += incr; - value |= r600_flags; - ib->ptr[ib->length_dw++] = value; - ib->ptr[ib->length_dw++] = upper_32_bits(value); - } - } - } else { - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - /* for physically contiguous pages (vram) */ - ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); - ib->ptr[ib->length_dw++] = pe; /* dst addr */ - ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - ib->ptr[ib->length_dw++] = r600_flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; - ib->ptr[ib->length_dw++] = value; /* value */ - ib->ptr[ib->length_dw++] = upper_32_bits(value); - ib->ptr[ib->length_dw++] = incr; /* increment size */ - ib->ptr[ib->length_dw++] = 0; - pe += ndw * 4; - addr += (ndw / 2) * incr; - count -= ndw / 2; - } - } - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); + si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); } } @@ -4830,32 +4786,6 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 0x0); } -void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{ - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - if (vm->id < 8) { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); - } else { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); - } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); - - /* flush hdp cache */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); - radeon_ring_write(ring, 1); - - /* bits 0-7 are the VM contexts0-7 */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); -} - /* * Power and clock gating */ @@ -4923,7 +4853,7 @@ static void si_set_uvd_dcm(struct radeon_device *rdev, WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2); } -static void si_init_uvd_internal_cg(struct radeon_device *rdev) +void si_init_uvd_internal_cg(struct radeon_device *rdev) { bool hw_mode = true; @@ -4966,7 +4896,7 @@ static void si_enable_dma_pg(struct radeon_device *rdev, bool enable) u32 data, orig; orig = data = RREG32(DMA_PG); - if (enable) + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA)) data |= PG_CNTL_ENABLE; else data &= ~PG_CNTL_ENABLE; @@ -4990,7 +4920,7 @@ static void si_enable_gfx_cgpg(struct radeon_device *rdev, { u32 tmp; - if (enable) { + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10); WREG32(RLC_TTOP_D, tmp); @@ -5093,9 +5023,9 @@ static void si_enable_cgcg(struct radeon_device *rdev, orig = data = RREG32(RLC_CGCG_CGLS_CTRL); - si_enable_gui_idle_interrupt(rdev, enable); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { + si_enable_gui_idle_interrupt(rdev, true); - if (enable) { WREG32(RLC_GCPM_GENERAL_3, 0x00000080); tmp = si_halt_rlc(rdev); @@ -5112,6 +5042,8 @@ static void si_enable_cgcg(struct radeon_device *rdev, data |= CGCG_EN | CGLS_EN; } else { + si_enable_gui_idle_interrupt(rdev, false); + RREG32(CB_CGTT_SCLK_CTRL); RREG32(CB_CGTT_SCLK_CTRL); RREG32(CB_CGTT_SCLK_CTRL); @@ -5129,16 +5061,18 @@ static void si_enable_mgcg(struct radeon_device *rdev, { u32 data, orig, tmp = 0; - if (enable) { + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { orig = data = RREG32(CGTS_SM_CTRL_REG); data = 0x96940200; if (orig != data) WREG32(CGTS_SM_CTRL_REG, data); - orig = data = RREG32(CP_MEM_SLP_CNTL); - data |= CP_MEM_LS_EN; - if (orig != data) - WREG32(CP_MEM_SLP_CNTL, data); + if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { + orig = data = RREG32(CP_MEM_SLP_CNTL); + data |= CP_MEM_LS_EN; + if (orig != data) + WREG32(CP_MEM_SLP_CNTL, data); + } orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); data &= 0xffffffc0; @@ -5183,7 +5117,7 @@ static void si_enable_uvd_mgcg(struct radeon_device *rdev, { u32 orig, data, tmp; - if (enable) { + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); tmp |= 0x3fff; WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp); @@ -5231,7 +5165,7 @@ static void si_enable_mc_ls(struct radeon_device *rdev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable) + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) data |= MC_LS_ENABLE; else data &= ~MC_LS_ENABLE; @@ -5240,229 +5174,297 @@ static void si_enable_mc_ls(struct radeon_device *rdev, } } - -static void si_init_cg(struct radeon_device *rdev) +static void si_enable_mc_mgcg(struct radeon_device *rdev, + bool enable) { - si_enable_mgcg(rdev, true); - si_enable_cgcg(rdev, false); - /* disable MC LS on Tahiti */ - if (rdev->family == CHIP_TAHITI) - si_enable_mc_ls(rdev, false); - if (rdev->has_uvd) { - si_enable_uvd_mgcg(rdev, true); - si_init_uvd_internal_cg(rdev); + int i; + u32 orig, data; + + for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { + orig = data = RREG32(mc_cg_registers[i]); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) + data |= MC_CG_ENABLE; + else + data &= ~MC_CG_ENABLE; + if (data != orig) + WREG32(mc_cg_registers[i], data); } } -static void si_fini_cg(struct radeon_device *rdev) +static void si_enable_dma_mgcg(struct radeon_device *rdev, + bool enable) { - if (rdev->has_uvd) - si_enable_uvd_mgcg(rdev, false); - si_enable_cgcg(rdev, false); - si_enable_mgcg(rdev, false); -} + u32 orig, data, offset; + int i; -static void si_init_pg(struct radeon_device *rdev) -{ - bool has_pg = false; -#if 0 - /* only cape verde supports PG */ - if (rdev->family == CHIP_VERDE) - has_pg = true; -#endif - if (has_pg) { - si_init_ao_cu_mask(rdev); - si_init_dma_pg(rdev); - si_enable_dma_pg(rdev, true); - si_init_gfx_cgpg(rdev); - si_enable_gfx_cgpg(rdev, true); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { + for (i = 0; i < 2; i++) { + if (i == 0) + offset = DMA0_REGISTER_OFFSET; + else + offset = DMA1_REGISTER_OFFSET; + orig = data = RREG32(DMA_POWER_CNTL + offset); + data &= ~MEM_POWER_OVERRIDE; + if (data != orig) + WREG32(DMA_POWER_CNTL + offset, data); + WREG32(DMA_CLK_CTRL + offset, 0x00000100); + } } else { - WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); - WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8); + for (i = 0; i < 2; i++) { + if (i == 0) + offset = DMA0_REGISTER_OFFSET; + else + offset = DMA1_REGISTER_OFFSET; + orig = data = RREG32(DMA_POWER_CNTL + offset); + data |= MEM_POWER_OVERRIDE; + if (data != orig) + WREG32(DMA_POWER_CNTL + offset, data); + + orig = data = RREG32(DMA_CLK_CTRL + offset); + data = 0xff000000; + if (data != orig) + WREG32(DMA_CLK_CTRL + offset, data); + } } } -static void si_fini_pg(struct radeon_device *rdev) +static void si_enable_bif_mgls(struct radeon_device *rdev, + bool enable) { - bool has_pg = false; + u32 orig, data; + + orig = data = RREG32_PCIE(PCIE_CNTL2); - /* only cape verde supports PG */ - if (rdev->family == CHIP_VERDE) - has_pg = true; + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) + data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | + REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; + else + data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | + REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); - if (has_pg) { - si_enable_dma_pg(rdev, false); - si_enable_gfx_cgpg(rdev, false); - } + if (orig != data) + WREG32_PCIE(PCIE_CNTL2, data); } -/* - * RLC - */ -void si_rlc_fini(struct radeon_device *rdev) +static void si_enable_hdp_mgcg(struct radeon_device *rdev, + bool enable) { - int r; + u32 orig, data; - /* save restore block */ - if (rdev->rlc.save_restore_obj) { - r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); - if (unlikely(r != 0)) - dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r); - radeon_bo_unpin(rdev->rlc.save_restore_obj); - radeon_bo_unreserve(rdev->rlc.save_restore_obj); + orig = data = RREG32(HDP_HOST_PATH_CNTL); - radeon_bo_unref(&rdev->rlc.save_restore_obj); - rdev->rlc.save_restore_obj = NULL; - } - - /* clear state block */ - if (rdev->rlc.clear_state_obj) { - r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); - if (unlikely(r != 0)) - dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r); - radeon_bo_unpin(rdev->rlc.clear_state_obj); - radeon_bo_unreserve(rdev->rlc.clear_state_obj); + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) + data &= ~CLOCK_GATING_DIS; + else + data |= CLOCK_GATING_DIS; - radeon_bo_unref(&rdev->rlc.clear_state_obj); - rdev->rlc.clear_state_obj = NULL; - } + if (orig != data) + WREG32(HDP_HOST_PATH_CNTL, data); } -#define RLC_CLEAR_STATE_END_MARKER 0x00000001 - -int si_rlc_init(struct radeon_device *rdev) +static void si_enable_hdp_ls(struct radeon_device *rdev, + bool enable) { - volatile u32 *dst_ptr; - void *ptr; - u32 dws, data, i, j, k, reg_num; - u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index; - u64 reg_list_mc_addr; - const struct cs_section_def *cs_data = si_cs_data; - int r; + u32 orig, data; - /* save restore block */ - if (rdev->rlc.save_restore_obj == NULL) { - r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, NULL, - &rdev->rlc.save_restore_obj); - if (r) { - dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); - return r; + orig = data = RREG32(HDP_MEM_POWER_LS); + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) + data |= HDP_LS_ENABLE; + else + data &= ~HDP_LS_ENABLE; + + if (orig != data) + WREG32(HDP_MEM_POWER_LS, data); +} + +void si_update_cg(struct radeon_device *rdev, + u32 block, bool enable) +{ + if (block & RADEON_CG_BLOCK_GFX) { + si_enable_gui_idle_interrupt(rdev, false); + /* order matters! */ + if (enable) { + si_enable_mgcg(rdev, true); + si_enable_cgcg(rdev, true); + } else { + si_enable_cgcg(rdev, false); + si_enable_mgcg(rdev, false); } + si_enable_gui_idle_interrupt(rdev, true); } - r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false); - if (unlikely(r != 0)) { - si_rlc_fini(rdev); - return r; + if (block & RADEON_CG_BLOCK_MC) { + si_enable_mc_mgcg(rdev, enable); + si_enable_mc_ls(rdev, enable); } - r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->rlc.save_restore_gpu_addr); - if (r) { - radeon_bo_unreserve(rdev->rlc.save_restore_obj); - dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r); - si_rlc_fini(rdev); - return r; + + if (block & RADEON_CG_BLOCK_SDMA) { + si_enable_dma_mgcg(rdev, enable); } - if (rdev->family == CHIP_VERDE) { - r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&ptr); - if (r) { - dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r); - si_rlc_fini(rdev); - return r; - } - rdev->rlc.sr_ptr = ptr; - /* write the sr buffer */ - dst_ptr = rdev->rlc.sr_ptr; - for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) { - dst_ptr[i] = verde_rlc_save_restore_register_list[i]; - } - radeon_bo_kunmap(rdev->rlc.save_restore_obj); + if (block & RADEON_CG_BLOCK_BIF) { + si_enable_bif_mgls(rdev, enable); } - radeon_bo_unreserve(rdev->rlc.save_restore_obj); - /* clear state block */ - reg_list_num = 0; - dws = 0; - for (i = 0; cs_data[i].section != NULL; i++) { - for (j = 0; cs_data[i].section[j].extent != NULL; j++) { - reg_list_num++; - dws += cs_data[i].section[j].reg_count; + if (block & RADEON_CG_BLOCK_UVD) { + if (rdev->has_uvd) { + si_enable_uvd_mgcg(rdev, enable); } } - reg_list_blk_index = (3 * reg_list_num + 2); - dws += reg_list_blk_index; - if (rdev->rlc.clear_state_obj == NULL) { - r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); - if (r) { - dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); - si_rlc_fini(rdev); - return r; - } + if (block & RADEON_CG_BLOCK_HDP) { + si_enable_hdp_mgcg(rdev, enable); + si_enable_hdp_ls(rdev, enable); } - r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - si_rlc_fini(rdev); - return r; +} + +static void si_init_cg(struct radeon_device *rdev) +{ + si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_HDP), true); + if (rdev->has_uvd) { + si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true); + si_init_uvd_internal_cg(rdev); } - r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->rlc.clear_state_gpu_addr); - if (r) { +} - radeon_bo_unreserve(rdev->rlc.clear_state_obj); - dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r); - si_rlc_fini(rdev); - return r; +static void si_fini_cg(struct radeon_device *rdev) +{ + if (rdev->has_uvd) { + si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false); } - r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&ptr); - if (r) { - dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r); - si_rlc_fini(rdev); - return r; + si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_HDP), false); +} + +u32 si_get_csb_size(struct radeon_device *rdev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (rdev->rlc.cs_data == NULL) + return 0; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } } - rdev->rlc.cs_ptr = ptr; - /* set up the cs buffer */ - dst_ptr = rdev->rlc.cs_ptr; - reg_list_hdr_blk_index = 0; - reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4); - data = upper_32_bits(reg_list_mc_addr); - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - for (i = 0; cs_data[i].section != NULL; i++) { - for (j = 0; cs_data[i].section[j].extent != NULL; j++) { - reg_num = cs_data[i].section[j].reg_count; - data = reg_list_mc_addr & 0xffffffff; - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - - data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff; - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - - data = 0x08000000 | (reg_num * 4); - dst_ptr[reg_list_hdr_blk_index] = data; - reg_list_hdr_blk_index++; - - for (k = 0; k < reg_num; k++) { - data = cs_data[i].section[j].extent[k]; - dst_ptr[reg_list_blk_index + k] = data; + /* pa_sc_raster_config */ + count += 3; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (rdev->rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); + buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE; + + buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1); + buffer[count++] = 0x80000000; + buffer[count++] = 0x80000000; + + for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count); + buffer[count++] = ext->reg_index - 0xa000; + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = ext->extent[i]; + } else { + return; } - reg_list_mc_addr += reg_num * 4; - reg_list_blk_index += reg_num; } } - dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER; - radeon_bo_kunmap(rdev->rlc.clear_state_obj); - radeon_bo_unreserve(rdev->rlc.clear_state_obj); + buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START; + switch (rdev->family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + buffer[count++] = 0x2a00126a; + break; + case CHIP_VERDE: + buffer[count++] = 0x0000124a; + break; + case CHIP_OLAND: + buffer[count++] = 0x00000082; + break; + case CHIP_HAINAN: + buffer[count++] = 0x00000000; + break; + default: + buffer[count++] = 0x00000000; + break; + } + + buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); + buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE; - return 0; + buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0); + buffer[count++] = 0; } -static void si_rlc_reset(struct radeon_device *rdev) +static void si_init_pg(struct radeon_device *rdev) +{ + if (rdev->pg_flags) { + if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) { + si_init_dma_pg(rdev); + } + si_init_ao_cu_mask(rdev); + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { + si_init_gfx_cgpg(rdev); + } + si_enable_dma_pg(rdev, true); + si_enable_gfx_cgpg(rdev, true); + } else { + WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); + WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8); + } +} + +static void si_fini_pg(struct radeon_device *rdev) +{ + if (rdev->pg_flags) { + si_enable_dma_pg(rdev, false); + si_enable_gfx_cgpg(rdev, false); + } +} + +/* + * RLC + */ +void si_rlc_reset(struct radeon_device *rdev) { u32 tmp = RREG32(GRBM_SOFT_RESET); @@ -5592,7 +5594,9 @@ static void si_disable_interrupt_state(struct radeon_device *rdev) { u32 tmp; - WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + tmp = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + WREG32(CP_INT_CNTL_RING0, tmp); WREG32(CP_INT_CNTL_RING1, 0); WREG32(CP_INT_CNTL_RING2, 0); tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; @@ -5678,7 +5682,7 @@ static int si_irq_init(struct radeon_device *rdev) WREG32(INTERRUPT_CNTL, interrupt_cntl); WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); - rb_bufsz = drm_order(rdev->ih.ring_size / 4); + rb_bufsz = order_base_2(rdev->ih.ring_size / 4); ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | IH_WPTR_OVERFLOW_CLEAR | @@ -5717,7 +5721,7 @@ static int si_irq_init(struct radeon_device *rdev) int si_irq_set(struct radeon_device *rdev) { - u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE; + u32 cp_int_cntl; u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0; u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0; @@ -5738,6 +5742,9 @@ int si_irq_set(struct radeon_device *rdev) return 0; } + cp_int_cntl = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + if (!ASIC_IS_NODCE(rdev)) { hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN; @@ -6362,80 +6369,6 @@ restart_ih: return IRQ_HANDLED; } -/** - * si_copy_dma - copy pages using the DMA engine - * - * @rdev: radeon_device pointer - * @src_offset: src GPU address - * @dst_offset: dst GPU address - * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object - * - * Copy GPU paging using the DMA engine (SI). - * Used by the radeon ttm implementation to move pages if - * registered as the asic copy callback. - */ -int si_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) -{ - struct radeon_semaphore *sem = NULL; - int ring_index = rdev->asic->copy.dma_ring_index; - struct radeon_ring *ring = &rdev->ring[ring_index]; - u32 size_in_bytes, cur_size_in_bytes; - int i, num_loops; - int r = 0; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; - } - - size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); - num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); - r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); - return r; - } - - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } - - for (i = 0; i < num_loops; i++) { - cur_size_in_bytes = size_in_bytes; - if (cur_size_in_bytes > 0xFFFFF) - cur_size_in_bytes = 0xFFFFF; - size_in_bytes -= cur_size_in_bytes; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes)); - radeon_ring_write(ring, dst_offset & 0xffffffff); - radeon_ring_write(ring, src_offset & 0xffffffff); - radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); - radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); - src_offset += cur_size_in_bytes; - dst_offset += cur_size_in_bytes; - } - - r = radeon_fence_emit(rdev, fence, ring->idx); - if (r) { - radeon_ring_unlock_undo(rdev, ring); - return r; - } - - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); - - return r; -} - /* * startup/shutdown callbacks */ @@ -6449,6 +6382,11 @@ static int si_startup(struct radeon_device *rdev) /* enable aspm */ si_program_aspm(rdev); + /* scratch needs to be initialized before MC */ + r = r600_vram_scratch_init(rdev); + if (r) + return r; + si_mc_program(rdev); if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || @@ -6466,17 +6404,19 @@ static int si_startup(struct radeon_device *rdev) return r; } - r = r600_vram_scratch_init(rdev); - if (r) - return r; - r = si_pcie_gart_enable(rdev); if (r) return r; si_gpu_init(rdev); /* allocate rlc buffers */ - r = si_rlc_init(rdev); + if (rdev->family == CHIP_VERDE) { + rdev->rlc.reg_list = verde_rlc_save_restore_register_list; + rdev->rlc.reg_list_size = + (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list); + } + rdev->rlc.cs_data = si_cs_data; + r = sumo_rlc_init(rdev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -6518,7 +6458,7 @@ static int si_startup(struct radeon_device *rdev) } if (rdev->has_uvd) { - r = rv770_uvd_resume(rdev); + r = uvd_v2_2_resume(rdev); if (!r) { r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); @@ -6547,21 +6487,21 @@ static int si_startup(struct radeon_device *rdev) ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, CP_RB0_RPTR, CP_RB0_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, CP_RB1_RPTR, CP_RB1_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, CP_RB2_RPTR, CP_RB2_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (r) return r; @@ -6569,7 +6509,7 @@ static int si_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, DMA_RB_RPTR + DMA0_REGISTER_OFFSET, DMA_RB_WPTR + DMA0_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); if (r) return r; @@ -6577,7 +6517,7 @@ static int si_startup(struct radeon_device *rdev) r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, DMA_RB_RPTR + DMA1_REGISTER_OFFSET, DMA_RB_WPTR + DMA1_REGISTER_OFFSET, - 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); if (r) return r; @@ -6595,12 +6535,11 @@ static int si_startup(struct radeon_device *rdev) if (rdev->has_uvd) { ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, - R600_WB_UVD_RPTR_OFFSET, + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, - 0, 0xfffff, RADEON_CP_PACKET2); + RADEON_CP_PACKET2); if (!r) - r = r600_uvd_init(rdev); + r = uvd_v1_0_init(rdev); if (r) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } @@ -6618,6 +6557,10 @@ static int si_startup(struct radeon_device *rdev) return r; } + r = dce6_audio_init(rdev); + if (r) + return r; + return 0; } @@ -6649,13 +6592,16 @@ int si_resume(struct radeon_device *rdev) int si_suspend(struct radeon_device *rdev) { + dce6_audio_fini(rdev); radeon_vm_manager_fini(rdev); si_cp_enable(rdev, false); cayman_dma_stop(rdev); if (rdev->has_uvd) { - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); } + si_fini_pg(rdev); + si_fini_cg(rdev); si_irq_suspend(rdev); radeon_wb_disable(rdev); si_pcie_gart_disable(rdev); @@ -6762,7 +6708,7 @@ int si_init(struct radeon_device *rdev) si_cp_fini(rdev); cayman_dma_fini(rdev); si_irq_fini(rdev); - si_rlc_fini(rdev); + sumo_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_vm_manager_fini(rdev); @@ -6787,16 +6733,16 @@ void si_fini(struct radeon_device *rdev) { si_cp_fini(rdev); cayman_dma_fini(rdev); - si_irq_fini(rdev); - si_rlc_fini(rdev); - si_fini_cg(rdev); si_fini_pg(rdev); + si_fini_cg(rdev); + si_irq_fini(rdev); + sumo_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->has_uvd) { - r600_uvd_stop(rdev); + uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); } si_pcie_gart_fini(rdev); @@ -6931,7 +6877,7 @@ static struct pci_dev dev_to_pcidev(device_t dev) static void si_pcie_gen3_enable(struct radeon_device *rdev) { #if 0 - struct pci_dev *root = rdev->dev->bus->self; + struct pci_dev *root = rdev->pdev->bus->self; #else device_t root = device_get_parent(rdev->dev); #endif @@ -7220,7 +7166,7 @@ static void si_program_aspm(struct radeon_device *rdev) WREG32_PIF_PHY1(PB1_PIF_CNTL, data); if (!disable_clkreq) { -#ifdef MN_TODO +#ifdef zMN_TODO struct pci_dev *root = rdev->pdev->bus->self; u32 lnkcap; diff --git a/sys/dev/drm/radeon/si_dma.c b/sys/dev/drm/radeon/si_dma.c new file mode 100644 index 0000000000..4022387fda --- /dev/null +++ b/sys/dev/drm/radeon/si_dma.c @@ -0,0 +1,232 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +/** + * si_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up. + * Returns true if the engine appears to be locked up, false if not. + */ +bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 reset_mask = si_gpu_check_soft_reset(rdev); + u32 mask; + + if (ring->idx == R600_RING_TYPE_DMA_INDEX) + mask = RADEON_RESET_DMA; + else + mask = RADEON_RESET_DMA1; + + if (!(reset_mask & mask)) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + +/** + * si_dma_vm_set_page - update the page tables using the DMA + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using the DMA (SI). + */ +void si_dma_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint32_t flags) +{ + uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); + uint64_t value; + unsigned ndw; + + if (flags & RADEON_VM_PAGE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + pe += ndw * 4; + addr += (ndw / 2) * incr; + count -= ndw / 2; + } + } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); +} + +void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + + if (vm == NULL) + return; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + if (vm->id < 8) { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + } else { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); + } + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* flush hdp cache */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 1 << vm->id); +} + +/** + * si_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (SI). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int si_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_bytes, cur_size_in_bytes; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); + num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_bytes = size_in_bytes; + if (cur_size_in_bytes > 0xFFFFF) + cur_size_in_bytes = 0xFFFFF; + size_in_bytes -= cur_size_in_bytes; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes)); + radeon_ring_write(ring, dst_offset & 0xffffffff); + radeon_ring_write(ring, src_offset & 0xffffffff); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_bytes; + dst_offset += cur_size_in_bytes; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} diff --git a/sys/dev/drm/radeon/si_dpm.c b/sys/dev/drm/radeon/si_dpm.c index 7d8880219b..26afa1fa1c 100644 --- a/sys/dev/drm/radeon/si_dpm.c +++ b/sys/dev/drm/radeon/si_dpm.c @@ -2910,6 +2910,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_sclk_switching = false; u32 mclk, sclk; u16 vddc, vddci; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i; if ((rdev->pm.dpm.new_active_crtc_count > 1) || @@ -2943,6 +2944,29 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, } } + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + for (i = 0; i < ps->performance_level_count; i++) { + if (max_sclk_vddc) { + if (ps->performance_levels[i].sclk > max_sclk_vddc) + ps->performance_levels[i].sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->performance_levels[i].mclk > max_mclk_vddci) + ps->performance_levels[i].mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->performance_levels[i].mclk > max_mclk_vddc) + ps->performance_levels[i].mclk = max_mclk_vddc; + } + } + /* XXX validate the min clocks required for display */ if (disable_mclk_switching) { @@ -3666,7 +3690,7 @@ static void si_clear_vc(struct radeon_device *rdev) WREG32(CG_FTV, 0); } -static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) +u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) { u8 mc_para_index; @@ -3679,7 +3703,7 @@ static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) return mc_para_index; } -static u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode) +u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode) { u8 mc_para_index; @@ -3761,20 +3785,21 @@ static bool si_validate_phase_shedding_tables(struct radeon_device *rdev, return true; } -static void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, - struct atom_voltage_table *voltage_table) +void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, + u32 max_voltage_steps, + struct atom_voltage_table *voltage_table) { unsigned int i, diff; - if (voltage_table->count <= SISLANDS_MAX_NO_VREG_STEPS) + if (voltage_table->count <= max_voltage_steps) return; - diff = voltage_table->count - SISLANDS_MAX_NO_VREG_STEPS; + diff = voltage_table->count - max_voltage_steps; - for (i= 0; i < SISLANDS_MAX_NO_VREG_STEPS; i++) + for (i= 0; i < max_voltage_steps; i++) voltage_table->entries[i] = voltage_table->entries[i + diff]; - voltage_table->count = SISLANDS_MAX_NO_VREG_STEPS; + voltage_table->count = max_voltage_steps; } static int si_construct_voltage_tables(struct radeon_device *rdev) @@ -3790,7 +3815,9 @@ static int si_construct_voltage_tables(struct radeon_device *rdev) return ret; if (eg_pi->vddc_voltage_table.count > SISLANDS_MAX_NO_VREG_STEPS) - si_trim_voltage_table_to_fit_state_table(rdev, &eg_pi->vddc_voltage_table); + si_trim_voltage_table_to_fit_state_table(rdev, + SISLANDS_MAX_NO_VREG_STEPS, + &eg_pi->vddc_voltage_table); if (eg_pi->vddci_control) { ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDCI, @@ -3799,7 +3826,9 @@ static int si_construct_voltage_tables(struct radeon_device *rdev) return ret; if (eg_pi->vddci_voltage_table.count > SISLANDS_MAX_NO_VREG_STEPS) - si_trim_voltage_table_to_fit_state_table(rdev, &eg_pi->vddci_voltage_table); + si_trim_voltage_table_to_fit_state_table(rdev, + SISLANDS_MAX_NO_VREG_STEPS, + &eg_pi->vddci_voltage_table); } if (pi->mvdd_control) { @@ -3817,7 +3846,9 @@ static int si_construct_voltage_tables(struct radeon_device *rdev) } if (si_pi->mvdd_voltage_table.count > SISLANDS_MAX_NO_VREG_STEPS) - si_trim_voltage_table_to_fit_state_table(rdev, &si_pi->mvdd_voltage_table); + si_trim_voltage_table_to_fit_state_table(rdev, + SISLANDS_MAX_NO_VREG_STEPS, + &si_pi->mvdd_voltage_table); } if (si_pi->vddc_phase_shed_control) { @@ -5177,7 +5208,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev, table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; if (!pi->mem_gddr5) { @@ -5187,7 +5218,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev, table->mc_reg_table_entry[k].mc_data[j] = (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; j++; - if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; } break; @@ -5200,7 +5231,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev, (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); j++; - if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; break; default: @@ -5755,6 +5786,13 @@ int si_dpm_enable(struct radeon_device *rdev) struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps; int ret; + si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); + if (si_is_smc_running(rdev)) return -EINVAL; if (pi->voltage_control) @@ -5874,6 +5912,13 @@ int si_dpm_enable(struct radeon_device *rdev) si_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true); + si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), true); + ni_update_current_ps(rdev, boot_ps); return 0; @@ -5884,6 +5929,13 @@ void si_dpm_disable(struct radeon_device *rdev) struct rv7xx_power_info *pi = rv770_get_pi(rdev); struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps; + si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); + if (!si_is_smc_running(rdev)) return; si_disable_ulv(rdev); @@ -5948,6 +6000,13 @@ int si_dpm_set_power_state(struct radeon_device *rdev) struct radeon_ps *old_ps = &eg_pi->current_rps; int ret; + si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), false); + ret = si_disable_ulv(rdev); if (ret) { DRM_ERROR("si_disable_ulv failed\n"); @@ -6040,11 +6099,12 @@ int si_dpm_set_power_state(struct radeon_device *rdev) return ret; } - ret = si_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("si_dpm_force_performance_level failed\n"); - return ret; - } + si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | + RADEON_CG_BLOCK_MC | + RADEON_CG_BLOCK_SDMA | + RADEON_CG_BLOCK_BIF | + RADEON_CG_BLOCK_UVD | + RADEON_CG_BLOCK_HDP), true); return 0; } @@ -6235,6 +6295,7 @@ static int si_parse_power_table(struct radeon_device *rdev) rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) @@ -6251,14 +6312,16 @@ static int si_parse_power_table(struct radeon_device *rdev) non_clock_info, non_clock_info_array->ucEntrySize); k = 0; + idx = (u8 *)&power_state->v2.clockInfoIndex[0]; for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { - clock_array_index = power_state->v2.clockInfoIndex[j]; + clock_array_index = idx[j]; if (clock_array_index >= clock_info_array->ucNumEntries) continue; if (k >= SISLANDS_MAX_HARDWARE_POWERLEVELS) break; clock_info = (union pplib_clock_info *) - &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + ((u8 *)&clock_info_array->clockInfo[0] + + (clock_array_index * clock_info_array->ucEntrySize)); si_parse_pplib_clock_info(rdev, &rdev->pm.dpm.ps[i], k, clock_info); @@ -6404,6 +6467,12 @@ int si_dpm_init(struct radeon_device *rdev) si_initialize_powertune_defaults(rdev); + /* make sure dc limits are valid */ + if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) || + (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0)) + rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc = + rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + return 0; } diff --git a/sys/dev/drm/radeon/sid.h b/sys/dev/drm/radeon/sid.h index 2c8da27a92..7e2e0ea66a 100644 --- a/sys/dev/drm/radeon/sid.h +++ b/sys/dev/drm/radeon/sid.h @@ -282,6 +282,10 @@ #define DMIF_ADDR_CALC 0xC00 +#define PIPE0_DMIF_BUFFER_CONTROL 0x0ca0 +# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) +# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) + #define SRBM_STATUS 0xE50 #define GRBM_RQ_PENDING (1 << 5) #define VMC_BUSY (1 << 8) @@ -581,6 +585,7 @@ #define CLKS_MASK (0xfff << 0) #define HDP_HOST_PATH_CNTL 0x2C00 +#define CLOCK_GATING_DIS (1 << 23) #define HDP_NONSURFACE_BASE 0x2C04 #define HDP_NONSURFACE_INFO 0x2C08 #define HDP_NONSURFACE_SIZE 0x2C0C @@ -588,6 +593,8 @@ #define HDP_ADDR_CONFIG 0x2F48 #define HDP_MISC_CNTL 0x2F4C #define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) +#define HDP_MEM_POWER_LS 0x2F50 +#define HDP_LS_ENABLE (1 << 0) #define ATC_MISC_CG 0x3350 @@ -635,6 +642,54 @@ #define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +/* DCE6 ELD audio interface */ +#define AZ_F0_CODEC_ENDPOINT_INDEX 0x5E00 +# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0) +# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8) +#define AZ_F0_CODEC_ENDPOINT_DATA 0x5E04 + +#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25 +#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0) +#define SPEAKER_ALLOCATION_MASK (0x7f << 0) +#define SPEAKER_ALLOCATION_SHIFT 0 +#define HDMI_CONNECTION (1 << 16) +#define DP_CONNECTION (1 << 17) + +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */ +#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */ +# define MAX_CHANNELS(x) (((x) & 0x7) << 0) +/* max channels minus one. 7 = 8 channels */ +# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) +# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) +# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ +/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO + * bit0 = 32 kHz + * bit1 = 44.1 kHz + * bit2 = 48 kHz + * bit3 = 88.2 kHz + * bit4 = 96 kHz + * bit5 = 176.4 kHz + * bit6 = 192 kHz + */ +#define AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL 0x54 +# define AUDIO_ENABLED (1 << 31) + +#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56 +#define PORT_CONNECTIVITY_MASK (3 << 30) +#define PORT_CONNECTIVITY_SHIFT 30 + #define DC_LB_MEMORY_SPLIT 0x6b0c #define DC_LB_MEMORY_CONFIG(x) ((x) << 20) @@ -755,6 +810,17 @@ /* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */ #define CRTC_STATUS_FRAME_COUNT 0x6e98 +#define AFMT_AUDIO_SRC_CONTROL 0x713c +#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) +/* AFMT_AUDIO_SRC_SELECT + * 0 = stream0 + * 1 = stream1 + * 2 = stream2 + * 3 = stream3 + * 4 = stream4 + * 5 = stream5 + */ + #define GRBM_CNTL 0x8000 #define GRBM_READ_TIMEOUT(x) ((x) << 0) @@ -1295,6 +1361,7 @@ /* PCIE registers idx/data 0x30/0x34 */ #define PCIE_CNTL2 0x1c /* PCIE */ # define SLV_MEM_LS_EN (1 << 16) +# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17) # define MST_MEM_LS_EN (1 << 18) # define REPLAY_MEM_LS_EN (1 << 19) #define PCIE_LC_STATUS1 0x28 /* PCIE */ @@ -1486,7 +1553,7 @@ * 6. COMMAND [30:21] | BYTE_COUNT [20:0] */ # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) - /* 0 - SRC_ADDR + /* 0 - DST_ADDR * 1 - GDS */ # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) @@ -1501,7 +1568,7 @@ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ # define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 @@ -1644,6 +1711,10 @@ # define DMA_IDLE (1 << 0) #define DMA_TILING_CONFIG 0xd0b8 +#define DMA_POWER_CNTL 0xd0bc +# define MEM_POWER_OVERRIDE (1 << 8) +#define DMA_CLK_CTRL 0xd0c0 + #define DMA_PG 0xd0d4 # define PG_CNTL_ENABLE (1 << 0) #define DMA_PGFSM_CONFIG 0xd0d8 diff --git a/sys/dev/drm/radeon/smu7.h b/sys/dev/drm/radeon/smu7.h new file mode 100644 index 0000000000..f5904e7ffd --- /dev/null +++ b/sys/dev/drm/radeon/smu7.h @@ -0,0 +1,169 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU7_H +#define SMU7_H + +#pragma pack(push, 1) + +#define SMU7_CONTEXT_ID_SMC 1 +#define SMU7_CONTEXT_ID_VBIOS 2 + + +#define SMU7_CONTEXT_ID_SMC 1 +#define SMU7_CONTEXT_ID_VBIOS 2 + +#define SMU7_MAX_LEVELS_VDDC 8 +#define SMU7_MAX_LEVELS_VDDCI 4 +#define SMU7_MAX_LEVELS_MVDD 4 +#define SMU7_MAX_LEVELS_VDDNB 8 + +#define SMU7_MAX_LEVELS_GRAPHICS SMU__NUM_SCLK_DPM_STATE // SCLK + SQ DPM + ULV +#define SMU7_MAX_LEVELS_MEMORY SMU__NUM_MCLK_DPM_LEVELS // MCLK Levels DPM +#define SMU7_MAX_LEVELS_GIO SMU__NUM_LCLK_DPM_LEVELS // LCLK Levels +#define SMU7_MAX_LEVELS_LINK SMU__NUM_PCIE_DPM_LEVELS // PCIe speed and number of lanes. +#define SMU7_MAX_LEVELS_UVD 8 // VCLK/DCLK levels for UVD. +#define SMU7_MAX_LEVELS_VCE 8 // ECLK levels for VCE. +#define SMU7_MAX_LEVELS_ACP 8 // ACLK levels for ACP. +#define SMU7_MAX_LEVELS_SAMU 8 // SAMCLK levels for SAMU. +#define SMU7_MAX_ENTRIES_SMIO 32 // Number of entries in SMIO table. + +#define DPM_NO_LIMIT 0 +#define DPM_NO_UP 1 +#define DPM_GO_DOWN 2 +#define DPM_GO_UP 3 + +#define SMU7_FIRST_DPM_GRAPHICS_LEVEL 0 +#define SMU7_FIRST_DPM_MEMORY_LEVEL 0 + +#define GPIO_CLAMP_MODE_VRHOT 1 +#define GPIO_CLAMP_MODE_THERM 2 +#define GPIO_CLAMP_MODE_DC 4 + +#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0 +#define SCRATCH_B_TARG_PCIE_INDEX_MASK (0x7<enable_dpm) sumo_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; - return 0; } @@ -1488,6 +1486,7 @@ static int sumo_parse_power_table(struct radeon_device *rdev) rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) @@ -1501,12 +1500,15 @@ static int sumo_parse_power_table(struct radeon_device *rdev) } rdev->pm.dpm.ps[i].ps_priv = ps; k = 0; + idx = (u8 *)&power_state->v2.clockInfoIndex[0]; for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { - clock_array_index = power_state->v2.clockInfoIndex[j]; + clock_array_index = idx[j]; if (k >= SUMO_MAX_HARDWARE_POWERLEVELS) break; + clock_info = (union pplib_clock_info *) - &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + ((u8 *)&clock_info_array->clockInfo[0] + + (clock_array_index * clock_info_array->ucEntrySize)); sumo_parse_pplib_clock_info(rdev, &rdev->pm.dpm.ps[i], k, clock_info); @@ -1535,6 +1537,20 @@ u32 sumo_convert_vid2_to_vid7(struct radeon_device *rdev, return vid_mapping_table->entries[vid_mapping_table->num_entries - 1].vid_7bit; } +u32 sumo_convert_vid7_to_vid2(struct radeon_device *rdev, + struct sumo_vid_mapping_table *vid_mapping_table, + u32 vid_7bit) +{ + u32 i; + + for (i = 0; i < vid_mapping_table->num_entries; i++) { + if (vid_mapping_table->entries[i].vid_7bit == vid_7bit) + return vid_mapping_table->entries[i].vid_2bit; + } + + return vid_mapping_table->entries[vid_mapping_table->num_entries - 1].vid_2bit; +} + static u16 sumo_convert_voltage_index_to_value(struct radeon_device *rdev, u32 vid_2bit) { diff --git a/sys/dev/drm/radeon/sumo_dpm.h b/sys/dev/drm/radeon/sumo_dpm.h index 07dda299c7..db1ea32a90 100644 --- a/sys/dev/drm/radeon/sumo_dpm.h +++ b/sys/dev/drm/radeon/sumo_dpm.h @@ -202,6 +202,9 @@ void sumo_construct_vid_mapping_table(struct radeon_device *rdev, u32 sumo_convert_vid2_to_vid7(struct radeon_device *rdev, struct sumo_vid_mapping_table *vid_mapping_table, u32 vid_2bit); +u32 sumo_convert_vid7_to_vid2(struct radeon_device *rdev, + struct sumo_vid_mapping_table *vid_mapping_table, + u32 vid_7bit); u32 sumo_get_sleep_divider_from_id(u32 id); u32 sumo_get_sleep_divider_id_from_clock(struct radeon_device *rdev, u32 sclk, diff --git a/sys/dev/drm/radeon/trinity_dpm.c b/sys/dev/drm/radeon/trinity_dpm.c index 7c3a160a0a..0e533448e3 100644 --- a/sys/dev/drm/radeon/trinity_dpm.c +++ b/sys/dev/drm/radeon/trinity_dpm.c @@ -1072,6 +1072,17 @@ static void trinity_update_requested_ps(struct radeon_device *rdev, pi->requested_rps.ps_priv = &pi->requested_ps; } +void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable) +{ + struct trinity_power_info *pi = trinity_get_pi(rdev); + + if (pi->enable_bapm) { + trinity_acquire_mutex(rdev); + trinity_dpm_bapm_enable(rdev, enable); + trinity_release_mutex(rdev); + } +} + int trinity_dpm_enable(struct radeon_device *rdev) { struct trinity_power_info *pi = trinity_get_pi(rdev); @@ -1095,6 +1106,7 @@ int trinity_dpm_enable(struct radeon_device *rdev) trinity_program_sclk_dpm(rdev); trinity_start_dpm(rdev); trinity_wait_for_dpm_enabled(rdev); + trinity_dpm_bapm_enable(rdev, false); trinity_release_mutex(rdev); if (rdev->irq.installed && @@ -1120,6 +1132,7 @@ void trinity_dpm_disable(struct radeon_device *rdev) trinity_release_mutex(rdev); return; } + trinity_dpm_bapm_enable(rdev, false); trinity_disable_clock_power_gating(rdev); sumo_clear_vc(rdev); trinity_wait_for_level_0(rdev); @@ -1216,6 +1229,8 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev) trinity_acquire_mutex(rdev); if (pi->enable_dpm) { + if (pi->enable_bapm) + trinity_dpm_bapm_enable(rdev, rdev->pm.dpm.ac_power); trinity_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps); trinity_enable_power_level_0(rdev); trinity_force_level_0(rdev); @@ -1225,7 +1240,6 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev) trinity_force_level_0(rdev); trinity_unforce_levels(rdev); trinity_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; } trinity_release_mutex(rdev); @@ -1679,6 +1693,7 @@ static int trinity_parse_power_table(struct radeon_device *rdev) rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { + u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) @@ -1692,14 +1707,16 @@ static int trinity_parse_power_table(struct radeon_device *rdev) } rdev->pm.dpm.ps[i].ps_priv = ps; k = 0; + idx = (u8 *)&power_state->v2.clockInfoIndex[0]; for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { - clock_array_index = power_state->v2.clockInfoIndex[j]; + clock_array_index = idx[j]; if (clock_array_index >= clock_info_array->ucNumEntries) continue; if (k >= SUMO_MAX_HARDWARE_POWERLEVELS) break; clock_info = (union pplib_clock_info *) - &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + ((u8 *)&clock_info_array->clockInfo[0] + + (clock_array_index * clock_info_array->ucEntrySize)); trinity_parse_pplib_clock_info(rdev, &rdev->pm.dpm.ps[i], k, clock_info); @@ -1855,6 +1872,7 @@ int trinity_dpm_init(struct radeon_device *rdev) for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) pi->at[i] = TRINITY_AT_DFLT; + pi->enable_bapm = false; pi->enable_nbps_policy = true; pi->enable_sclk_ds = true; pi->enable_gfx_power_gating = true; diff --git a/sys/dev/drm/radeon/trinity_dpm.h b/sys/dev/drm/radeon/trinity_dpm.h index e82df071f8..c261657750 100644 --- a/sys/dev/drm/radeon/trinity_dpm.h +++ b/sys/dev/drm/radeon/trinity_dpm.h @@ -108,6 +108,7 @@ struct trinity_power_info { bool enable_auto_thermal_throttling; bool enable_dpm; bool enable_sclk_ds; + bool enable_bapm; bool uvd_dpm; struct radeon_ps current_rps; struct trinity_ps current_ps; @@ -118,6 +119,7 @@ struct trinity_power_info { #define TRINITY_AT_DFLT 30 /* trinity_smc.c */ +int trinity_dpm_bapm_enable(struct radeon_device *rdev, bool enable); int trinity_dpm_config(struct radeon_device *rdev, bool enable); int trinity_uvd_dpm_config(struct radeon_device *rdev); int trinity_dpm_force_state(struct radeon_device *rdev, u32 n); diff --git a/sys/dev/drm/radeon/trinity_smc.c b/sys/dev/drm/radeon/trinity_smc.c index 9cb8974b5e..6a28db2a48 100644 --- a/sys/dev/drm/radeon/trinity_smc.c +++ b/sys/dev/drm/radeon/trinity_smc.c @@ -56,6 +56,14 @@ static int trinity_notify_message_to_smu(struct radeon_device *rdev, u32 id) return 0; } +int trinity_dpm_bapm_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + return trinity_notify_message_to_smu(rdev, PPSMC_MSG_EnableBAPM); + else + return trinity_notify_message_to_smu(rdev, PPSMC_MSG_DisableBAPM); +} + int trinity_dpm_config(struct radeon_device *rdev, bool enable) { if (enable) diff --git a/sys/dev/drm/radeon/uvd_v1_0.c b/sys/dev/drm/radeon/uvd_v1_0.c new file mode 100644 index 0000000000..7266805d97 --- /dev/null +++ b/sys/dev/drm/radeon/uvd_v1_0.c @@ -0,0 +1,436 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "r600d.h" + +/** + * uvd_v1_0_get_rptr - get read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware read pointer + */ +uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return RREG32(UVD_RBC_RB_RPTR); +} + +/** + * uvd_v1_0_get_wptr - get write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware write pointer + */ +uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + return RREG32(UVD_RBC_RB_WPTR); +} + +/** + * uvd_v1_0_set_wptr - set write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Commits the write pointer to the hardware + */ +void uvd_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + WREG32(UVD_RBC_RB_WPTR, ring->wptr); +} + +/** + * uvd_v1_0_init - start and test UVD block + * + * @rdev: radeon_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +int uvd_v1_0_init(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + uint32_t tmp; + int r; + + /* raise clocks while booting up the VCPU */ + radeon_set_uvd_clocks(rdev, 53300, 40000); + + r = uvd_v1_0_start(rdev); + if (r) + goto done; + + ring->ready = true; + r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); + if (r) { + ring->ready = false; + goto done; + } + + r = radeon_ring_lock(rdev, ring, 10); + if (r) { + DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); + goto done; + } + + tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); + radeon_ring_write(ring, 0x8); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); + radeon_ring_write(ring, 3); + + radeon_ring_unlock_commit(rdev, ring); + +done: + /* lower clocks again */ + radeon_set_uvd_clocks(rdev, 0, 0); + + if (!r) + DRM_INFO("UVD initialized successfully.\n"); + + return r; +} + +/** + * uvd_v1_0_fini - stop the hardware block + * + * @rdev: radeon_device pointer + * + * Stop the UVD block, mark ring as not ready any more + */ +void uvd_v1_0_fini(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + + uvd_v1_0_stop(rdev); + ring->ready = false; +} + +/** + * uvd_v1_0_start - start UVD block + * + * @rdev: radeon_device pointer + * + * Setup and start the UVD block + */ +int uvd_v1_0_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + uint32_t rb_bufsz; + int i, j, r; + + /* disable byte swapping */ + u32 lmi_swap_cntl = 0; + u32 mp_swap_cntl = 0; + + /* disable clock gating */ + WREG32(UVD_CGC_GATE, 0); + + /* disable interupt */ + WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); + mdelay(1); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | + LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | + CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); + mdelay(5); + + /* take UVD block out of reset */ + WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); + mdelay(5); + + /* initialize UVD memory controller */ + WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | + (1 << 21) | (1 << 9) | (1 << 20)); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; + mp_swap_cntl = 0; +#endif + WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl); + + WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); + WREG32(UVD_MPC_SET_MUXA1, 0x0); + WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); + WREG32(UVD_MPC_SET_MUXB1, 0x0); + WREG32(UVD_MPC_SET_ALU, 0); + WREG32(UVD_MPC_SET_MUX, 0x88); + + /* take all subblocks out of reset, except VCPU */ + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); + mdelay(5); + + /* enable VCPU clock */ + WREG32(UVD_VCPU_CNTL, 1 << 9); + + /* enable UMC */ + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + + /* boot up the VCPU */ + WREG32(UVD_SOFT_RESET, 0); + mdelay(10); + + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(UVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); + WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); + mdelay(10); + WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("UVD not responding, giving up!!!\n"); + return r; + } + + /* enable interupt */ + WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); + + /* force RBC into idle state */ + WREG32(UVD_RBC_RB_CNTL, 0x11010101); + + /* Set the write pointer delay */ + WREG32(UVD_RBC_RB_WPTR_CNTL, 0); + + /* programm the 4GB memory segment for rptr and ring buffer */ + WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | + (0x7 << 16) | (0x1 << 31)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(UVD_RBC_RB_RPTR, 0x0); + + ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); + WREG32(UVD_RBC_RB_WPTR, ring->wptr); + + /* set the ring address */ + WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); + + /* Set ring buffer size */ + rb_bufsz = order_base_2(ring->ring_size); + rb_bufsz = (0x1 << 8) | rb_bufsz; + WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f); + + return 0; +} + +/** + * uvd_v1_0_stop - stop UVD block + * + * @rdev: radeon_device pointer + * + * stop the UVD block + */ +void uvd_v1_0_stop(struct radeon_device *rdev) +{ + /* force RBC into idle state */ + WREG32(UVD_RBC_RB_CNTL, 0x11010101); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); + mdelay(1); + + /* put VCPU into reset */ + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); + mdelay(5); + + /* disable VCPU clock */ + WREG32(UVD_VCPU_CNTL, 0x0); + + /* Unstall UMC and register bus */ + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); +} + +/** + * uvd_v1_0_ring_test - register write test + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Test if we can successfully write to the context register + */ +int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, ring, 3); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(UVD_CONTEXT_ID); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/** + * uvd_v1_0_semaphore_emit - emit semaphore command + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * @semaphore: semaphore to emit commands for + * @emit_wait: true if we should emit a wait command + * + * Emit a semaphore command (either wait or signal) to the UVD ring. + */ +void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, emit_wait ? 1 : 0); +} + +/** + * uvd_v1_0_ib_execute - execute indirect buffer + * + * @rdev: radeon_device pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer + */ +void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); + radeon_ring_write(ring, ib->gpu_addr); + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); + radeon_ring_write(ring, ib->length_dw); +} + +/** + * uvd_v1_0_ib_test - test ib execution + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Test if we can successfully execute an IB + */ +int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_fence *fence = NULL; + int r; + + r = radeon_set_uvd_clocks(rdev, 53300, 40000); + if (r) { + DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r); + return r; + } + + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); + if (r) { + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); + goto error; + } + + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); + if (r) { + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); + goto error; + } + + r = radeon_fence_wait(fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + goto error; + } + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); +error: + radeon_fence_unref(&fence); + radeon_set_uvd_clocks(rdev, 0, 0); + return r; +} diff --git a/sys/dev/drm/radeon/uvd_v2_2.c b/sys/dev/drm/radeon/uvd_v2_2.c new file mode 100644 index 0000000000..18a1b99ee1 --- /dev/null +++ b/sys/dev/drm/radeon/uvd_v2_2.c @@ -0,0 +1,165 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "rv770d.h" + +/** + * uvd_v2_2_fence_emit - emit an fence & trap command + * + * @rdev: radeon_device pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +void uvd_v2_2_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 2); + return; +} + +/** + * uvd_v2_2_resume - memory controller programming + * + * @rdev: radeon_device pointer + * + * Let the UVD memory controller know it's offsets + */ +int uvd_v2_2_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t chip_id, size; + int r; + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + /* programm the VCPU memory controller bits 0-27 */ + addr = rdev->uvd.gpu_addr >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + /* tell firmware which hardware it is running on */ + switch (rdev->family) { + default: + return -EINVAL; + case CHIP_RV710: + chip_id = 0x01000005; + break; + case CHIP_RV730: + chip_id = 0x01000006; + break; + case CHIP_RV740: + chip_id = 0x01000007; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + chip_id = 0x01000008; + break; + case CHIP_JUNIPER: + chip_id = 0x01000009; + break; + case CHIP_REDWOOD: + chip_id = 0x0100000a; + break; + case CHIP_CEDAR: + chip_id = 0x0100000b; + break; + case CHIP_SUMO: + case CHIP_SUMO2: + chip_id = 0x0100000c; + break; + case CHIP_PALM: + chip_id = 0x0100000e; + break; + case CHIP_CAYMAN: + chip_id = 0x0100000f; + break; + case CHIP_BARTS: + chip_id = 0x01000010; + break; + case CHIP_TURKS: + chip_id = 0x01000011; + break; + case CHIP_CAICOS: + chip_id = 0x01000012; + break; + case CHIP_TAHITI: + chip_id = 0x01000014; + break; + case CHIP_VERDE: + chip_id = 0x01000015; + break; + case CHIP_PITCAIRN: + chip_id = 0x01000016; + break; + case CHIP_ARUBA: + chip_id = 0x01000017; + break; + } + WREG32(UVD_VCPU_CHIP_ID, chip_id); + + return 0; +} diff --git a/sys/dev/drm/radeon/uvd_v3_1.c b/sys/dev/drm/radeon/uvd_v3_1.c new file mode 100644 index 0000000000..5b6fa1f62d --- /dev/null +++ b/sys/dev/drm/radeon/uvd_v3_1.c @@ -0,0 +1,55 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "nid.h" + +/** + * uvd_v3_1_semaphore_emit - emit semaphore command + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * @semaphore: semaphore to emit commands for + * @emit_wait: true if we should emit a wait command + * + * Emit a semaphore command (either wait or signal) to the UVD ring. + */ +void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); +} diff --git a/sys/dev/drm/radeon/uvd_v4_2.c b/sys/dev/drm/radeon/uvd_v4_2.c new file mode 100644 index 0000000000..0e73324bdc --- /dev/null +++ b/sys/dev/drm/radeon/uvd_v4_2.c @@ -0,0 +1,68 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" + +/** + * uvd_v4_2_resume - memory controller programming + * + * @rdev: radeon_device pointer + * + * Let the UVD memory controller know it's offsets + */ +int uvd_v4_2_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t size; + + /* programm the VCPU memory controller bits 0-27 */ + addr = rdev->uvd.gpu_addr >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + return 0; +} diff --git a/sys/dev/drm/ttm/ttm_bo.c b/sys/dev/drm/ttm/ttm_bo.c index adf137d065..632abc7fc4 100644 --- a/sys/dev/drm/ttm/ttm_bo.c +++ b/sys/dev/drm/ttm/ttm_bo.c @@ -190,7 +190,8 @@ int ttm_bo_del_from_lru(struct ttm_buffer_object *bo) int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo, bool interruptible, - bool no_wait, bool use_sequence, uint32_t sequence) + bool no_wait, bool use_ticket, + struct ww_acquire_ctx *ticket) { int ret; @@ -198,17 +199,17 @@ int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo, /** * Deadlock avoidance for multi-bo reserving. */ - if (use_sequence && bo->seq_valid) { + if (use_ticket && bo->seq_valid) { /** * We've already reserved this one. */ - if (unlikely(sequence == bo->val_seq)) + if (unlikely(ticket->stamp == bo->val_seq)) return -EDEADLK; /** * Already reserved by a thread that will not back * off for us. We need to back off. */ - if (unlikely(sequence - bo->val_seq < (1U << 31))) + if (unlikely(ticket->stamp - bo->val_seq <= LONG_MAX)) return -EAGAIN; } @@ -221,13 +222,14 @@ int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo, return ret; } - if (use_sequence) { + if (use_ticket) { bool wake_up = false; + /** * Wake up waiters that may need to recheck for deadlock, * if we decreased the sequence number. */ - if (unlikely((bo->val_seq - sequence < (1U << 31)) + if (unlikely((bo->val_seq - ticket->stamp <= LONG_MAX) || !bo->seq_valid)) wake_up = true; @@ -241,7 +243,7 @@ int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo, * written before val_seq was, and just means some slightly * increased cpu usage */ - bo->val_seq = sequence; + bo->val_seq = ticket->stamp; bo->seq_valid = true; if (wake_up) wake_up_all(&bo->event_queue); @@ -267,14 +269,15 @@ void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, - bool no_wait, bool use_sequence, uint32_t sequence) + bool no_wait, bool use_ticket, + struct ww_acquire_ctx *ticket) { struct ttm_bo_global *glob = bo->glob; int put_count = 0; int ret; - ret = ttm_bo_reserve_nolru(bo, interruptible, no_wait, use_sequence, - sequence); + ret = ttm_bo_reserve_nolru(bo, interruptible, no_wait, use_ticket, + ticket); if (likely(ret == 0)) { lockmgr(&glob->lru_lock, LK_EXCLUSIVE); put_count = ttm_bo_del_from_lru(bo); @@ -286,13 +289,14 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, } int ttm_bo_reserve_slowpath_nolru(struct ttm_buffer_object *bo, - bool interruptible, uint32_t sequence) + bool interruptible, + struct ww_acquire_ctx *ticket) { bool wake_up = false; int ret; while (unlikely(atomic_xchg(&bo->reserved, 1) != 0)) { - WARN_ON(bo->seq_valid && sequence == bo->val_seq); + WARN_ON(bo->seq_valid && ticket->stamp == bo->val_seq); ret = ttm_bo_wait_unreserved(bo, interruptible); @@ -300,14 +304,14 @@ int ttm_bo_reserve_slowpath_nolru(struct ttm_buffer_object *bo, return ret; } - if ((bo->val_seq - sequence < (1U << 31)) || !bo->seq_valid) + if (bo->val_seq - ticket->stamp < LONG_MAX || !bo->seq_valid) wake_up = true; /** * Wake up waiters that may need to recheck for deadlock, * if we decreased the sequence number. */ - bo->val_seq = sequence; + bo->val_seq = ticket->stamp; bo->seq_valid = true; if (wake_up) wake_up_all(&bo->event_queue); @@ -316,12 +320,12 @@ int ttm_bo_reserve_slowpath_nolru(struct ttm_buffer_object *bo, } int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, - bool interruptible, uint32_t sequence) + bool interruptible, struct ww_acquire_ctx *ticket) { struct ttm_bo_global *glob = bo->glob; int put_count, ret; - ret = ttm_bo_reserve_slowpath_nolru(bo, interruptible, sequence); + ret = ttm_bo_reserve_slowpath_nolru(bo, interruptible, ticket); if (likely(!ret)) { lockmgr(&glob->lru_lock, LK_EXCLUSIVE); put_count = ttm_bo_del_from_lru(bo); @@ -346,7 +350,7 @@ ttm_bo_unreserve_core(struct ttm_buffer_object *bo) wake_up_all(&bo->event_queue); } -void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo) +void ttm_bo_unreserve_ticket_locked(struct ttm_buffer_object *bo, struct ww_acquire_ctx *ticket) { ttm_bo_add_to_lru(bo); ttm_bo_unreserve_core(bo); @@ -357,11 +361,21 @@ void ttm_bo_unreserve(struct ttm_buffer_object *bo) struct ttm_bo_global *glob = bo->glob; lockmgr(&glob->lru_lock, LK_EXCLUSIVE); - ttm_bo_unreserve_locked(bo); + ttm_bo_unreserve_ticket_locked(bo, NULL); lockmgr(&glob->lru_lock, LK_RELEASE); } EXPORT_SYMBOL(ttm_bo_unreserve); +void ttm_bo_unreserve_ticket(struct ttm_buffer_object *bo, struct ww_acquire_ctx *ticket) +{ + struct ttm_bo_global *glob = bo->glob; + + lockmgr(&glob->lru_lock, LK_EXCLUSIVE); + ttm_bo_unreserve_ticket_locked(bo, ticket); + lockmgr(&glob->lru_lock, LK_RELEASE); +} +EXPORT_SYMBOL(ttm_bo_unreserve_ticket); + /* * Call bo->mutex locked. */ diff --git a/sys/dev/drm/ttm/ttm_bo_manager.c b/sys/dev/drm/ttm/ttm_bo_manager.c index 9fd9d64c72..34b25a7f79 100644 --- a/sys/dev/drm/ttm/ttm_bo_manager.c +++ b/sys/dev/drm/ttm/ttm_bo_manager.c @@ -61,28 +61,25 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, lpfn = placement->lpfn; if (!lpfn) lpfn = man->size; - do { - ret = drm_mm_pre_get(mm); - if (unlikely(ret)) - return ret; - lockmgr(&rman->lock, LK_EXCLUSIVE); - node = drm_mm_search_free_in_range(mm, - mem->num_pages, mem->page_alignment, - placement->fpfn, lpfn, 1); - if (unlikely(node == NULL)) { - lockmgr(&rman->lock, LK_RELEASE); - return 0; - } - node = drm_mm_get_block_atomic_range(node, mem->num_pages, - mem->page_alignment, - placement->fpfn, - lpfn); - lockmgr(&rman->lock, LK_RELEASE); - } while (node == NULL); + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + lockmgr(&rman->lock, LK_EXCLUSIVE); + ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages, + mem->page_alignment, + placement->fpfn, lpfn, + DRM_MM_SEARCH_BEST); + lockmgr(&rman->lock, LK_RELEASE); + + if (unlikely(ret)) { + kfree(node); + } else { + mem->mm_node = node; + mem->start = node->start; + } - mem->mm_node = node; - mem->start = node->start; return 0; } @@ -93,8 +90,10 @@ static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man, if (mem->mm_node) { lockmgr(&rman->lock, LK_EXCLUSIVE); - drm_mm_put_block(mem->mm_node); + drm_mm_remove_node(mem->mm_node); lockmgr(&rman->lock, LK_RELEASE); + + kfree(mem->mm_node); mem->mm_node = NULL; } } diff --git a/sys/dev/drm/ttm/ttm_bo_util.c b/sys/dev/drm/ttm/ttm_bo_util.c index 51354e5ef4..9d37f7efca 100644 --- a/sys/dev/drm/ttm/ttm_bo_util.c +++ b/sys/dev/drm/ttm/ttm_bo_util.c @@ -88,6 +88,7 @@ int ttm_mem_io_lock(struct ttm_mem_type_manager *man, bool interruptible) lockmgr(&man->io_reserve_mutex, LK_EXCLUSIVE); return 0; } +EXPORT_SYMBOL(ttm_mem_io_lock); void ttm_mem_io_unlock(struct ttm_mem_type_manager *man) { @@ -96,6 +97,7 @@ void ttm_mem_io_unlock(struct ttm_mem_type_manager *man) lockmgr(&man->io_reserve_mutex, LK_RELEASE); } +EXPORT_SYMBOL(ttm_mem_io_unlock); static int ttm_mem_io_evict(struct ttm_mem_type_manager *man) { @@ -113,8 +115,9 @@ static int ttm_mem_io_evict(struct ttm_mem_type_manager *man) return 0; } -static int ttm_mem_io_reserve(struct ttm_bo_device *bdev, - struct ttm_mem_reg *mem) + +int ttm_mem_io_reserve(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) { struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; int ret = 0; @@ -136,9 +139,10 @@ retry: } return ret; } +EXPORT_SYMBOL(ttm_mem_io_reserve); -static void ttm_mem_io_free(struct ttm_bo_device *bdev, - struct ttm_mem_reg *mem) +void ttm_mem_io_free(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) { struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; @@ -151,6 +155,7 @@ static void ttm_mem_io_free(struct ttm_bo_device *bdev, bdev->driver->io_mem_free(bdev, mem); } +EXPORT_SYMBOL(ttm_mem_io_free); int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo) { diff --git a/sys/dev/drm/ttm/ttm_bo_vm.c b/sys/dev/drm/ttm/ttm_bo_vm.c index 667618d3e6..6debc5b842 100644 --- a/sys/dev/drm/ttm/ttm_bo_vm.c +++ b/sys/dev/drm/ttm/ttm_bo_vm.c @@ -390,163 +390,4 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) return 0; } EXPORT_SYMBOL(ttm_fbdev_mmap); - - -ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp, - const char __user *wbuf, char __user *rbuf, size_t count, - loff_t *f_pos, bool write) -{ - struct ttm_buffer_object *bo; - struct ttm_bo_driver *driver; - struct ttm_bo_kmap_obj map; - unsigned long dev_offset = (*f_pos >> PAGE_SHIFT); - unsigned long kmap_offset; - unsigned long kmap_end; - unsigned long kmap_num; - size_t io_size; - unsigned int page_offset; - char *virtual; - int ret; - bool no_wait = false; - bool dummy; - - lockmgr(&bdev->vm_lock, LK_EXCLUSIVE); - bo = ttm_bo_vm_lookup_rb(bdev, dev_offset, 1); - if (likely(bo != NULL)) - ttm_bo_reference(bo); - lockmgr(&bdev->vm_lock, LK_RELEASE); - - if (unlikely(bo == NULL)) - return -EFAULT; - - driver = bo->bdev->driver; - if (unlikely(!driver->verify_access)) { - ret = -EPERM; - goto out_unref; - } - - ret = driver->verify_access(bo, filp); - if (unlikely(ret != 0)) - goto out_unref; - - kmap_offset = dev_offset - bo->vm_node->start; - if (unlikely(kmap_offset >= bo->num_pages)) { - ret = -EFBIG; - goto out_unref; - } - - page_offset = *f_pos & ~PAGE_MASK; - io_size = bo->num_pages - kmap_offset; - io_size = (io_size << PAGE_SHIFT) - page_offset; - if (count < io_size) - io_size = count; - - kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT; - kmap_num = kmap_end - kmap_offset + 1; - - ret = ttm_bo_reserve(bo, true, no_wait, false, 0); - - switch (ret) { - case 0: - break; - case -EBUSY: - ret = -EAGAIN; - goto out_unref; - default: - goto out_unref; - } - - ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map); - if (unlikely(ret != 0)) { - ttm_bo_unreserve(bo); - goto out_unref; - } - - virtual = ttm_kmap_obj_virtual(&map, &dummy); - virtual += page_offset; - - if (write) - ret = copy_from_user(virtual, wbuf, io_size); - else - ret = copy_to_user(rbuf, virtual, io_size); - - ttm_bo_kunmap(&map); - ttm_bo_unreserve(bo); - ttm_bo_unref(&bo); - - if (unlikely(ret != 0)) - return -EFBIG; - - *f_pos += io_size; - - return io_size; -out_unref: - ttm_bo_unref(&bo); - return ret; -} - -ssize_t ttm_bo_fbdev_io(struct ttm_buffer_object *bo, const char __user *wbuf, - char __user *rbuf, size_t count, loff_t *f_pos, - bool write) -{ - struct ttm_bo_kmap_obj map; - unsigned long kmap_offset; - unsigned long kmap_end; - unsigned long kmap_num; - size_t io_size; - unsigned int page_offset; - char *virtual; - int ret; - bool no_wait = false; - bool dummy; - - kmap_offset = (*f_pos >> PAGE_SHIFT); - if (unlikely(kmap_offset >= bo->num_pages)) - return -EFBIG; - - page_offset = *f_pos & ~PAGE_MASK; - io_size = bo->num_pages - kmap_offset; - io_size = (io_size << PAGE_SHIFT) - page_offset; - if (count < io_size) - io_size = count; - - kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT; - kmap_num = kmap_end - kmap_offset + 1; - - ret = ttm_bo_reserve(bo, true, no_wait, false, 0); - - switch (ret) { - case 0: - break; - case -EBUSY: - return -EAGAIN; - default: - return ret; - } - - ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map); - if (unlikely(ret != 0)) { - ttm_bo_unreserve(bo); - return ret; - } - - virtual = ttm_kmap_obj_virtual(&map, &dummy); - virtual += page_offset; - - if (write) - ret = copy_from_user(virtual, wbuf, io_size); - else - ret = copy_to_user(rbuf, virtual, io_size); - - ttm_bo_kunmap(&map); - ttm_bo_unreserve(bo); - ttm_bo_unref(&bo); - - if (unlikely(ret != 0)) - return ret; - - *f_pos += io_size; - - return io_size; -} #endif diff --git a/sys/dev/drm/ttm/ttm_execbuf_util.c b/sys/dev/drm/ttm/ttm_execbuf_util.c index b06e7af22a..904513cf24 100644 --- a/sys/dev/drm/ttm/ttm_execbuf_util.c +++ b/sys/dev/drm/ttm/ttm_execbuf_util.c @@ -31,7 +31,11 @@ #include #include -static void ttm_eu_backoff_reservation_locked(struct list_head *list) +/* XXX this should go to dma-buf driver, for now just to avoid undef */ +DEFINE_WW_CLASS(reservation_ww_class); + +static void ttm_eu_backoff_reservation_locked(struct list_head *list, + struct ww_acquire_ctx *ticket) { struct ttm_validate_buffer *entry; @@ -40,14 +44,15 @@ static void ttm_eu_backoff_reservation_locked(struct list_head *list) if (!entry->reserved) continue; + entry->reserved = false; if (entry->removed) { - ttm_bo_add_to_lru(bo); + ttm_bo_unreserve_ticket_locked(bo, ticket); entry->removed = false; + } else { + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); } - entry->reserved = false; - atomic_set(&bo->reserved, 0); - wake_up_all(&bo->event_queue); } } @@ -81,7 +86,8 @@ static void ttm_eu_list_ref_sub(struct list_head *list) } } -void ttm_eu_backoff_reservation(struct list_head *list) +void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, + struct list_head *list) { struct ttm_validate_buffer *entry; struct ttm_bo_global *glob; @@ -92,7 +98,8 @@ void ttm_eu_backoff_reservation(struct list_head *list) entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; lockmgr(&glob->lru_lock, LK_EXCLUSIVE); - ttm_eu_backoff_reservation_locked(list); + ttm_eu_backoff_reservation_locked(list, ticket); + ww_acquire_fini(ticket); lockmgr(&glob->lru_lock, LK_RELEASE); } EXPORT_SYMBOL(ttm_eu_backoff_reservation); @@ -109,12 +116,12 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation); * buffers in different orders. */ -int ttm_eu_reserve_buffers(struct list_head *list) +int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, + struct list_head *list) { struct ttm_bo_global *glob; struct ttm_validate_buffer *entry; int ret; - uint32_t val_seq; if (list_empty(list)) return 0; @@ -128,8 +135,8 @@ int ttm_eu_reserve_buffers(struct list_head *list) entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; + ww_acquire_init(ticket, &reservation_ww_class); lockmgr(&glob->lru_lock, LK_EXCLUSIVE); - val_seq = entry->bo->bdev->val_seq++; retry: list_for_each_entry(entry, list, head) { @@ -140,7 +147,7 @@ retry: if (entry->reserved) continue; - ret = ttm_bo_reserve_nolru(bo, true, true, true, val_seq); + ret = ttm_bo_reserve_nolru(bo, true, true, true, ticket); switch (ret) { case 0: break; @@ -150,7 +157,7 @@ retry: if (owned == LK_EXCLUSIVE) lockmgr(&glob->lru_lock, LK_RELEASE); ret = ttm_bo_reserve_nolru(bo, true, false, - true, val_seq); + true, ticket); if (owned == LK_EXCLUSIVE) lockmgr(&glob->lru_lock, LK_EXCLUSIVE); if (!ret) @@ -161,21 +168,13 @@ retry: /* fallthrough */ case -EAGAIN: - ttm_eu_backoff_reservation_locked(list); - - /* - * temporarily increase sequence number every retry, - * to prevent us from seeing our old reservation - * sequence when someone else reserved the buffer, - * but hasn't updated the seq_valid/seqno members yet. - */ - val_seq = entry->bo->bdev->val_seq++; - + ttm_eu_backoff_reservation_locked(list, ticket); lockmgr(&glob->lru_lock, LK_RELEASE); ttm_eu_list_ref_sub(list); - ret = ttm_bo_reserve_slowpath_nolru(bo, true, val_seq); + ret = ttm_bo_reserve_slowpath_nolru(bo, true, ticket); if (unlikely(ret != 0)) - return ret; + goto err_fini; + lockmgr(&glob->lru_lock, LK_EXCLUSIVE); entry->reserved = true; if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { @@ -194,21 +193,25 @@ retry: } } + ww_acquire_done(ticket); ttm_eu_del_from_lru_locked(list); lockmgr(&glob->lru_lock, LK_RELEASE); ttm_eu_list_ref_sub(list); - return 0; err: - ttm_eu_backoff_reservation_locked(list); + ttm_eu_backoff_reservation_locked(list, ticket); lockmgr(&glob->lru_lock, LK_RELEASE); ttm_eu_list_ref_sub(list); +err_fini: + ww_acquire_done(ticket); + ww_acquire_fini(ticket); return ret; } EXPORT_SYMBOL(ttm_eu_reserve_buffers); -void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) +void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, + struct list_head *list, void *sync_obj) { struct ttm_validate_buffer *entry; struct ttm_buffer_object *bo; @@ -231,11 +234,12 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) bo = entry->bo; entry->old_sync_obj = bo->sync_obj; bo->sync_obj = driver->sync_obj_ref(sync_obj); - ttm_bo_unreserve_locked(bo); + ttm_bo_unreserve_ticket_locked(bo, ticket); entry->reserved = false; } lockmgr(&bdev->fence_lock, LK_RELEASE); lockmgr(&glob->lru_lock, LK_RELEASE); + ww_acquire_fini(ticket); list_for_each_entry(entry, list, head) { if (entry->old_sync_obj) -- 2.11.4.GIT