sys/dev/drm/i915/i915_gem.c

   1 /*
   2  * Copyright © 2008-2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include <drm/drmP.h>
  29 #include <drm/drm_vma_manager.h>
  30 #include <drm/i915_drm.h>
  31 #include "i915_drv.h"
  32 #include "i915_vgpu.h"
  33 #include "i915_trace.h"
  34 #include "intel_drv.h"
  35 #include <linux/shmem_fs.h>
  36 #include <linux/slab.h>
  37 #include <linux/swap.h>
  38 #include <linux/pci.h>
  39
/* Request sanity checks are compiled out here: RQ_BUG_ON() expands to
 * nothing, so its argument is never evaluated.
 */
#define RQ_BUG_ON(expr)
  41
/* Prototypes for file-local (static) helpers, declared up front so they can
 * be referenced ahead of their definitions.
 */
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
static void i915_gem_write_fence(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
                                         struct drm_i915_fence_reg *fence,
                                         bool enable);
  53
  54 static bool cpu_cache_is_coherent(struct drm_device *dev,
  55                                   enum i915_cache_level level)
  56 {
  57         return HAS_LLC(dev) || level != I915_CACHE_NONE;
  58 }
  59
  60 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
  61 {
  62         if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
  63                 return true;
  64
  65         return obj->pin_display;
  66 }
  67
  68 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
  69 {
  70         if (obj->tiling_mode)
  71                 i915_gem_release_mmap(obj);
  72
  73         /* As we do not have an associated fence register, we will force
  74          * a tiling change if we ever need to acquire one.
  75          */
  76         obj->fence_dirty = false;
  77         obj->fence_reg = I915_FENCE_REG_NONE;
  78 }
  79
  80 /* some bookkeeping */
  81 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  82                                   size_t size)
  83 {
  84         spin_lock(&dev_priv->mm.object_stat_lock);
  85         dev_priv->mm.object_count++;
  86         dev_priv->mm.object_memory += size;
  87         spin_unlock(&dev_priv->mm.object_stat_lock);
  88 }
  89
  90 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
  91                                      size_t size)
  92 {
  93         spin_lock(&dev_priv->mm.object_stat_lock);
  94         dev_priv->mm.object_count--;
  95         dev_priv->mm.object_memory -= size;
  96         spin_unlock(&dev_priv->mm.object_stat_lock);
  97 }
  98
  99 static int
 100 i915_gem_wait_for_error(struct i915_gpu_error *error)
 101 {
 102         int ret;
 103
 104 #define EXIT_COND (!i915_reset_in_progress(error) || \
 105                    i915_terminally_wedged(error))
 106         if (EXIT_COND)
 107                 return 0;
 108
 109         /*
 110          * Only wait 10 seconds for the gpu reset to complete to avoid hanging
 111          * userspace. If it takes that long something really bad is going on and
 112          * we should simply try to bail out and fail as gracefully as possible.
 113          */
 114         ret = wait_event_interruptible_timeout(error->reset_queue,
 115                                                EXIT_COND,
 116                                                10*HZ);
 117         if (ret == 0) {
 118                 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
 119                 return -EIO;
 120         } else if (ret < 0) {
 121                 return ret;
 122         }
 123 #undef EXIT_COND
 124
 125         return 0;
 126 }
 127
 128 int i915_mutex_lock_interruptible(struct drm_device *dev)
 129 {
 130         struct drm_i915_private *dev_priv = dev->dev_private;
 131         int ret;
 132
 133         ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
 134         if (ret)
 135                 return ret;
 136
 137         ret = mutex_lock_interruptible(&dev->struct_mutex);
 138         if (ret)
 139                 return ret;
 140
 141         WARN_ON(i915_verify_lists(dev));
 142         return 0;
 143 }
 144
 145 int
 146 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 147                             struct drm_file *file)
 148 {
 149         struct drm_i915_private *dev_priv = dev->dev_private;
 150         struct drm_i915_gem_get_aperture *args = data;
 151         struct drm_i915_gem_object *obj;
 152         size_t pinned;
 153
 154         pinned = 0;
 155         mutex_lock(&dev->struct_mutex);
 156         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
 157                 if (i915_gem_obj_is_pinned(obj))
 158                         pinned += i915_gem_obj_ggtt_size(obj);
 159         mutex_unlock(&dev->struct_mutex);
 160
 161         args->aper_size = dev_priv->gtt.base.total;
 162         args->aper_available_size = args->aper_size - pinned;
 163
 164         return 0;
 165 }
 166
 167 #if 0
/*
 * Fill the physically contiguous backing store of @obj (this function is
 * compiled out by the enclosing #if 0).
 *
 * Every page of the object's shmem mapping is copied into
 * obj->phys_handle->vaddr and flushed from the CPU cache. A one-entry
 * sg_table is then published in obj->pages, with DMA address
 * obj->phys_handle->busaddr and length obj->base.size.
 *
 * Returns 0 on success, -EINVAL if the object needs bit17 swizzling, the
 * PTR_ERR() of a failed page lookup, or -ENOMEM if the table cannot be
 * allocated.
 */
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
        struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
        char *vaddr = obj->phys_handle->vaddr;
        struct sg_table *st;
        struct scatterlist *sg;
        int i;

        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
                return -EINVAL;

        /* Copy the object page by page into the contiguous buffer. */
        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                struct page *page;
                char *src;

                page = shmem_read_mapping_page(mapping, i);
                if (IS_ERR(page))
                        return PTR_ERR(page);

                src = kmap_atomic(page);
                memcpy(vaddr, src, PAGE_SIZE);
                drm_clflush_virt_range(vaddr, PAGE_SIZE);
                kunmap_atomic(src);

                /* Drop the reference taken by the page lookup. */
                page_cache_release(page);
                vaddr += PAGE_SIZE;
        }

        i915_gem_chipset_flush(obj->base.dev);

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (st == NULL)
                return -ENOMEM;

        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
                kfree(st);
                return -ENOMEM;
        }

        /* A single entry describes the whole contiguous allocation. */
        sg = st->sgl;
        sg->offset = 0;
        sg->length = obj->base.size;

        sg_dma_address(sg) = obj->phys_handle->busaddr;
        sg_dma_len(sg) = obj->base.size;

        obj->pages = st;
        return 0;
}
 218
/*
 * Release the sg_table of a physically backed object, writing its contents
 * back to the shmem pages first when the object is dirty (this function is
 * compiled out by the enclosing #if 0).
 *
 * The object is moved to the CPU domain before the write-back. Objects
 * marked I915_MADV_DONTNEED are treated as clean, so their contents are not
 * written back.
 */
static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
        int ret;

        BUG_ON(obj->madv == __I915_MADV_PURGED);

        ret = i915_gem_object_set_to_cpu_domain(obj, true);
        if (ret) {
                /* In the event of a disaster, abandon all caches and
                 * hope for the best.
                 */
                WARN_ON(ret != -EIO);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }

        if (obj->madv == I915_MADV_DONTNEED)
                obj->dirty = 0;

        if (obj->dirty) {
                struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
                char *vaddr = obj->phys_handle->vaddr;
                int i;

                for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                        struct page *page;
                        char *dst;

                        /* A page that cannot be looked up is skipped; note
                         * that vaddr is not advanced on this path.
                         */
                        page = shmem_read_mapping_page(mapping, i);
                        if (IS_ERR(page))
                                continue;

                        dst = kmap_atomic(page);
                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        memcpy(dst, vaddr, PAGE_SIZE);
                        kunmap_atomic(dst);

                        set_page_dirty(page);
                        if (obj->madv == I915_MADV_WILLNEED)
                                mark_page_accessed(page);
                        page_cache_release(page);
                        vaddr += PAGE_SIZE;
                }
                obj->dirty = 0;
        }

        sg_free_table(obj->pages);
        kfree(obj->pages);
}
 268
/*
 * Free the DMA allocation held in obj->phys_handle (this function is
 * compiled out by the enclosing #if 0).
 */
static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
        drm_pci_free(obj->base.dev, obj->phys_handle);
}
 274
/*
 * Object operations table wiring up the three phys helpers above (this
 * table is compiled out by the enclosing #if 0).
 */
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
        .get_pages = i915_gem_object_get_pages_phys,
        .put_pages = i915_gem_object_put_pages_phys,
        .release = i915_gem_object_release_phys,
};
 280 #endif
 281
 282 static int
 283 drop_pages(struct drm_i915_gem_object *obj)
 284 {
 285         struct i915_vma *vma, *next;
 286         int ret;
 287
 288         drm_gem_object_reference(&obj->base);
 289         list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
 290                 if (i915_vma_unbind(vma))
 291                         break;
 292
 293         ret = i915_gem_object_put_pages(obj);
 294         drm_gem_object_unreference(&obj->base);
 295
 296         return ret;
 297 }
 298
 299 int
 300 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 301                             int align)
 302 {
 303         drm_dma_handle_t *phys;
 304         int ret;
 305
 306         if (obj->phys_handle) {
 307                 if ((unsigned long)obj->phys_handle->vaddr & (align -1))
 308                         return -EBUSY;
 309
 310                 return 0;
 311         }
 312
 313         if (obj->madv != I915_MADV_WILLNEED)
 314                 return -EFAULT;
 315
 316 #if 0
 317         if (obj->base.filp == NULL)
 318                 return -EINVAL;
 319 #endif
 320
 321         ret = drop_pages(obj);
 322         if (ret)
 323                 return ret;
 324
 325         /* create a new object */
 326         phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 327         if (!phys)
 328                 return -ENOMEM;
 329
 330         obj->phys_handle = phys;
 331 #if 0
 332         obj->ops = &i915_gem_phys_ops;
 333 #endif
 334
 335         return i915_gem_object_get_pages(obj);
 336 }
 337
 338 static int
 339 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 340                      struct drm_i915_gem_pwrite *args,
 341                      struct drm_file *file_priv)
 342 {
 343         struct drm_device *dev = obj->base.dev;
 344         void *vaddr = (char *)obj->phys_handle->vaddr + args->offset;
 345         char __user *user_data = to_user_ptr(args->data_ptr);
 346         int ret = 0;
 347
 348         /* We manually control the domain here and pretend that it
 349          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 350          */
 351         ret = i915_gem_object_wait_rendering(obj, false);
 352         if (ret)
 353                 return ret;
 354
 355         intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU);
 356         if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
 357                 unsigned long unwritten;
 358
 359                 /* The physical object once assigned is fixed for the lifetime
 360                  * of the obj, so we can safely drop the lock and continue
 361                  * to access vaddr.
 362                  */
 363                 mutex_unlock(&dev->struct_mutex);
 364                 unwritten = copy_from_user(vaddr, user_data, args->size);
 365                 mutex_lock(&dev->struct_mutex);
 366                 if (unwritten) {
 367                         ret = -EFAULT;
 368                         goto out;
 369                 }
 370         }
 371
 372         drm_clflush_virt_range(vaddr, args->size);
 373         i915_gem_chipset_flush(dev);
 374
 375 out:
 376         intel_fb_obj_flush(obj, false);
 377         return ret;
 378 }
 379
 380 void *i915_gem_object_alloc(struct drm_device *dev)
 381 {
 382         return kmalloc(sizeof(struct drm_i915_gem_object),
 383             M_DRM, M_WAITOK | M_ZERO);
 384 }
 385
/* Release the memory of an object structure by handing it to kfree(). */
void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
        kfree(obj);
}
 390
 391 static int
 392 i915_gem_create(struct drm_file *file,
 393                 struct drm_device *dev,
 394                 uint64_t size,
 395                 uint32_t *handle_p)
 396 {
 397         struct drm_i915_gem_object *obj;
 398         int ret;
 399         u32 handle;
 400
 401         size = roundup(size, PAGE_SIZE);
 402         if (size == 0)
 403                 return -EINVAL;
 404
 405         /* Allocate the new object */
 406         obj = i915_gem_alloc_object(dev, size);
 407         if (obj == NULL)
 408                 return -ENOMEM;
 409
 410         ret = drm_gem_handle_create(file, &obj->base, &handle);
 411         /* drop reference from allocate - handle holds it now */
 412         drm_gem_object_unreference_unlocked(&obj->base);
 413         if (ret)
 414                 return ret;
 415
 416         *handle_p = handle;
 417         return 0;
 418 }
 419
 420 int
 421 i915_gem_dumb_create(struct drm_file *file,
 422                      struct drm_device *dev,
 423                      struct drm_mode_create_dumb *args)
 424 {
 425         /* have to work out size/pitch and return them */
 426         args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
 427         args->size = args->pitch * args->height;
 428         return i915_gem_create(file, dev,
 429                                args->size, &args->handle);
 430 }
 431
 432 /**
 433  * Creates a new mm object and returns a handle to it.
 434  */
 435 int
 436 i915_gem_create_ioctl(struct drm_device *dev, void *data,
 437                       struct drm_file *file)
 438 {
 439         struct drm_i915_gem_create *args = data;
 440
 441         return i915_gem_create(file, dev,
 442                                args->size, &args->handle);
 443 }
 444
 445 static inline int
 446 __copy_to_user_swizzled(char __user *cpu_vaddr,
 447                         const char *gpu_vaddr, int gpu_offset,
 448                         int length)
 449 {
 450         int ret, cpu_offset = 0;
 451
 452         while (length > 0) {
 453                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
 454                 int this_length = min(cacheline_end - gpu_offset, length);
 455                 int swizzled_gpu_offset = gpu_offset ^ 64;
 456
 457                 ret = __copy_to_user(cpu_vaddr + cpu_offset,
 458                                      gpu_vaddr + swizzled_gpu_offset,
 459                                      this_length);
 460                 if (ret)
 461                         return ret + length;
 462
 463                 cpu_offset += this_length;
 464                 gpu_offset += this_length;
 465                 length -= this_length;
 466         }
 467
 468         return 0;
 469 }
 470
 471 static inline int
 472 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
 473                           const char __user *cpu_vaddr,
 474                           int length)
 475 {
 476         int ret, cpu_offset = 0;
 477
 478         while (length > 0) {
 479                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
 480                 int this_length = min(cacheline_end - gpu_offset, length);
 481                 int swizzled_gpu_offset = gpu_offset ^ 64;
 482
 483                 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
 484                                        cpu_vaddr + cpu_offset,
 485                                        this_length);
 486                 if (ret)
 487                         return ret + length;
 488
 489                 cpu_offset += this_length;
 490                 gpu_offset += this_length;
 491                 length -= this_length;
 492         }
 493
 494         return 0;
 495 }
 496
 497 /*
 498  * Pins the specified object's pages and synchronizes the object with
 499  * GPU accesses. Sets needs_clflush to non-zero if the caller should
 500  * flush the object from the CPU cache.
 501  */
 502 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 503                                     int *needs_clflush)
 504 {
 505         int ret;
 506
 507         *needs_clflush = 0;
 508
 509 #if 0
 510         if (!obj->base.filp)
 511                 return -EINVAL;
 512 #endif
 513
 514         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
 515                 /* If we're not in the cpu read domain, set ourself into the gtt
 516                  * read domain and manually flush cachelines (if required). This
 517                  * optimizes for the case when the gpu will dirty the data
 518                  * anyway again before the next pread happens. */
 519                 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
 520                                                         obj->cache_level);
 521                 ret = i915_gem_object_wait_rendering(obj, true);
 522                 if (ret)
 523                         return ret;
 524         }
 525
 526         ret = i915_gem_object_get_pages(obj);
 527         if (ret)
 528                 return ret;
 529
 530         i915_gem_object_pin_pages(obj);
 531
 532         return ret;
 533 }
 534
 535 /* Per-page copy function for the shmem pread fastpath.
 536  * Flushes invalid cachelines before reading the target if
 537  * needs_clflush is set. */
 538 static int
 539 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
 540                  char __user *user_data,
 541                  bool page_do_bit17_swizzling, bool needs_clflush)
 542 {
 543         char *vaddr;
 544         int ret;
 545
 546         if (unlikely(page_do_bit17_swizzling))
 547                 return -EINVAL;
 548
 549         vaddr = kmap_atomic(page);
 550         if (needs_clflush)
 551                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 552                                        page_length);
 553         ret = __copy_to_user_inatomic(user_data,
 554                                       vaddr + shmem_page_offset,
 555                                       page_length);
 556         kunmap_atomic(vaddr);
 557
 558         return ret ? -EFAULT : 0;
 559 }
 560
 561 static void
 562 shmem_clflush_swizzled_range(char *addr, unsigned long length,
 563                              bool swizzled)
 564 {
 565         if (unlikely(swizzled)) {
 566                 unsigned long start = (unsigned long) addr;
 567                 unsigned long end = (unsigned long) addr + length;
 568
 569                 /* For swizzling simply ensure that we always flush both
 570                  * channels. Lame, but simple and it works. Swizzled
 571                  * pwrite/pread is far from a hotpath - current userspace
 572                  * doesn't use it at all. */
 573                 start = round_down(start, 128);
 574                 end = round_up(end, 128);
 575
 576                 drm_clflush_virt_range((void *)start, end - start);
 577         } else {
 578                 drm_clflush_virt_range(addr, length);
 579         }
 580
 581 }
 582
 583 /* Only difference to the fast-path function is that this can handle bit17
 584  * and uses non-atomic copy and kmap functions. */
 585 static int
 586 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
 587                  char __user *user_data,
 588                  bool page_do_bit17_swizzling, bool needs_clflush)
 589 {
 590         char *vaddr;
 591         int ret;
 592
 593         vaddr = kmap(page);
 594         if (needs_clflush)
 595                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 596                                              page_length,
 597                                              page_do_bit17_swizzling);
 598
 599         if (page_do_bit17_swizzling)
 600                 ret = __copy_to_user_swizzled(user_data,
 601                                               vaddr, shmem_page_offset,
 602                                               page_length);
 603         else
 604                 ret = __copy_to_user(user_data,
 605                                      vaddr + shmem_page_offset,
 606                                      page_length);
 607         kunmap(page);
 608
 609         return ret ? - EFAULT : 0;
 610 }
 611
/*
 * Copy args->size bytes, starting at byte args->offset of the object, from
 * the object's backing pages to the user buffer at args->data_ptr.
 *
 * Each page is first attempted with the atomic fast path. If that fails,
 * dev->struct_mutex is dropped, the user buffer is prefaulted (at most once
 * per call, and only if prefaulting is not disabled) and the page is
 * retried with the sleeping slow path before the mutex is taken again. The
 * unlock/lock pair implies the caller enters with dev->struct_mutex held.
 *
 * The pages are pinned by i915_gem_obj_prepare_shmem_read() and unpinned
 * before returning. Returns 0 on success or a negative error.
 */
static int
i915_gem_shmem_pread(struct drm_device *dev,
                     struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pread *args,
                     struct drm_file *file)
{
        char __user *user_data;
        ssize_t remain;
        loff_t offset;
        int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
        int prefaulted = 0;
        int needs_clflush = 0;
        struct sg_page_iter sg_iter;

        user_data = to_user_ptr(args->data_ptr);
        remain = args->size;

        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
        if (ret)
                return ret;

        offset = args->offset;

        /* Walk the backing pages, starting at the page containing offset. */
        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                         offset >> PAGE_SHIFT) {
                struct vm_page *page = sg_page_iter_page(&sg_iter);

                if (remain <= 0)
                        break;

                /* Operation in this page
                 *
                 * shmem_page_offset = offset within page in shmem file
                 * page_length = bytes to copy for this page
                 */
                shmem_page_offset = offset_in_page(offset);
                page_length = remain;
                if ((shmem_page_offset + page_length) > PAGE_SIZE)
                        page_length = PAGE_SIZE - shmem_page_offset;

                /* Swizzling applies per page, keyed on bit 17 of the page's
                 * physical address. */
                page_do_bit17_swizzling = obj_do_bit17_swizzling &&
                        (page_to_phys(page) & (1 << 17)) != 0;

                ret = shmem_pread_fast(page, shmem_page_offset, page_length,
                                       user_data, page_do_bit17_swizzling,
                                       needs_clflush);
                if (ret == 0)
                        goto next_page;

                /* Fast path failed: the slow path runs without the mutex. */
                mutex_unlock(&dev->struct_mutex);

                if (likely(!i915.prefault_disable) && !prefaulted) {
                        ret = fault_in_multipages_writeable(user_data, remain);
                        /* Userspace is tricking us, but we've already clobbered
                         * its pages with the prefault and promised to write the
                         * data up to the first fault. Hence ignore any errors
                         * and just continue. */
                        (void)ret;
                        prefaulted = 1;
                }

                ret = shmem_pread_slow(page, shmem_page_offset, page_length,
                                       user_data, page_do_bit17_swizzling,
                                       needs_clflush);

                mutex_lock(&dev->struct_mutex);

                if (ret)
                        goto out;

next_page:
                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }

out:
        /* Undo the pin taken by i915_gem_obj_prepare_shmem_read(). */
        i915_gem_object_unpin_pages(obj);

        return ret;
}
 696
 697 /**
 698  * Reads data from the object referenced by handle.
 699  *
 700  * On error, the contents of *data are undefined.
 701  */
 702 int
 703 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 704                      struct drm_file *file)
 705 {
 706         struct drm_i915_gem_pread *args = data;
 707         struct drm_i915_gem_object *obj;
 708         int ret = 0;
 709
 710         if (args->size == 0)
 711                 return 0;
 712
 713         ret = i915_mutex_lock_interruptible(dev);
 714         if (ret)
 715                 return ret;
 716
 717         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 718         if (&obj->base == NULL) {
 719                 ret = -ENOENT;
 720                 goto unlock;
 721         }
 722
 723         /* Bounds check source.  */
 724         if (args->offset > obj->base.size ||
 725             args->size > obj->base.size - args->offset) {
 726                 ret = -EINVAL;
 727                 goto out;
 728         }
 729
 730         /* prime objects have no backing filp to GEM pread/pwrite
 731          * pages from.
 732          */
 733
 734         trace_i915_gem_object_pread(obj, args->offset, args->size);
 735
 736         ret = i915_gem_shmem_pread(dev, obj, args, file);
 737
 738 out:
 739         drm_gem_object_unreference(&obj->base);
 740 unlock:
 741         mutex_unlock(&dev->struct_mutex);
 742         return ret;
 743 }
 744
 745 /* This is the fast write path which cannot handle
 746  * page faults in the source data
 747  */
 748
 749 static inline int
 750 fast_user_write(struct io_mapping *mapping,
 751                 loff_t page_base, int page_offset,
 752                 char __user *user_data,
 753                 int length)
 754 {
 755         void __iomem *vaddr_atomic;
 756         void *vaddr;
 757         unsigned long unwritten;
 758
 759         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
 760         /* We can use the cpu mem copy function because this is X86. */
 761         vaddr = (char __force*)vaddr_atomic + page_offset;
 762         unwritten = __copy_from_user_inatomic_nocache(vaddr,
 763                                                       user_data, length);
 764         io_mapping_unmap_atomic(vaddr_atomic);
 765         return unwritten;
 766 }
 767
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 *
 * The object is pinned into the mappable part of the global GTT without
 * blocking, moved to the GTT write domain and stripped of its fence. The
 * user data is then written page by page with fast_user_write().
 *
 * Returns 0 on success, the error from pinning, the domain change or the
 * fence removal, or -EFAULT as soon as one page cannot be copied; the
 * comment in the loop indicates the caller is expected to retry through a
 * slower path in that case.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
                         struct drm_i915_gem_object *obj,
                         struct drm_i915_gem_pwrite *args,
                         struct drm_file *file)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        ssize_t remain;
        loff_t offset, page_base;
        char __user *user_data;
        int page_offset, page_length, ret;

        ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
        if (ret)
                goto out;

        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto out_unpin;

        ret = i915_gem_object_put_fence(obj);
        if (ret)
                goto out_unpin;

        user_data = to_user_ptr(args->data_ptr);
        remain = args->size;

        /* From here on, offset is an address within the GGTT. */
        offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

        intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT);

        while (remain > 0) {
                /* Operation in this page
                 *
                 * page_base = page offset within aperture
                 * page_offset = offset within page
                 * page_length = bytes to copy for this page
                 */
                /* NOTE(review): ~PAGE_MASK gives the page-aligned base only
                 * if PAGE_MASK is defined as PAGE_SIZE - 1 on this platform
                 * - confirm against the platform headers.
                 */
                page_base = offset & ~PAGE_MASK;
                page_offset = offset_in_page(offset);
                page_length = remain;
                if ((page_offset + remain) > PAGE_SIZE)
                        page_length = PAGE_SIZE - page_offset;

                /* If we get a fault while copying data, then (presumably) our
                 * source page isn't available.  Return the error and we'll
                 * retry in the slow path.
                 */
                if (fast_user_write(dev_priv->gtt.mappable, page_base,
                                    page_offset, user_data, page_length)) {
                        ret = -EFAULT;
                        goto out_flush;
                }

                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }

        /* Unwind in reverse order of setup; success falls through too. */
out_flush:
        intel_fb_obj_flush(obj, false);
out_unpin:
        i915_gem_object_ggtt_unpin(obj);
out:
        return ret;
}
 838
 839 /* Per-page copy function for the shmem pwrite fastpath.
 840  * Flushes invalid cachelines before writing to the target if
 841  * needs_clflush_before is set and flushes out any written cachelines after
 842  * writing if needs_clflush is set. */
 843 static int
 844 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
 845                   char __user *user_data,
 846                   bool page_do_bit17_swizzling,
 847                   bool needs_clflush_before,
 848                   bool needs_clflush_after)
 849 {
 850         char *vaddr;
 851         int ret;
 852
 853         if (unlikely(page_do_bit17_swizzling))
 854                 return -EINVAL;
 855
 856         vaddr = kmap_atomic(page);
 857         if (needs_clflush_before)
 858                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 859                                        page_length);
 860         ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
 861                                         user_data, page_length);
 862         if (needs_clflush_after)
 863                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 864                                        page_length);
 865         kunmap_atomic(vaddr);
 866
 867         return ret ? -EFAULT : 0;
 868 }
 869
 870 /* Only difference to the fast-path function is that this can handle bit17
 871  * and uses non-atomic copy and kmap functions. */
 872 static int
 873 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
 874                   char __user *user_data,
 875                   bool page_do_bit17_swizzling,
 876                   bool needs_clflush_before,
 877                   bool needs_clflush_after)
 878 {
 879         char *vaddr;
 880         int ret;
 881
 882         vaddr = kmap(page);
 883         if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
 884                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 885                                              page_length,
 886                                              page_do_bit17_swizzling);
 887         if (page_do_bit17_swizzling)
 888                 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
 889                                                 user_data,
 890                                                 page_length);
 891         else
 892                 ret = __copy_from_user(vaddr + shmem_page_offset,
 893                                        user_data,
 894                                        page_length);
 895         if (needs_clflush_after)
 896                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 897                                              page_length,
 898                                              page_do_bit17_swizzling);
 899         kunmap(page);
 900
 901         return ret ? -EFAULT : 0;
 902 }
 903
 904 static int
 905 i915_gem_shmem_pwrite(struct drm_device *dev,
 906                       struct drm_i915_gem_object *obj,
 907                       struct drm_i915_gem_pwrite *args,
 908                       struct drm_file *file)
 909 {
 910         ssize_t remain;
 911         loff_t offset;
 912         char __user *user_data;
 913         int shmem_page_offset, page_length, ret = 0;
 914         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 915         int hit_slowpath = 0;
 916         int needs_clflush_after = 0;
 917         int needs_clflush_before = 0;
 918         struct sg_page_iter sg_iter;
 919
 920         user_data = to_user_ptr(args->data_ptr);
 921         remain = args->size;
 922
 923         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 924
 925         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 926                 /* If we're not in the cpu write domain, set ourself into the gtt
 927                  * write domain and manually flush cachelines (if required). This
 928                  * optimizes for the case when the gpu will use the data
 929                  * right away and we therefore have to clflush anyway. */
 930                 needs_clflush_after = cpu_write_needs_clflush(obj);
 931                 ret = i915_gem_object_wait_rendering(obj, false);
 932                 if (ret)
 933                         return ret;
 934         }
 935         /* Same trick applies to invalidate partially written cachelines read
 936          * before writing. */
 937         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
 938                 needs_clflush_before =
 939                         !cpu_cache_is_coherent(dev, obj->cache_level);
 940
 941         ret = i915_gem_object_get_pages(obj);
 942         if (ret)
 943                 return ret;
 944
 945         intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU);
 946
 947         i915_gem_object_pin_pages(obj);
 948
 949         offset = args->offset;
 950         obj->dirty = 1;
 951
 952         VM_OBJECT_LOCK(obj->base.vm_obj);
 953         vm_object_pip_add(obj->base.vm_obj, 1);
 954
 955         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 956                          offset >> PAGE_SHIFT) {
 957                 struct vm_page *page = sg_page_iter_page(&sg_iter);
 958                 int partial_cacheline_write;
 959
 960                 if (remain <= 0)
 961                         break;
 962
 963                 /* Operation in this page
 964                  *
 965                  * shmem_page_offset = offset within page in shmem file
 966                  * page_length = bytes to copy for this page
 967                  */
 968                 shmem_page_offset = offset_in_page(offset);
 969
 970                 page_length = remain;
 971                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
 972                         page_length = PAGE_SIZE - shmem_page_offset;
 973
 974                 /* If we don't overwrite a cacheline completely we need to be
 975                  * careful to have up-to-date data by first clflushing. Don't
 976                  * overcomplicate things and flush the entire patch. */
 977                 partial_cacheline_write = needs_clflush_before &&
 978                         ((shmem_page_offset | page_length)
 979                                 & (cpu_clflush_line_size - 1));
 980
 981                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 982                         (page_to_phys(page) & (1 << 17)) != 0;
 983
 984                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
 985                                         user_data, page_do_bit17_swizzling,
 986                                         partial_cacheline_write,
 987                                         needs_clflush_after);
 988                 if (ret == 0)
 989                         goto next_page;
 990
 991                 hit_slowpath = 1;
 992                 mutex_unlock(&dev->struct_mutex);
 993                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
 994                                         user_data, page_do_bit17_swizzling,
 995                                         partial_cacheline_write,
 996                                         needs_clflush_after);
 997
 998                 mutex_lock(&dev->struct_mutex);
 999
1000                 if (ret)
1001                         goto out;
1002
1003 next_page:
1004                 remain -= page_length;
1005                 user_data += page_length;
1006                 offset += page_length;
1007         }
1008         vm_object_pip_wakeup(obj->base.vm_obj);
1009         VM_OBJECT_UNLOCK(obj->base.vm_obj);
1010
1011 out:
1012         i915_gem_object_unpin_pages(obj);
1013
1014         if (hit_slowpath) {
1015                 /*
1016                  * Fixup: Flush cpu caches in case we didn't flush the dirty
1017                  * cachelines in-line while writing and the object moved
1018                  * out of the cpu write domain while we've dropped the lock.
1019                  */
1020                 if (!needs_clflush_after &&
1021                     obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1022                         if (i915_gem_clflush_object(obj, obj->pin_display))
1023                                 i915_gem_chipset_flush(dev);
1024                 }
1025         }
1026
1027         if (needs_clflush_after)
1028                 i915_gem_chipset_flush(dev);
1029
1030         intel_fb_obj_flush(obj, false);
1031         return ret;
1032 }
1033
1034 /**
1035  * Writes data to the object referenced by handle.
1036  *
1037  * On error, the contents of the buffer that were to be modified are undefined.
1038  */
1039 int
1040 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1041                       struct drm_file *file)
1042 {
1043         struct drm_i915_private *dev_priv = dev->dev_private;
1044         struct drm_i915_gem_pwrite *args = data;
1045         struct drm_i915_gem_object *obj;
1046         int ret;
1047
1048         if (args->size == 0)
1049                 return 0;
1050
1051         if (likely(!i915.prefault_disable)) {
1052                 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1053                                                    args->size);
1054                 if (ret)
1055                         return -EFAULT;
1056         }
1057
1058         intel_runtime_pm_get(dev_priv);
1059
1060         ret = i915_mutex_lock_interruptible(dev);
1061         if (ret)
1062                 goto put_rpm;
1063
1064         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1065         if (&obj->base == NULL) {
1066                 ret = -ENOENT;
1067                 goto unlock;
1068         }
1069
1070         /* Bounds check destination. */
1071         if (args->offset > obj->base.size ||
1072             args->size > obj->base.size - args->offset) {
1073                 ret = -EINVAL;
1074                 goto out;
1075         }
1076
1077         /* prime objects have no backing filp to GEM pread/pwrite
1078          * pages from.
1079          */
1080
1081         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1082
1083         ret = -EFAULT;
1084         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1085          * it would end up going through the fenced access, and we'll get
1086          * different detiling behavior between reading and writing.
1087          * pread/pwrite currently are reading and writing from the CPU
1088          * perspective, requiring manual detiling by the client.
1089          */
1090         if (obj->tiling_mode == I915_TILING_NONE &&
1091             obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1092             cpu_write_needs_clflush(obj)) {
1093                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1094                 /* Note that the gtt paths might fail with non-page-backed user
1095                  * pointers (e.g. gtt mappings when moving data between
1096                  * textures). Fallback to the shmem path in that case. */
1097         }
1098
1099         if (ret == -EFAULT || ret == -ENOSPC) {
1100                 if (obj->phys_handle)
1101                         ret = i915_gem_phys_pwrite(obj, args, file);
1102                 else
1103                         ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1104         }
1105
1106 out:
1107         drm_gem_object_unreference(&obj->base);
1108 unlock:
1109         mutex_unlock(&dev->struct_mutex);
1110 put_rpm:
1111         intel_runtime_pm_put(dev_priv);
1112
1113         return ret;
1114 }
1115
1116 int
1117 i915_gem_check_wedge(struct i915_gpu_error *error,
1118                      bool interruptible)
1119 {
1120         if (i915_reset_in_progress(error)) {
1121                 /* Non-interruptible callers can't handle -EAGAIN, hence return
1122                  * -EIO unconditionally for these. */
1123                 if (!interruptible)
1124                         return -EIO;
1125
1126                 /* Recovery complete, but the reset failed ... */
1127                 if (i915_terminally_wedged(error))
1128                         return -EIO;
1129
1130                 /*
1131                  * Check if GPU Reset is in progress - we need intel_ring_begin
1132                  * to work properly to reinit the hw state while the gpu is
1133                  * still marked as reset-in-progress. Handle this with a flag.
1134                  */
1135                 if (!error->reload_in_reset)
1136                         return -EAGAIN;
1137         }
1138
1139         return 0;
1140 }
1141
1142 /*
1143  * Compare arbitrary request against outstanding lazy request. Emit on match.
1144  */
1145 int
1146 i915_gem_check_olr(struct drm_i915_gem_request *req)
1147 {
1148         int ret;
1149
1150         WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));
1151
1152         ret = 0;
1153         if (req == req->ring->outstanding_lazy_request)
1154                 ret = i915_add_request(req->ring);
1155
1156         return ret;
1157 }
1158
/*
 * Timer callback: @data carries a pointer, which is passed to wakeup_one().
 */
static void fake_irq(unsigned long data)
{
	void *wait_channel = (void *)data;

	wakeup_one(wait_channel);
}
1163
1164 static bool missed_irq(struct drm_i915_private *dev_priv,
1165                        struct intel_engine_cs *ring)
1166 {
1167         return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1168 }
1169
/*
 * Compiled out with #if 0: busy-waits for a request to complete.  The call
 * to it in __i915_wait_request() is under #if 0 as well.
 *
 * Returns -EBUSY without spinning when the ring's irq_refcount is non-zero,
 * 0 once the request is seen as completed, and -EAGAIN otherwise.
 */
#if 0
static int __i915_spin_request(struct drm_i915_gem_request *req)
{
	unsigned long timeout;

	if (i915_gem_request_get_ring(req)->irq_refcount)
		return -EBUSY;

	/* Spin until jiffies reaches the next tick or a reschedule is needed. */
	timeout = jiffies + 1;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (time_after_eq(jiffies, timeout))
			break;

		cpu_relax_lowlatency();
	}
	/* One last completion check after leaving the spin loop. */
	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif
1194
1195 /**
1196  * __i915_wait_request - wait until execution of request has finished
1197  * @req: duh!
1198  * @reset_counter: reset sequence associated with the given request
1199  * @interruptible: do an interruptible wait (normally yes)
1200  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1201  *
1202  * Note: It is of utmost importance that the passed in seqno and reset_counter
1203  * values have been read by the caller in an smp safe manner. Where read-side
1204  * locks are involved, it is sufficient to read the reset_counter before
1205  * unlocking the lock that protects the seqno. For lockless tricks, the
1206  * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1207  * inserted.
1208  *
1209  * Returns 0 if the request was found within the alloted time. Else returns the
1210  * errno with remaining time filled in timeout argument.
1211  */
1212 int __i915_wait_request(struct drm_i915_gem_request *req,
1213                         unsigned reset_counter,
1214                         bool interruptible,
1215                         s64 *timeout,
1216                         struct intel_rps_client *rps)
1217 {
1218         struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
1219         struct drm_device *dev = ring->dev;
1220         struct drm_i915_private *dev_priv = dev->dev_private;
1221         const bool irq_test_in_progress =
1222                 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
1223         unsigned long timeout_expire;
1224         s64 before, now;
1225         int ret, sl_timeout = 1;
1226
1227         WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
1228
1229         if (list_empty(&req->list))
1230                 return 0;
1231
1232         if (i915_gem_request_completed(req, true))
1233                 return 0;
1234
1235         timeout_expire = timeout ?
1236                 jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
1237
1238         if (INTEL_INFO(dev_priv)->gen >= 6)
1239                 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
1240
1241         /* Record current time in case interrupted by signal, or wedged */
1242         trace_i915_gem_request_wait_begin(req);
1243         before = ktime_get_raw_ns();
1244
1245         /* Optimistic spin for the next jiffie before touching IRQs */
1246 #if 0
1247         ret = __i915_spin_request(req);
1248         if (ret == 0)
1249                 goto out;
1250 #endif
1251
1252         if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1253                 ret = -ENODEV;
1254                 goto out;
1255         }
1256
1257         lockmgr(&ring->irq_queue.lock, LK_EXCLUSIVE);
1258         for (;;) {
1259                 struct timer_list timer;
1260
1261                 /* We need to check whether any gpu reset happened in between
1262                  * the caller grabbing the seqno and now ... */
1263                 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1264                         /* ... but upgrade the -EAGAIN to an -EIO if the gpu
1265                          * is truely gone. */
1266                         ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1267                         if (ret == 0)
1268                                 ret = -EAGAIN;
1269                         break;
1270                 }
1271
1272                 if (i915_gem_request_completed(req, false)) {
1273                         ret = 0;
1274                         break;
1275                 }
1276
1277                 if (interruptible && signal_pending(curthread->td_lwp)) {
1278                         ret = -ERESTARTSYS;
1279                         break;
1280                 }
1281
1282                 if (timeout && time_after_eq(jiffies, timeout_expire)) {
1283                         ret = -ETIME;
1284                         break;
1285                 }
1286
1287                 timer.function = NULL;
1288                 if (timeout || missed_irq(dev_priv, ring)) {
1289                         unsigned long expire;
1290
1291                         setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue);
1292                         expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
1293                         sl_timeout = expire - jiffies;
1294                         if (sl_timeout < 1)
1295                                 sl_timeout = 1;
1296                         mod_timer(&timer, expire);
1297                 }
1298
1299 #if 0
1300                 io_schedule();
1301 #endif
1302
1303                 if (timer.function) {
1304                         del_singleshot_timer_sync(&timer);
1305                         destroy_timer_on_stack(&timer);
1306                 }
1307
1308                 lksleep(&ring->irq_queue, &ring->irq_queue.lock,
1309                         interruptible ? PCATCH : 0, "lwe", sl_timeout);
1310         }
1311         lockmgr(&ring->irq_queue.lock, LK_RELEASE);
1312         if (!irq_test_in_progress)
1313                 ring->irq_put(ring);
1314
1315 out:
1316         now = ktime_get_raw_ns();
1317         trace_i915_gem_request_wait_end(req);
1318
1319         if (timeout) {
1320                 s64 tres = *timeout - (now - before);
1321
1322                 *timeout = tres < 0 ? 0 : tres;
1323
1324                 /*
1325                  * Apparently ktime isn't accurate enough and occasionally has a
1326                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1327                  * things up to make the test happy. We allow up to 1 jiffy.
1328                  *
1329                  * This is a regrssion from the timespec->ktime conversion.
1330                  */
1331                 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1332                         *timeout = 0;
1333         }
1334
1335         return ret;
1336 }
1337
1338 static inline void
1339 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1340 {
1341         struct drm_i915_file_private *file_priv = request->file_priv;
1342
1343         if (!file_priv)
1344                 return;
1345
1346         spin_lock(&file_priv->mm.lock);
1347         list_del(&request->client_list);
1348         request->file_priv = NULL;
1349         spin_unlock(&file_priv->mm.lock);
1350 }
1351
1352 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1353 {
1354         trace_i915_gem_request_retire(request);
1355
1356         /* We know the GPU must have read the request to have
1357          * sent us the seqno + interrupt, so use the position
1358          * of tail of the request to update the last known position
1359          * of the GPU head.
1360          *
1361          * Note this requires that we are always called in request
1362          * completion order.
1363          */
1364         request->ringbuf->last_retired_head = request->postfix;
1365
1366         list_del_init(&request->list);
1367         i915_gem_request_remove_from_client(request);
1368
1369 #if 0
1370         put_pid(request->pid);
1371 #endif
1372
1373         i915_gem_request_unreference(request);
1374 }
1375
1376 static void
1377 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1378 {
1379         struct intel_engine_cs *engine = req->ring;
1380         struct drm_i915_gem_request *tmp;
1381
1382         lockdep_assert_held(&engine->dev->struct_mutex);
1383
1384         if (list_empty(&req->list))
1385                 return;
1386
1387         do {
1388                 tmp = list_first_entry(&engine->request_list,
1389                                        typeof(*tmp), list);
1390
1391                 i915_gem_request_retire(tmp);
1392         } while (tmp != req);
1393
1394         WARN_ON(i915_verify_lists(engine->dev));
1395 }
1396
1397 /**
1398  * Waits for a request to be signaled, and cleans up the
1399  * request and object lists appropriately for that event.
1400  */
1401 int
1402 i915_wait_request(struct drm_i915_gem_request *req)
1403 {
1404         struct drm_device *dev;
1405         struct drm_i915_private *dev_priv;
1406         bool interruptible;
1407         int ret;
1408
1409         BUG_ON(req == NULL);
1410
1411         dev = req->ring->dev;
1412         dev_priv = dev->dev_private;
1413         interruptible = dev_priv->mm.interruptible;
1414
1415         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1416
1417         ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1418         if (ret)
1419                 return ret;
1420
1421         ret = i915_gem_check_olr(req);
1422         if (ret)
1423                 return ret;
1424
1425         ret = __i915_wait_request(req,
1426                                   atomic_read(&dev_priv->gpu_error.reset_counter),
1427                                   interruptible, NULL, NULL);
1428         if (ret)
1429                 return ret;
1430
1431         __i915_gem_request_retire__upto(req);
1432         return 0;
1433 }
1434
1435 /**
1436  * Ensures that all rendering to the object has completed and the object is
1437  * safe to unbind from the GTT or access from the CPU.
1438  */
1439 int
1440 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1441                                bool readonly)
1442 {
1443         int ret, i;
1444
1445         if (!obj->active)
1446                 return 0;
1447
1448         if (readonly) {
1449                 if (obj->last_write_req != NULL) {
1450                         ret = i915_wait_request(obj->last_write_req);
1451                         if (ret)
1452                                 return ret;
1453
1454                         i = obj->last_write_req->ring->id;
1455                         if (obj->last_read_req[i] == obj->last_write_req)
1456                                 i915_gem_object_retire__read(obj, i);
1457                         else
1458                                 i915_gem_object_retire__write(obj);
1459                 }
1460         } else {
1461                 for (i = 0; i < I915_NUM_RINGS; i++) {
1462                         if (obj->last_read_req[i] == NULL)
1463                                 continue;
1464
1465                         ret = i915_wait_request(obj->last_read_req[i]);
1466                         if (ret)
1467                                 return ret;
1468
1469                         i915_gem_object_retire__read(obj, i);
1470                 }
1471                 RQ_BUG_ON(obj->active);
1472         }
1473
1474         return 0;
1475 }
1476
1477 static void
1478 i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1479                                struct drm_i915_gem_request *req)
1480 {
1481         int ring = req->ring->id;
1482
1483         if (obj->last_read_req[ring] == req)
1484                 i915_gem_object_retire__read(obj, ring);
1485         else if (obj->last_write_req == req)
1486                 i915_gem_object_retire__write(obj);
1487
1488         __i915_gem_request_retire__upto(req);
1489 }
1490
1491 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1492  * as the object state may change during this call.
1493  */
1494 static __must_check int
1495 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1496                                             struct intel_rps_client *rps,
1497                                             bool readonly)
1498 {
1499         struct drm_device *dev = obj->base.dev;
1500         struct drm_i915_private *dev_priv = dev->dev_private;
1501         struct drm_i915_gem_request *requests[I915_NUM_RINGS];
1502         unsigned reset_counter;
1503         int ret, i, n = 0;
1504
1505         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1506         BUG_ON(!dev_priv->mm.interruptible);
1507
1508         if (!obj->active)
1509                 return 0;
1510
1511         ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
1512         if (ret)
1513                 return ret;
1514
1515         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
1516
1517         if (readonly) {
1518                 struct drm_i915_gem_request *req;
1519
1520                 req = obj->last_write_req;
1521                 if (req == NULL)
1522                         return 0;
1523
1524                 ret = i915_gem_check_olr(req);
1525                 if (ret)
1526                         goto err;
1527
1528                 requests[n++] = i915_gem_request_reference(req);
1529         } else {
1530                 for (i = 0; i < I915_NUM_RINGS; i++) {
1531                         struct drm_i915_gem_request *req;
1532
1533                         req = obj->last_read_req[i];
1534                         if (req == NULL)
1535                                 continue;
1536
1537                         ret = i915_gem_check_olr(req);
1538                         if (ret)
1539                                 goto err;
1540
1541                         requests[n++] = i915_gem_request_reference(req);
1542                 }
1543         }
1544
1545         mutex_unlock(&dev->struct_mutex);
1546         for (i = 0; ret == 0 && i < n; i++)
1547                 ret = __i915_wait_request(requests[i], reset_counter, true,
1548                                           NULL, rps);
1549         mutex_lock(&dev->struct_mutex);
1550
1551 err:
1552         for (i = 0; i < n; i++) {
1553                 if (ret == 0)
1554                         i915_gem_object_retire_request(obj, requests[i]);
1555                 i915_gem_request_unreference(requests[i]);
1556         }
1557
1558         return ret;
1559 }
1560
1561 static struct intel_rps_client *to_rps_client(struct drm_file *file)
1562 {
1563         struct drm_i915_file_private *fpriv = file->driver_priv;
1564         return &fpriv->rps;
1565 }
1566
1567 /**
1568  * Called when user space prepares to use an object with the CPU, either
1569  * through the mmap ioctl's mapping or a GTT mapping.
1570  */
1571 int
1572 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1573                           struct drm_file *file)
1574 {
1575         struct drm_i915_gem_set_domain *args = data;
1576         struct drm_i915_gem_object *obj;
1577         uint32_t read_domains = args->read_domains;
1578         uint32_t write_domain = args->write_domain;
1579         int ret;
1580
1581         /* Only handle setting domains to types used by the CPU. */
1582         if (write_domain & I915_GEM_GPU_DOMAINS)
1583                 return -EINVAL;
1584
1585         if (read_domains & I915_GEM_GPU_DOMAINS)
1586                 return -EINVAL;
1587
1588         /* Having something in the write domain implies it's in the read
1589          * domain, and only that read domain.  Enforce that in the request.
1590          */
1591         if (write_domain != 0 && read_domains != write_domain)
1592                 return -EINVAL;
1593
1594         ret = i915_mutex_lock_interruptible(dev);
1595         if (ret)
1596                 return ret;
1597
1598         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1599         if (&obj->base == NULL) {
1600                 ret = -ENOENT;
1601                 goto unlock;
1602         }
1603
1604         /* Try to flush the object off the GPU without holding the lock.
1605          * We will repeat the flush holding the lock in the normal manner
1606          * to catch cases where we are gazumped.
1607          */
1608         ret = i915_gem_object_wait_rendering__nonblocking(obj,
1609                                                           to_rps_client(file),
1610                                                           !write_domain);
1611         if (ret)
1612                 goto unref;
1613
1614         if (read_domains & I915_GEM_DOMAIN_GTT)
1615                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1616         else
1617                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1618
1619 unref:
1620         drm_gem_object_unreference(&obj->base);
1621 unlock:
1622         mutex_unlock(&dev->struct_mutex);
1623         return ret;
1624 }
1625
1626 /**
1627  * Called when user space has done writes to this buffer
1628  */
1629 int
1630 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1631                          struct drm_file *file)
1632 {
1633         struct drm_i915_gem_sw_finish *args = data;
1634         struct drm_i915_gem_object *obj;
1635         int ret = 0;
1636
1637         ret = i915_mutex_lock_interruptible(dev);
1638         if (ret)
1639                 return ret;
1640
1641         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1642         if (&obj->base == NULL) {
1643                 ret = -ENOENT;
1644                 goto unlock;
1645         }
1646
1647         /* Pinned buffers may be scanout, so flush the cache */
1648         if (obj->pin_display)
1649                 i915_gem_object_flush_cpu_write_domain(obj);
1650
1651         drm_gem_object_unreference(&obj->base);
1652 unlock:
1653         mutex_unlock(&dev->struct_mutex);
1654         return ret;
1655 }
1656
1657 /**
1658  * Maps the contents of an object, returning the address it is mapped
1659  * into.
1660  *
1661  * While the mapping holds a reference on the contents of the object, it doesn't
1662  * imply a ref on the object itself.
1663  *
1664  * IMPORTANT:
1665  *
1666  * DRM driver writers who look a this function as an example for how to do GEM
1667  * mmap support, please don't implement mmap support like here. The modern way
1668  * to implement DRM mmap support is with an mmap offset ioctl (like
1669  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1670  * That way debug tooling like valgrind will understand what's going on, hiding
1671  * the mmap call in a driver private ioctl will break that. The i915 driver only
1672  * does cpu mmaps this way because we didn't know better.
1673  */
1674 int
1675 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1676                     struct drm_file *file)
1677 {
1678         struct drm_i915_gem_mmap *args = data;
1679         struct drm_gem_object *obj;
1680         unsigned long addr;
1681
1682         struct proc *p = curproc;
1683         vm_map_t map = &p->p_vmspace->vm_map;
1684         vm_size_t size;
1685         int error = 0, rv;
1686
1687         if (args->flags & ~(I915_MMAP_WC))
1688                 return -EINVAL;
1689
1690         obj = drm_gem_object_lookup(dev, file, args->handle);
1691         if (obj == NULL)
1692                 return -ENOENT;
1693
1694         if (args->size == 0)
1695                 goto out;
1696
1697         size = round_page(args->size);
1698         if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
1699                 error = -ENOMEM;
1700                 goto out;
1701         }
1702
1703         /* prime objects have no backing filp to GEM mmap
1704          * pages from.
1705          */
1706
1707         /*
1708          * Call hint to ensure that NULL is not returned as a valid address
1709          * and to reduce vm_map traversals. XXX causes instability, use a
1710          * fixed low address as the start point instead to avoid the NULL
1711          * return issue.
1712          */
1713
1714         addr = PAGE_SIZE;
1715
1716         /*
1717          * Use 256KB alignment.  It is unclear why this matters for a
1718          * virtual address but it appears to fix a number of application/X
1719          * crashes and kms console switching is much faster.
1720          */
1721         vm_object_hold(obj->vm_obj);
1722         vm_object_reference_locked(obj->vm_obj);
1723         vm_object_drop(obj->vm_obj);
1724
1725         rv = vm_map_find(map, obj->vm_obj, NULL,
1726                          args->offset, &addr, args->size,
1727                          256 * 1024, /* align */
1728                          TRUE, /* fitit */
1729                          VM_MAPTYPE_NORMAL, /* maptype */
1730                          VM_PROT_READ | VM_PROT_WRITE, /* prot */
1731                          VM_PROT_READ | VM_PROT_WRITE, /* max */
1732                          MAP_SHARED /* cow */);
1733         if (rv != KERN_SUCCESS) {
1734                 vm_object_deallocate(obj->vm_obj);
1735                 error = -vm_mmap_to_errno(rv);
1736         } else {
1737                 args->addr_ptr = (uint64_t)addr;
1738         }
1739 out:
1740         drm_gem_object_unreference(obj);
1741         return (error);
1742 }
1743
1744 /**
1745  * i915_gem_fault - fault a page into the GTT
1746  *
1747  * vm_obj is locked on entry and expected to be locked on return.
1748  *
1749  * The vm_pager has placemarked the object with an anonymous memory page
1750  * which we must replace atomically to avoid races against concurrent faults
1751  * on the same page.  XXX we currently are unable to do this atomically.
1752  *
1753  * If we are to return an error we should not touch the anonymous page,
1754  * the caller will deallocate it.
1755  *
1756  * XXX Most GEM calls appear to be interruptable, but we can't hard loop
1757  * in that case.  Release all resources and wait 1 tick before retrying.
1758  * This is a huge problem which needs to be fixed by getting rid of most
1759  * of the interruptability.  The linux code does not retry but does appear
1760  * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
1761  * to be able to retry.
1762  *
1763  * --
1764  * vma: VMA in question
1765  * vmf: fault info
1766  *
1767  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1768  * from userspace.  The fault handler takes care of binding the object to
1769  * the GTT (if needed), allocating and programming a fence register (again,
1770  * only if needed based on whether the old reg is still valid or the object
1771  * is tiled) and inserting a new PTE into the faulting process.
1772  *
1773  * Note that the faulting process may involve evicting existing objects
1774  * from the GTT and/or fence registers to make room.  So performance may
1775  * suffer if the GTT working set is large or there are few fence registers
1776  * left.
1777  *
1778  * vm_obj is locked on entry and expected to be locked on return.  The VM
1779  * pager has placed an anonymous memory page at (obj,offset) which we have
1780  * to replace.
1781  */
1782 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
1783 {
1784         struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
1785         struct drm_device *dev = obj->base.dev;
1786         struct drm_i915_private *dev_priv = dev->dev_private;
1787         struct i915_ggtt_view view = i915_ggtt_view_normal;
1788         unsigned long page_offset;
1789         vm_page_t m, oldm = NULL;
1790         int ret = 0;
1791         bool write = !!(prot & VM_PROT_WRITE);
1792
1793         intel_runtime_pm_get(dev_priv);
1794
1795         /* We don't use vmf->pgoff since that has the fake offset */
1796         page_offset = (unsigned long)offset;
1797
1798 retry:
1799         ret = i915_mutex_lock_interruptible(dev);
1800         if (ret)
1801                 goto out;
1802
1803         trace_i915_gem_object_fault(obj, page_offset, true, write);
1804
1805         /* Try to flush the object off the GPU first without holding the lock.
1806          * Upon reacquiring the lock, we will perform our sanity checks and then
1807          * repeat the flush holding the lock in the normal manner to catch cases
1808          * where we are gazumped.
1809          */
1810         ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1811         if (ret)
1812                 goto unlock;
1813
1814         /* Access to snoopable pages through the GTT is incoherent. */
1815         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1816                 ret = -EFAULT;
1817                 goto unlock;
1818         }
1819
1820         /* Use a partial view if the object is bigger than the aperture. */
1821         if (obj->base.size >= dev_priv->gtt.mappable_end &&
1822             obj->tiling_mode == I915_TILING_NONE) {
1823 #if 0
1824                 static const unsigned int chunk_size = 256; // 1 MiB
1825
1826                 memset(&view, 0, sizeof(view));
1827                 view.type = I915_GGTT_VIEW_PARTIAL;
1828                 view.params.partial.offset = rounddown(page_offset, chunk_size);
1829                 view.params.partial.size =
1830                         min_t(unsigned int,
1831                               chunk_size,
1832                               (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1833                               view.params.partial.offset);
1834 #endif
1835         }
1836
1837         /* Now pin it into the GTT if needed */
1838         ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
1839         if (ret)
1840                 goto unlock;
1841
1842         ret = i915_gem_object_set_to_gtt_domain(obj, write);
1843         if (ret)
1844                 goto unpin;
1845
1846         ret = i915_gem_object_get_fence(obj);
1847         if (ret)
1848                 goto unpin;
1849
1850         /*
1851          * START FREEBSD MAGIC
1852          *
1853          * Add a pip count to avoid destruction and certain other
1854          * complex operations (such as collapses?) while unlocked.
1855          */
1856         vm_object_pip_add(vm_obj, 1);
1857
1858         /*
1859          * XXX We must currently remove the placeholder page now to avoid
1860          * a deadlock against a concurrent i915_gem_release_mmap().
1861          * Otherwise concurrent operation will block on the busy page
1862          * while holding locks which we need to obtain.
1863          */
1864         if (*mres != NULL) {
1865                 oldm = *mres;
1866                 if ((oldm->flags & PG_BUSY) == 0)
1867                         kprintf("i915_gem_fault: Page was not busy\n");
1868                 else
1869                         vm_page_remove(oldm);
1870                 *mres = NULL;
1871         } else {
1872                 oldm = NULL;
1873         }
1874
1875         ret = 0;
1876         m = NULL;
1877
1878         /*
1879          * Since the object lock was dropped, another thread might have
1880          * faulted on the same GTT address and instantiated the mapping.
1881          * Recheck.
1882          */
1883         m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1884         if (m != NULL) {
1885                 /*
1886                  * Try to busy the page, retry on failure (non-zero ret).
1887                  */
1888                 if (vm_page_busy_try(m, false)) {
1889                         kprintf("i915_gem_fault: PG_BUSY\n");
1890                         ret = -EINTR;
1891                         goto unlock;
1892                 }
1893                 goto have_page;
1894         }
1895         /*
1896          * END FREEBSD MAGIC
1897          */
1898
1899         obj->fault_mappable = true;
1900
1901         /* Finally, remap it using the new GTT offset */
1902         m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base +
1903                         i915_gem_obj_ggtt_offset_view(obj, &view) + offset);
1904         if (m == NULL) {
1905                 ret = -EFAULT;
1906                 goto unpin;
1907         }
1908         KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
1909         KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
1910
1911         /*
1912          * Try to busy the page.  Fails on non-zero return.
1913          */
1914         if (vm_page_busy_try(m, false)) {
1915                 kprintf("i915_gem_fault: PG_BUSY(2)\n");
1916                 ret = -EINTR;
1917                 goto unpin;
1918         }
1919         m->valid = VM_PAGE_BITS_ALL;
1920
1921 #if 0
1922         if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1923                 /* Overriding existing pages in partial view does not cause
1924                  * us any trouble as TLBs are still valid because the fault
1925                  * is due to userspace losing part of the mapping or never
1926                  * having accessed it before (at this partials' range).
1927                  */
1928                 unsigned long base = vma->vm_start +
1929                                      (view.params.partial.offset << PAGE_SHIFT);
1930                 unsigned int i;
1931
1932                 for (i = 0; i < view.params.partial.size; i++) {
1933                         ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
1934                         if (ret)
1935                                 break;
1936                 }
1937
1938                 obj->fault_mappable = true;
1939         } else {
1940                 if (!obj->fault_mappable) {
1941                         unsigned long size = min_t(unsigned long,
1942                                                    vma->vm_end - vma->vm_start,
1943                                                    obj->base.size);
1944                         int i;
1945
1946                         for (i = 0; i < size >> PAGE_SHIFT; i++) {
1947                                 ret = vm_insert_pfn(vma,
1948                                                     (unsigned long)vma->vm_start + i * PAGE_SIZE,
1949                                                     pfn + i);
1950                                 if (ret)
1951                                         break;
1952                         }
1953
1954                         obj->fault_mappable = true;
1955                 } else
1956                         ret = vm_insert_pfn(vma,
1957                                             (unsigned long)vmf->virtual_address,
1958                                             pfn + page_offset);
1959 #endif
1960                         vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
1961 #if 0
1962         }
1963 #endif
1964
1965 have_page:
1966         *mres = m;
1967
1968         i915_gem_object_ggtt_unpin_view(obj, &view);
1969         mutex_unlock(&dev->struct_mutex);
1970         ret = VM_PAGER_OK;
1971         goto done;
1972
1973         /*
1974          * ALTERNATIVE ERROR RETURN.
1975          *
1976          * OBJECT EXPECTED TO BE LOCKED.
1977          */
1978 unpin:
1979         i915_gem_object_ggtt_unpin_view(obj, &view);
1980 unlock:
1981         mutex_unlock(&dev->struct_mutex);
1982 out:
1983         switch (ret) {
1984         case -EIO:
1985                 /*
1986                  * We eat errors when the gpu is terminally wedged to avoid
1987                  * userspace unduly crashing (gl has no provisions for mmaps to
1988                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1989                  * and so needs to be reported.
1990                  */
1991                 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1992 //                      ret = VM_FAULT_SIGBUS;
1993                         break;
1994                 }
1995         case -EAGAIN:
1996                 /*
1997                  * EAGAIN means the gpu is hung and we'll wait for the error
1998                  * handler to reset everything when re-faulting in
1999                  * i915_mutex_lock_interruptible.
2000                  */
2001         case -ERESTARTSYS:
2002         case -EINTR:
2003                 VM_OBJECT_UNLOCK(vm_obj);
2004                 int dummy;
2005                 tsleep(&dummy, 0, "delay", 1); /* XXX */
2006                 VM_OBJECT_LOCK(vm_obj);
2007                 goto retry;
2008         default:
2009                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2010                 ret = VM_PAGER_ERROR;
2011                 break;
2012         }
2013
2014 done:
2015         if (oldm != NULL)
2016                 vm_page_free(oldm);
2017         vm_object_pip_wakeup(vm_obj);
2018
2019         intel_runtime_pm_put(dev_priv);
2020         return ret;
2021 }
2022
2023 /**
2024  * i915_gem_release_mmap - remove physical page mappings
2025  * @obj: obj in question
2026  *
2027  * Preserve the reservation of the mmapping with the DRM core code, but
2028  * relinquish ownership of the pages back to the system.
2029  *
2030  * It is vital that we remove the page mapping if we have mapped a tiled
2031  * object through the GTT and then lose the fence register due to
2032  * resource pressure. Similarly if the object has been moved out of the
2033  * aperture, than pages mapped into userspace must be revoked. Removing the
2034  * mapping will then trigger a page fault on the next user access, allowing
2035  * fixup by i915_gem_fault().
2036  */
2037 void
2038 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2039 {
2040         vm_object_t devobj;
2041         vm_page_t m;
2042         int i, page_count;
2043
2044         if (!obj->fault_mappable)
2045                 return;
2046
2047         devobj = cdev_pager_lookup(obj);
2048         if (devobj != NULL) {
2049                 page_count = OFF_TO_IDX(obj->base.size);
2050
2051                 VM_OBJECT_LOCK(devobj);
2052                 for (i = 0; i < page_count; i++) {
2053                         m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
2054                         if (m == NULL)
2055                                 continue;
2056                         cdev_pager_free_page(devobj, m);
2057                 }
2058                 VM_OBJECT_UNLOCK(devobj);
2059                 vm_object_deallocate(devobj);
2060         }
2061
2062         obj->fault_mappable = false;
2063 }
2064
2065 void
2066 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2067 {
2068         struct drm_i915_gem_object *obj;
2069
2070         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
2071                 i915_gem_release_mmap(obj);
2072 }
2073
2074 uint32_t
2075 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2076 {
2077         uint32_t gtt_size;
2078
2079         if (INTEL_INFO(dev)->gen >= 4 ||
2080             tiling_mode == I915_TILING_NONE)
2081                 return size;
2082
2083         /* Previous chips need a power-of-two fence region when tiling */
2084         if (INTEL_INFO(dev)->gen == 3)
2085                 gtt_size = 1024*1024;
2086         else
2087                 gtt_size = 512*1024;
2088
2089         while (gtt_size < size)
2090                 gtt_size <<= 1;
2091
2092         return gtt_size;
2093 }
2094
2095 /**
2096  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2097  * @obj: object to check
2098  *
2099  * Return the required GTT alignment for an object, taking into account
2100  * potential fence register mapping.
2101  */
2102 uint32_t
2103 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2104                            int tiling_mode, bool fenced)
2105 {
2106         /*
2107          * Minimum alignment is 4k (GTT page size), but might be greater
2108          * if a fence register is needed for the object.
2109          */
2110         if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2111             tiling_mode == I915_TILING_NONE)
2112                 return 4096;
2113
2114         /*
2115          * Previous chips need to be aligned to the size of the smallest
2116          * fence register that can contain the object.
2117          */
2118         return i915_gem_get_gtt_size(dev, size, tiling_mode);
2119 }
2120
2121 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2122 {
2123         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2124         int ret;
2125
2126 #if 0
2127         if (drm_vma_node_has_offset(&obj->base.vma_node))
2128                 return 0;
2129 #endif
2130
2131         dev_priv->mm.shrinker_no_lock_stealing = true;
2132
2133         ret = drm_gem_create_mmap_offset(&obj->base);
2134         if (ret != -ENOSPC)
2135                 goto out;
2136
2137         /* Badly fragmented mmap space? The only way we can recover
2138          * space is by destroying unwanted objects. We can't randomly release
2139          * mmap_offsets as userspace expects them to be persistent for the
2140          * lifetime of the objects. The closest we can is to release the
2141          * offsets on purgeable objects by truncating it and marking it purged,
2142          * which prevents userspace from ever using that object again.
2143          */
2144         i915_gem_shrink(dev_priv,
2145                         obj->base.size >> PAGE_SHIFT,
2146                         I915_SHRINK_BOUND |
2147                         I915_SHRINK_UNBOUND |
2148                         I915_SHRINK_PURGEABLE);
2149         ret = drm_gem_create_mmap_offset(&obj->base);
2150         if (ret != -ENOSPC)
2151                 goto out;
2152
2153         i915_gem_shrink_all(dev_priv);
2154         ret = drm_gem_create_mmap_offset(&obj->base);
2155 out:
2156         dev_priv->mm.shrinker_no_lock_stealing = false;
2157
2158         return ret;
2159 }
2160
2161 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2162 {
2163         drm_gem_free_mmap_offset(&obj->base);
2164 }
2165
2166 int
2167 i915_gem_mmap_gtt(struct drm_file *file,
2168                   struct drm_device *dev,
2169                   uint32_t handle,
2170                   uint64_t *offset)
2171 {
2172         struct drm_i915_gem_object *obj;
2173         int ret;
2174
2175         ret = i915_mutex_lock_interruptible(dev);
2176         if (ret)
2177                 return ret;
2178
2179         obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
2180         if (&obj->base == NULL) {
2181                 ret = -ENOENT;
2182                 goto unlock;
2183         }
2184
2185         if (obj->madv != I915_MADV_WILLNEED) {
2186                 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2187                 ret = -EFAULT;
2188                 goto out;
2189         }
2190
2191         ret = i915_gem_object_create_mmap_offset(obj);
2192         if (ret)
2193                 goto out;
2194
2195         *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2196             DRM_GEM_MAPPING_KEY;
2197
2198 out:
2199         drm_gem_object_unreference(&obj->base);
2200 unlock:
2201         mutex_unlock(&dev->struct_mutex);
2202         return ret;
2203 }
2204
2205 /**
2206  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2207  * @dev: DRM device
2208  * @data: GTT mapping ioctl data
2209  * @file: GEM object info
2210  *
2211  * Simply returns the fake offset to userspace so it can mmap it.
2212  * The mmap call will end up in drm_gem_mmap(), which will set things
2213  * up so we can get faults in the handler above.
2214  *
2215  * The fault handler will take care of binding the object into the GTT
2216  * (since it may have been evicted to make room for something), allocating
2217  * a fence register, and mapping the appropriate aperture address into
2218  * userspace.
2219  */
2220 int
2221 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2222                         struct drm_file *file)
2223 {
2224         struct drm_i915_gem_mmap_gtt *args = data;
2225
2226         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2227 }
2228
2229 /* Immediately discard the backing storage */
2230 static void
2231 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2232 {
2233         vm_object_t vm_obj;
2234
2235         vm_obj = obj->base.vm_obj;
2236         VM_OBJECT_LOCK(vm_obj);
2237         vm_object_page_remove(vm_obj, 0, 0, false);
2238         VM_OBJECT_UNLOCK(vm_obj);
2239
2240         obj->madv = __I915_MADV_PURGED;
2241 }
2242
2243 /* Try to discard unwanted pages */
2244 static void
2245 i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2246 {
2247 #if 0
2248         struct address_space *mapping;
2249 #endif
2250
2251         switch (obj->madv) {
2252         case I915_MADV_DONTNEED:
2253                 i915_gem_object_truncate(obj);
2254         case __I915_MADV_PURGED:
2255                 return;
2256         }
2257
2258 #if 0
2259         if (obj->base.filp == NULL)
2260                 return;
2261
2262         mapping = file_inode(obj->base.filp)->i_mapping,
2263         invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2264 #endif
2265 }
2266
2267 static void
2268 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2269 {
2270         struct sg_page_iter sg_iter;
2271         int ret;
2272
2273         BUG_ON(obj->madv == __I915_MADV_PURGED);
2274
2275         ret = i915_gem_object_set_to_cpu_domain(obj, true);
2276         if (ret) {
2277                 /* In the event of a disaster, abandon all caches and
2278                  * hope for the best.
2279                  */
2280                 WARN_ON(ret != -EIO);
2281                 i915_gem_clflush_object(obj, true);
2282                 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2283         }
2284
2285         i915_gem_gtt_finish_object(obj);
2286
2287         if (i915_gem_object_needs_bit17_swizzle(obj))
2288                 i915_gem_object_save_bit_17_swizzle(obj);
2289
2290         if (obj->madv == I915_MADV_DONTNEED)
2291                 obj->dirty = 0;
2292
2293         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2294                 struct vm_page *page = sg_page_iter_page(&sg_iter);
2295
2296                 if (obj->dirty)
2297                         set_page_dirty(page);
2298
2299                 if (obj->madv == I915_MADV_WILLNEED)
2300                         mark_page_accessed(page);
2301
2302                 vm_page_busy_wait(page, FALSE, "i915gem");
2303                 vm_page_unwire(page, 1);
2304                 vm_page_wakeup(page);
2305         }
2306         obj->dirty = 0;
2307
2308         sg_free_table(obj->pages);
2309         kfree(obj->pages);
2310 }
2311
2312 int
2313 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2314 {
2315         const struct drm_i915_gem_object_ops *ops = obj->ops;
2316
2317         if (obj->pages == NULL)
2318                 return 0;
2319
2320         if (obj->pages_pin_count)
2321                 return -EBUSY;
2322
2323         BUG_ON(i915_gem_obj_bound_any(obj));
2324
2325         /* ->put_pages might need to allocate memory for the bit17 swizzle
2326          * array, hence protect them from being reaped by removing them from gtt
2327          * lists early. */
2328         list_del(&obj->global_list);
2329
2330         ops->put_pages(obj);
2331         obj->pages = NULL;
2332
2333         i915_gem_object_invalidate(obj);
2334
2335         return 0;
2336 }
2337
2338 static int
2339 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2340 {
2341         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2342         int page_count, i;
2343         vm_object_t vm_obj;
2344         struct sg_table *st;
2345         struct scatterlist *sg;
2346         struct sg_page_iter sg_iter;
2347         struct vm_page *page;
2348         unsigned long last_pfn = 0;     /* suppress gcc warning */
2349         int ret;
2350
2351         /* Assert that the object is not currently in any GPU domain. As it
2352          * wasn't in the GTT, there shouldn't be any way it could have been in
2353          * a GPU cache
2354          */
2355         BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2356         BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2357
2358         st = kmalloc(sizeof(*st), M_DRM, M_WAITOK);
2359         if (st == NULL)
2360                 return -ENOMEM;
2361
2362         page_count = obj->base.size / PAGE_SIZE;
2363         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2364                 kfree(st);
2365                 return -ENOMEM;
2366         }
2367
2368         /* Get the list of pages out of our struct file.  They'll be pinned
2369          * at this point until we release them.
2370          *
2371          * Fail silently without starting the shrinker
2372          */
2373         vm_obj = obj->base.vm_obj;
2374         VM_OBJECT_LOCK(vm_obj);
2375         sg = st->sgl;
2376         st->nents = 0;
2377         for (i = 0; i < page_count; i++) {
2378                 page = shmem_read_mapping_page(vm_obj, i);
2379                 if (IS_ERR(page)) {
2380                         i915_gem_shrink(dev_priv,
2381                                         page_count,
2382                                         I915_SHRINK_BOUND |
2383                                         I915_SHRINK_UNBOUND |
2384                                         I915_SHRINK_PURGEABLE);
2385                         page = shmem_read_mapping_page(vm_obj, i);
2386                 }
2387                 if (IS_ERR(page)) {
2388                         /* We've tried hard to allocate the memory by reaping
2389                          * our own buffer, now let the real VM do its job and
2390                          * go down in flames if truly OOM.
2391                          */
2392                         i915_gem_shrink_all(dev_priv);
2393                         page = shmem_read_mapping_page(vm_obj, i);
2394                         if (IS_ERR(page)) {
2395                                 ret = PTR_ERR(page);
2396                                 goto err_pages;
2397                         }
2398                 }
2399 #ifdef CONFIG_SWIOTLB
2400                 if (swiotlb_nr_tbl()) {
2401                         st->nents++;
2402                         sg_set_page(sg, page, PAGE_SIZE, 0);
2403                         sg = sg_next(sg);
2404                         continue;
2405                 }
2406 #endif
2407                 if (!i || page_to_pfn(page) != last_pfn + 1) {
2408                         if (i)
2409                                 sg = sg_next(sg);
2410                         st->nents++;
2411                         sg_set_page(sg, page, PAGE_SIZE, 0);
2412                 } else {
2413                         sg->length += PAGE_SIZE;
2414                 }
2415                 last_pfn = page_to_pfn(page);
2416
2417                 /* Check that the i965g/gm workaround works. */
2418         }
2419 #ifdef CONFIG_SWIOTLB
2420         if (!swiotlb_nr_tbl())
2421 #endif
2422                 sg_mark_end(sg);
2423         obj->pages = st;
2424         VM_OBJECT_UNLOCK(vm_obj);
2425
2426         ret = i915_gem_gtt_prepare_object(obj);
2427         if (ret)
2428                 goto err_pages;
2429
2430         if (i915_gem_object_needs_bit17_swizzle(obj))
2431                 i915_gem_object_do_bit_17_swizzle(obj);
2432
2433         if (obj->tiling_mode != I915_TILING_NONE &&
2434             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2435                 i915_gem_object_pin_pages(obj);
2436
2437         return 0;
2438
2439 err_pages:
2440         sg_mark_end(sg);
2441         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2442                 page = sg_page_iter_page(&sg_iter);
2443                 vm_page_busy_wait(page, FALSE, "i915gem");
2444                 vm_page_unwire(page, 0);
2445                 vm_page_wakeup(page);
2446         }
2447         VM_OBJECT_UNLOCK(vm_obj);
2448         sg_free_table(st);
2449         kfree(st);
2450
2451         /* shmemfs first checks if there is enough memory to allocate the page
2452          * and reports ENOSPC should there be insufficient, along with the usual
2453          * ENOMEM for a genuine allocation failure.
2454          *
2455          * We use ENOSPC in our driver to mean that we have run out of aperture
2456          * space and so want to translate the error from shmemfs back to our
2457          * usual understanding of ENOMEM.
2458          */
2459         if (ret == -ENOSPC)
2460                 ret = -ENOMEM;
2461
2462         return ret;
2463 }
2464
2465 /* Ensure that the associated pages are gathered from the backing storage
2466  * and pinned into our object. i915_gem_object_get_pages() may be called
2467  * multiple times before they are released by a single call to
2468  * i915_gem_object_put_pages() - once the pages are no longer referenced
2469  * either as a result of memory pressure (reaping pages under the shrinker)
2470  * or as the object is itself released.
2471  */
2472 int
2473 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2474 {
2475         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2476         const struct drm_i915_gem_object_ops *ops = obj->ops;
2477         int ret;
2478
2479         if (obj->pages)
2480                 return 0;
2481
2482         if (obj->madv != I915_MADV_WILLNEED) {
2483                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2484                 return -EFAULT;
2485         }
2486
2487         BUG_ON(obj->pages_pin_count);
2488
2489         ret = ops->get_pages(obj);
2490         if (ret)
2491                 return ret;
2492
2493         list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2494
2495         obj->get_page.sg = obj->pages->sgl;
2496         obj->get_page.last = 0;
2497
2498         return 0;
2499 }
2500
2501 void i915_vma_move_to_active(struct i915_vma *vma,
2502                              struct intel_engine_cs *ring)
2503 {
2504         struct drm_i915_gem_object *obj = vma->obj;
2505
2506         /* Add a reference if we're newly entering the active list. */
2507         if (obj->active == 0)
2508                 drm_gem_object_reference(&obj->base);
2509         obj->active |= intel_ring_flag(ring);
2510
2511         list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
2512         i915_gem_request_assign(&obj->last_read_req[ring->id],
2513                                 intel_ring_get_request(ring));
2514
2515         list_move_tail(&vma->mm_list, &vma->vm->active_list);
2516 }
2517
2518 static void
2519 i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
2520 {
2521         RQ_BUG_ON(obj->last_write_req == NULL);
2522         RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
2523
2524         i915_gem_request_assign(&obj->last_write_req, NULL);
2525         intel_fb_obj_flush(obj, true);
2526 }
2527
2528 static void
2529 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2530 {
2531         struct i915_vma *vma;
2532
2533         RQ_BUG_ON(obj->last_read_req[ring] == NULL);
2534         RQ_BUG_ON(!(obj->active & (1 << ring)));
2535
2536         list_del_init(&obj->ring_list[ring]);
2537         i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2538
2539         if (obj->last_write_req && obj->last_write_req->ring->id == ring)
2540                 i915_gem_object_retire__write(obj);
2541
2542         obj->active &= ~(1 << ring);
2543         if (obj->active)
2544                 return;
2545
2546         list_for_each_entry(vma, &obj->vma_list, vma_link) {
2547                 if (!list_empty(&vma->mm_list))
2548                         list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
2549         }
2550
2551         i915_gem_request_assign(&obj->last_fenced_req, NULL);
2552         drm_gem_object_unreference(&obj->base);
2553 }
2554
2555 static int
2556 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2557 {
2558         struct drm_i915_private *dev_priv = dev->dev_private;
2559         struct intel_engine_cs *ring;
2560         int ret, i, j;
2561
2562         /* Carefully retire all requests without writing to the rings */
2563         for_each_ring(ring, dev_priv, i) {
2564                 ret = intel_ring_idle(ring);
2565                 if (ret)
2566                         return ret;
2567         }
2568         i915_gem_retire_requests(dev);
2569
2570         /* Finally reset hw state */
2571         for_each_ring(ring, dev_priv, i) {
2572                 intel_ring_init_seqno(ring, seqno);
2573
2574                 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
2575                         ring->semaphore.sync_seqno[j] = 0;
2576         }
2577
2578         return 0;
2579 }
2580
2581 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2582 {
2583         struct drm_i915_private *dev_priv = dev->dev_private;
2584         int ret;
2585
2586         if (seqno == 0)
2587                 return -EINVAL;
2588
2589         /* HWS page needs to be set less than what we
2590          * will inject to ring
2591          */
2592         ret = i915_gem_init_seqno(dev, seqno - 1);
2593         if (ret)
2594                 return ret;
2595
2596         /* Carefully set the last_seqno value so that wrap
2597          * detection still works
2598          */
2599         dev_priv->next_seqno = seqno;
2600         dev_priv->last_seqno = seqno - 1;
2601         if (dev_priv->last_seqno == 0)
2602                 dev_priv->last_seqno--;
2603
2604         return 0;
2605 }
2606
2607 int
2608 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2609 {
2610         struct drm_i915_private *dev_priv = dev->dev_private;
2611
2612         /* reserve 0 for non-seqno */
2613         if (dev_priv->next_seqno == 0) {
2614                 int ret = i915_gem_init_seqno(dev, 0);
2615                 if (ret)
2616                         return ret;
2617
2618                 dev_priv->next_seqno = 1;
2619         }
2620
2621         *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2622         return 0;
2623 }
2624
2625 int __i915_add_request(struct intel_engine_cs *ring,
2626                        struct drm_file *file,
2627                        struct drm_i915_gem_object *obj)
2628 {
2629         struct drm_i915_private *dev_priv = ring->dev->dev_private;
2630         struct drm_i915_gem_request *request;
2631         struct intel_ringbuffer *ringbuf;
2632         u32 request_start;
2633         int ret;
2634
2635         request = ring->outstanding_lazy_request;
2636         if (WARN_ON(request == NULL))
2637                 return -ENOMEM;
2638
2639         if (i915.enable_execlists) {
2640                 ringbuf = request->ctx->engine[ring->id].ringbuf;
2641         } else
2642                 ringbuf = ring->buffer;
2643
2644         request_start = intel_ring_get_tail(ringbuf);
2645         /*
2646          * Emit any outstanding flushes - execbuf can fail to emit the flush
2647          * after having emitted the batchbuffer command. Hence we need to fix
2648          * things up similar to emitting the lazy request. The difference here
2649          * is that the flush _must_ happen before the next request, no matter
2650          * what.
2651          */
2652         if (i915.enable_execlists) {
2653                 ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
2654                 if (ret)
2655                         return ret;
2656         } else {
2657                 ret = intel_ring_flush_all_caches(ring);
2658                 if (ret)
2659                         return ret;
2660         }
2661
2662         /* Record the position of the start of the request so that
2663          * should we detect the updated seqno part-way through the
2664          * GPU processing the request, we never over-estimate the
2665          * position of the head.
2666          */
2667         request->postfix = intel_ring_get_tail(ringbuf);
2668
2669         if (i915.enable_execlists) {
2670                 ret = ring->emit_request(ringbuf, request);
2671                 if (ret)
2672                         return ret;
2673         } else {
2674                 ret = ring->add_request(ring);
2675                 if (ret)
2676                         return ret;
2677
2678                 request->tail = intel_ring_get_tail(ringbuf);
2679         }
2680
2681         request->head = request_start;
2682
2683         /* Whilst this request exists, batch_obj will be on the
2684          * active_list, and so will hold the active reference. Only when this
2685          * request is retired will the the batch_obj be moved onto the
2686          * inactive_list and lose its active reference. Hence we do not need
2687          * to explicitly hold another reference here.
2688          */
2689         request->batch_obj = obj;
2690
2691         if (!i915.enable_execlists) {
2692                 /* Hold a reference to the current context so that we can inspect
2693                  * it later in case a hangcheck error event fires.
2694                  */
2695                 request->ctx = ring->last_context;
2696                 if (request->ctx)
2697                         i915_gem_context_reference(request->ctx);
2698         }
2699
2700         request->emitted_jiffies = jiffies;
2701         ring->last_submitted_seqno = request->seqno;
2702         list_add_tail(&request->list, &ring->request_list);
2703         request->file_priv = NULL;
2704
2705         if (file) {
2706                 struct drm_i915_file_private *file_priv = file->driver_priv;
2707
2708                 spin_lock(&file_priv->mm.lock);
2709                 request->file_priv = file_priv;
2710                 list_add_tail(&request->client_list,
2711                               &file_priv->mm.request_list);
2712                 spin_unlock(&file_priv->mm.lock);
2713
2714                 request->pid = curproc->p_pid;
2715         }
2716
2717         trace_i915_gem_request_add(request);
2718         ring->outstanding_lazy_request = NULL;
2719
2720         i915_queue_hangcheck(ring->dev);
2721
2722         queue_delayed_work(dev_priv->wq,
2723                            &dev_priv->mm.retire_work,
2724                            round_jiffies_up_relative(HZ));
2725         intel_mark_busy(dev_priv->dev);
2726
2727         return 0;
2728 }
2729
2730 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2731                                    const struct intel_context *ctx)
2732 {
2733         unsigned long elapsed;
2734
2735         elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2736
2737         if (ctx->hang_stats.banned)
2738                 return true;
2739
2740         if (ctx->hang_stats.ban_period_seconds &&
2741             elapsed <= ctx->hang_stats.ban_period_seconds) {
2742                 if (!i915_gem_context_is_default(ctx)) {
2743                         DRM_DEBUG("context hanging too fast, banning!\n");
2744                         return true;
2745                 } else if (i915_stop_ring_allow_ban(dev_priv)) {
2746                         if (i915_stop_ring_allow_warn(dev_priv))
2747                                 DRM_ERROR("gpu hanging too fast, banning!\n");
2748                         return true;
2749                 }
2750         }
2751
2752         return false;
2753 }
2754
2755 static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2756                                   struct intel_context *ctx,
2757                                   const bool guilty)
2758 {
2759         struct i915_ctx_hang_stats *hs;
2760
2761         if (WARN_ON(!ctx))
2762                 return;
2763
2764         hs = &ctx->hang_stats;
2765
2766         if (guilty) {
2767                 hs->banned = i915_context_is_banned(dev_priv, ctx);
2768                 hs->batch_active++;
2769                 hs->guilty_ts = get_seconds();
2770         } else {
2771                 hs->batch_pending++;
2772         }
2773 }
2774
2775 void i915_gem_request_free(struct kref *req_ref)
2776 {
2777         struct drm_i915_gem_request *req = container_of(req_ref,
2778                                                  typeof(*req), ref);
2779         struct intel_context *ctx = req->ctx;
2780
2781         if (ctx) {
2782                 if (i915.enable_execlists) {
2783                         struct intel_engine_cs *ring = req->ring;
2784
2785                         if (ctx != ring->default_context)
2786                                 intel_lr_context_unpin(ring, ctx);
2787                 }
2788
2789                 i915_gem_context_unreference(ctx);
2790         }
2791
2792         kfree(req);
2793 }
2794
2795 int i915_gem_request_alloc(struct intel_engine_cs *ring,
2796                            struct intel_context *ctx)
2797 {
2798         struct drm_i915_private *dev_priv = to_i915(ring->dev);
2799         struct drm_i915_gem_request *req;
2800         int ret;
2801
2802         if (ring->outstanding_lazy_request)
2803                 return 0;
2804
2805         req = kzalloc(sizeof(*req), GFP_KERNEL);
2806         if (req == NULL)
2807                 return -ENOMEM;
2808
2809         kref_init(&req->ref);
2810         req->i915 = dev_priv;
2811
2812         ret = i915_gem_get_seqno(ring->dev, &req->seqno);
2813         if (ret)
2814                 goto err;
2815
2816         req->ring = ring;
2817
2818         if (i915.enable_execlists)
2819                 ret = intel_logical_ring_alloc_request_extras(req, ctx);
2820         else
2821                 ret = intel_ring_alloc_request_extras(req);
2822         if (ret)
2823                 goto err;
2824
2825         ring->outstanding_lazy_request = req;
2826         return 0;
2827
2828 err:
2829         kfree(req);
2830         return ret;
2831 }
2832
2833 struct drm_i915_gem_request *
2834 i915_gem_find_active_request(struct intel_engine_cs *ring)
2835 {
2836         struct drm_i915_gem_request *request;
2837
2838         list_for_each_entry(request, &ring->request_list, list) {
2839                 if (i915_gem_request_completed(request, false))
2840                         continue;
2841
2842                 return request;
2843         }
2844
2845         return NULL;
2846 }
2847
2848 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2849                                        struct intel_engine_cs *ring)
2850 {
2851         struct drm_i915_gem_request *request;
2852         bool ring_hung;
2853
2854         request = i915_gem_find_active_request(ring);
2855
2856         if (request == NULL)
2857                 return;
2858
2859         ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2860
2861         i915_set_reset_status(dev_priv, request->ctx, ring_hung);
2862
2863         list_for_each_entry_continue(request, &ring->request_list, list)
2864                 i915_set_reset_status(dev_priv, request->ctx, false);
2865 }
2866
2867 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
2868                                         struct intel_engine_cs *ring)
2869 {
2870         while (!list_empty(&ring->active_list)) {
2871                 struct drm_i915_gem_object *obj;
2872
2873                 obj = list_first_entry(&ring->active_list,
2874                                        struct drm_i915_gem_object,
2875                                        ring_list[ring->id]);
2876
2877                 i915_gem_object_retire__read(obj, ring->id);
2878         }
2879
2880         /*
2881          * Clear the execlists queue up before freeing the requests, as those
2882          * are the ones that keep the context and ringbuffer backing objects
2883          * pinned in place.
2884          */
2885         while (!list_empty(&ring->execlist_queue)) {
2886                 struct drm_i915_gem_request *submit_req;
2887
2888                 submit_req = list_first_entry(&ring->execlist_queue,
2889                                 struct drm_i915_gem_request,
2890                                 execlist_link);
2891                 list_del(&submit_req->execlist_link);
2892
2893                 if (submit_req->ctx != ring->default_context)
2894                         intel_lr_context_unpin(ring, submit_req->ctx);
2895
2896                 i915_gem_request_unreference(submit_req);
2897         }
2898
2899         /*
2900          * We must free the requests after all the corresponding objects have
2901          * been moved off active lists. Which is the same order as the normal
2902          * retire_requests function does. This is important if object hold
2903          * implicit references on things like e.g. ppgtt address spaces through
2904          * the request.
2905          */
2906         while (!list_empty(&ring->request_list)) {
2907                 struct drm_i915_gem_request *request;
2908
2909                 request = list_first_entry(&ring->request_list,
2910                                            struct drm_i915_gem_request,
2911                                            list);
2912
2913                 i915_gem_request_retire(request);
2914         }
2915
2916         /* This may not have been flushed before the reset, so clean it now */
2917         i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
2918 }
2919
2920 void i915_gem_restore_fences(struct drm_device *dev)
2921 {
2922         struct drm_i915_private *dev_priv = dev->dev_private;
2923         int i;
2924
2925         for (i = 0; i < dev_priv->num_fence_regs; i++) {
2926                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2927
2928                 /*
2929                  * Commit delayed tiling changes if we have an object still
2930                  * attached to the fence, otherwise just clear the fence.
2931                  */
2932                 if (reg->obj) {
2933                         i915_gem_object_update_fence(reg->obj, reg,
2934                                                      reg->obj->tiling_mode);
2935                 } else {
2936                         i915_gem_write_fence(dev, i, NULL);
2937                 }
2938         }
2939 }
2940
2941 void i915_gem_reset(struct drm_device *dev)
2942 {
2943         struct drm_i915_private *dev_priv = dev->dev_private;
2944         struct intel_engine_cs *ring;
2945         int i;
2946
2947         /*
2948          * Before we free the objects from the requests, we need to inspect
2949          * them for finding the guilty party. As the requests only borrow
2950          * their reference to the objects, the inspection must be done first.
2951          */
2952         for_each_ring(ring, dev_priv, i)
2953                 i915_gem_reset_ring_status(dev_priv, ring);
2954
2955         for_each_ring(ring, dev_priv, i)
2956                 i915_gem_reset_ring_cleanup(dev_priv, ring);
2957
2958         i915_gem_context_reset(dev);
2959
2960         i915_gem_restore_fences(dev);
2961
2962         WARN_ON(i915_verify_lists(dev));
2963 }
2964
2965 /**
2966  * This function clears the request list as sequence numbers are passed.
2967  */
2968 void
2969 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
2970 {
2971         WARN_ON(i915_verify_lists(ring->dev));
2972
2973         /* Retire requests first as we use it above for the early return.
2974          * If we retire requests last, we may use a later seqno and so clear
2975          * the requests lists without clearing the active list, leading to
2976          * confusion.
2977          */
2978         while (!list_empty(&ring->request_list)) {
2979                 struct drm_i915_gem_request *request;
2980
2981                 request = list_first_entry(&ring->request_list,
2982                                            struct drm_i915_gem_request,
2983                                            list);
2984
2985                 if (!i915_gem_request_completed(request, true))
2986                         break;
2987
2988                 i915_gem_request_retire(request);
2989         }
2990
2991         /* Move any buffers on the active list that are no longer referenced
2992          * by the ringbuffer to the flushing/inactive lists as appropriate,
2993          * before we free the context associated with the requests.
2994          */
2995         while (!list_empty(&ring->active_list)) {
2996                 struct drm_i915_gem_object *obj;
2997
2998                 obj = list_first_entry(&ring->active_list,
2999                                       struct drm_i915_gem_object,
3000                                       ring_list[ring->id]);
3001
3002                 if (!list_empty(&obj->last_read_req[ring->id]->list))
3003                         break;
3004
3005                 i915_gem_object_retire__read(obj, ring->id);
3006         }
3007
3008         if (unlikely(ring->trace_irq_req &&
3009                      i915_gem_request_completed(ring->trace_irq_req, true))) {
3010                 ring->irq_put(ring);
3011                 i915_gem_request_assign(&ring->trace_irq_req, NULL);
3012         }
3013
3014         WARN_ON(i915_verify_lists(ring->dev));
3015 }
3016
3017 bool
3018 i915_gem_retire_requests(struct drm_device *dev)
3019 {
3020         struct drm_i915_private *dev_priv = dev->dev_private;
3021         struct intel_engine_cs *ring;
3022         bool idle = true;
3023         int i;
3024
3025         for_each_ring(ring, dev_priv, i) {
3026                 i915_gem_retire_requests_ring(ring);
3027                 idle &= list_empty(&ring->request_list);
3028                 if (i915.enable_execlists) {
3029
3030                         lockmgr(&ring->execlist_lock, LK_EXCLUSIVE);
3031                         idle &= list_empty(&ring->execlist_queue);
3032                         lockmgr(&ring->execlist_lock, LK_RELEASE);
3033
3034                         intel_execlists_retire_requests(ring);
3035                 }
3036         }
3037
3038         if (idle)
3039                 mod_delayed_work(dev_priv->wq,
3040                                    &dev_priv->mm.idle_work,
3041                                    msecs_to_jiffies(100));
3042
3043         return idle;
3044 }
3045
3046 static void
3047 i915_gem_retire_work_handler(struct work_struct *work)
3048 {
3049         struct drm_i915_private *dev_priv =
3050                 container_of(work, typeof(*dev_priv), mm.retire_work.work);
3051         struct drm_device *dev = dev_priv->dev;
3052         bool idle;
3053
3054         /* Come back later if the device is busy... */
3055         idle = false;
3056         if (mutex_trylock(&dev->struct_mutex)) {
3057                 idle = i915_gem_retire_requests(dev);
3058                 mutex_unlock(&dev->struct_mutex);
3059         }
3060         if (!idle)
3061                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
3062                                    round_jiffies_up_relative(HZ));
3063 }
3064
3065 static void
3066 i915_gem_idle_work_handler(struct work_struct *work)
3067 {
3068         struct drm_i915_private *dev_priv =
3069                 container_of(work, typeof(*dev_priv), mm.idle_work.work);
3070         struct drm_device *dev = dev_priv->dev;
3071         struct intel_engine_cs *ring;
3072         int i;
3073
3074         for_each_ring(ring, dev_priv, i)
3075                 if (!list_empty(&ring->request_list))
3076                         return;
3077
3078         intel_mark_idle(dev);
3079
3080         if (mutex_trylock(&dev->struct_mutex)) {
3081                 struct intel_engine_cs *ring;
3082                 int i;
3083
3084                 for_each_ring(ring, dev_priv, i)
3085                         i915_gem_batch_pool_fini(&ring->batch_pool);
3086
3087                 mutex_unlock(&dev->struct_mutex);
3088         }
3089 }
3090
3091 /**
3092  * Ensures that an object will eventually get non-busy by flushing any required
3093  * write domains, emitting any outstanding lazy request and retiring and
3094  * completed requests.
3095  */
3096 static int
3097 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3098 {
3099         int ret, i;
3100
3101         if (!obj->active)
3102                 return 0;
3103
3104         for (i = 0; i < I915_NUM_RINGS; i++) {
3105                 struct drm_i915_gem_request *req;
3106
3107                 req = obj->last_read_req[i];
3108                 if (req == NULL)
3109                         continue;
3110
3111                 if (list_empty(&req->list))
3112                         goto retire;
3113
3114                 ret = i915_gem_check_olr(req);
3115                 if (ret)
3116                         return ret;
3117
3118                 if (i915_gem_request_completed(req, true)) {
3119                         __i915_gem_request_retire__upto(req);
3120 retire:
3121                         i915_gem_object_retire__read(obj, i);
3122                 }
3123         }
3124
3125         return 0;
3126 }
3127
3128 /**
3129  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3130  * @DRM_IOCTL_ARGS: standard ioctl arguments
3131  *
3132  * Returns 0 if successful, else an error is returned with the remaining time in
3133  * the timeout parameter.
3134  *  -ETIME: object is still busy after timeout
3135  *  -ERESTARTSYS: signal interrupted the wait
3136  *  -ENONENT: object doesn't exist
3137  * Also possible, but rare:
3138  *  -EAGAIN: GPU wedged
3139  *  -ENOMEM: damn
3140  *  -ENODEV: Internal IRQ fail
3141  *  -E?: The add request failed
3142  *
3143  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3144  * non-zero timeout parameter the wait ioctl will wait for the given number of
3145  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3146  * without holding struct_mutex the object may become re-busied before this
3147  * function completes. A similar but shorter * race condition exists in the busy
3148  * ioctl
3149  */
3150 int
3151 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3152 {
3153         struct drm_i915_private *dev_priv = dev->dev_private;
3154         struct drm_i915_gem_wait *args = data;
3155         struct drm_i915_gem_object *obj;
3156         struct drm_i915_gem_request *req[I915_NUM_RINGS];
3157         unsigned reset_counter;
3158         int i, n = 0;
3159         int ret;
3160
3161         if (args->flags != 0)
3162                 return -EINVAL;
3163
3164         ret = i915_mutex_lock_interruptible(dev);
3165         if (ret)
3166                 return ret;
3167
3168         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
3169         if (&obj->base == NULL) {
3170                 mutex_unlock(&dev->struct_mutex);
3171                 return -ENOENT;
3172         }
3173
3174         /* Need to make sure the object gets inactive eventually. */
3175         ret = i915_gem_object_flush_active(obj);
3176         if (ret)
3177                 goto out;
3178
3179         if (!obj->active)
3180                 goto out;
3181
3182         /* Do this after OLR check to make sure we make forward progress polling
3183          * on this IOCTL with a timeout == 0 (like busy ioctl)
3184          */
3185         if (args->timeout_ns == 0) {
3186                 ret = -ETIME;
3187                 goto out;
3188         }
3189
3190         drm_gem_object_unreference(&obj->base);
3191         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
3192
3193         for (i = 0; i < I915_NUM_RINGS; i++) {
3194                 if (obj->last_read_req[i] == NULL)
3195                         continue;
3196
3197                 req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
3198         }
3199
3200         mutex_unlock(&dev->struct_mutex);
3201
3202         for (i = 0; i < n; i++) {
3203                 if (ret == 0)
3204                         ret = __i915_wait_request(req[i], reset_counter, true,
3205                                                   args->timeout_ns > 0 ? &args->timeout_ns : NULL,
3206                                                   file->driver_priv);
3207                 i915_gem_request_unreference__unlocked(req[i]);
3208         }
3209         return ret;
3210
3211 out:
3212         drm_gem_object_unreference(&obj->base);
3213         mutex_unlock(&dev->struct_mutex);
3214         return ret;
3215 }
3216
3217 static int
3218 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
3219                        struct intel_engine_cs *to,
3220                        struct drm_i915_gem_request *req)
3221 {
3222         struct intel_engine_cs *from;
3223         int ret;
3224
3225         from = i915_gem_request_get_ring(req);
3226         if (to == from)
3227                 return 0;
3228
3229         if (i915_gem_request_completed(req, true))
3230                 return 0;
3231
3232         ret = i915_gem_check_olr(req);
3233         if (ret)
3234                 return ret;
3235
3236         if (!i915_semaphore_is_enabled(obj->base.dev)) {
3237                 struct drm_i915_private *i915 = to_i915(obj->base.dev);
3238                 ret = __i915_wait_request(req,
3239                                           atomic_read(&i915->gpu_error.reset_counter),
3240                                           i915->mm.interruptible,
3241                                           NULL,
3242                                           &i915->rps.semaphores);
3243                 if (ret)
3244                         return ret;
3245
3246                 i915_gem_object_retire_request(obj, req);
3247         } else {
3248                 int idx = intel_ring_sync_index(from, to);
3249                 u32 seqno = i915_gem_request_get_seqno(req);
3250
3251                 if (seqno <= from->semaphore.sync_seqno[idx])
3252                         return 0;
3253
3254                 trace_i915_gem_ring_sync_to(from, to, req);
3255                 ret = to->semaphore.sync_to(to, from, seqno);
3256                 if (ret)
3257                         return ret;
3258
3259                 /* We use last_read_req because sync_to()
3260                  * might have just caused seqno wrap under
3261                  * the radar.
3262                  */
3263                 from->semaphore.sync_seqno[idx] =
3264                         i915_gem_request_get_seqno(obj->last_read_req[from->id]);
3265         }
3266
3267         return 0;
3268 }
3269
3270 /**
3271  * i915_gem_object_sync - sync an object to a ring.
3272  *
3273  * @obj: object which may be in use on another ring.
3274  * @to: ring we wish to use the object on. May be NULL.
3275  *
3276  * This code is meant to abstract object synchronization with the GPU.
3277  * Calling with NULL implies synchronizing the object with the CPU
3278  * rather than a particular GPU ring. Conceptually we serialise writes
3279  * between engines inside the GPU. We only allow on engine to write
3280  * into a buffer at any time, but multiple readers. To ensure each has
3281  * a coherent view of memory, we must:
3282  *
3283  * - If there is an outstanding write request to the object, the new
3284  *   request must wait for it to complete (either CPU or in hw, requests
3285  *   on the same ring will be naturally ordered).
3286  *
3287  * - If we are a write request (pending_write_domain is set), the new
3288  *   request must wait for outstanding read requests to complete.
3289  *
3290  * Returns 0 if successful, else propagates up the lower layer error.
3291  */
3292 int
3293 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3294                      struct intel_engine_cs *to)
3295 {
3296         const bool readonly = obj->base.pending_write_domain == 0;
3297         struct drm_i915_gem_request *req[I915_NUM_RINGS];
3298         int ret, i, n;
3299
3300         if (!obj->active)
3301                 return 0;
3302
3303         if (to == NULL)
3304                 return i915_gem_object_wait_rendering(obj, readonly);
3305
3306         n = 0;
3307         if (readonly) {
3308                 if (obj->last_write_req)
3309                         req[n++] = obj->last_write_req;
3310         } else {
3311                 for (i = 0; i < I915_NUM_RINGS; i++)
3312                         if (obj->last_read_req[i])
3313                                 req[n++] = obj->last_read_req[i];
3314         }
3315         for (i = 0; i < n; i++) {
3316                 ret = __i915_gem_object_sync(obj, to, req[i]);
3317                 if (ret)
3318                         return ret;
3319         }
3320
3321         return 0;
3322 }
3323
3324 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3325 {
3326         u32 old_write_domain, old_read_domains;
3327
3328         /* Force a pagefault for domain tracking on next user access */
3329         i915_gem_release_mmap(obj);
3330
3331         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3332                 return;
3333
3334         /* Wait for any direct GTT access to complete */
3335         mb();
3336
3337         old_read_domains = obj->base.read_domains;
3338         old_write_domain = obj->base.write_domain;
3339
3340         obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3341         obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3342
3343         trace_i915_gem_object_change_domain(obj,
3344                                             old_read_domains,
3345                                             old_write_domain);
3346 }
3347
3348 int i915_vma_unbind(struct i915_vma *vma)
3349 {
3350         struct drm_i915_gem_object *obj = vma->obj;
3351         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3352         int ret;
3353
3354         if (list_empty(&vma->vma_link))
3355                 return 0;
3356
3357         if (!drm_mm_node_allocated(&vma->node)) {
3358                 i915_gem_vma_destroy(vma);
3359                 return 0;
3360         }
3361
3362         if (vma->pin_count)
3363                 return -EBUSY;
3364
3365         BUG_ON(obj->pages == NULL);
3366
3367         ret = i915_gem_object_wait_rendering(obj, false);
3368         if (ret)
3369                 return ret;
3370         /* Continue on if we fail due to EIO, the GPU is hung so we
3371          * should be safe and we need to cleanup or else we might
3372          * cause memory corruption through use-after-free.
3373          */
3374
3375         if (i915_is_ggtt(vma->vm) &&
3376             vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3377                 i915_gem_object_finish_gtt(obj);
3378
3379                 /* release the fence reg _after_ flushing */
3380                 ret = i915_gem_object_put_fence(obj);
3381                 if (ret)
3382                         return ret;
3383         }
3384
3385         trace_i915_vma_unbind(vma);
3386
3387         vma->vm->unbind_vma(vma);
3388         vma->bound = 0;
3389
3390         list_del_init(&vma->mm_list);
3391         if (i915_is_ggtt(vma->vm)) {
3392                 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3393                         obj->map_and_fenceable = false;
3394                 } else if (vma->ggtt_view.pages) {
3395                         sg_free_table(vma->ggtt_view.pages);
3396                         kfree(vma->ggtt_view.pages);
3397                 }
3398                 vma->ggtt_view.pages = NULL;
3399         }
3400
3401         drm_mm_remove_node(&vma->node);
3402         i915_gem_vma_destroy(vma);
3403
3404         /* Since the unbound list is global, only move to that list if
3405          * no more VMAs exist. */
3406         if (list_empty(&obj->vma_list))
3407                 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3408
3409         /* And finally now the object is completely decoupled from this vma,
3410          * we can drop its hold on the backing storage and allow it to be
3411          * reaped by the shrinker.
3412          */
3413         i915_gem_object_unpin_pages(obj);
3414
3415         return 0;
3416 }
3417
3418 int i915_gpu_idle(struct drm_device *dev)
3419 {
3420         struct drm_i915_private *dev_priv = dev->dev_private;
3421         struct intel_engine_cs *ring;
3422         int ret, i;
3423
3424         /* Flush everything onto the inactive list. */
3425         for_each_ring(ring, dev_priv, i) {
3426                 if (!i915.enable_execlists) {
3427                         ret = i915_switch_context(ring, ring->default_context);
3428                         if (ret)
3429                                 return ret;
3430                 }
3431
3432                 ret = intel_ring_idle(ring);
3433                 if (ret)
3434                         return ret;
3435         }
3436
3437         WARN_ON(i915_verify_lists(dev));
3438         return 0;
3439 }
3440
3441 static void i965_write_fence_reg(struct drm_device *dev, int reg,
3442                                  struct drm_i915_gem_object *obj)
3443 {
3444         struct drm_i915_private *dev_priv = dev->dev_private;
3445         int fence_reg;
3446         int fence_pitch_shift;
3447
3448         if (INTEL_INFO(dev)->gen >= 6) {
3449                 fence_reg = FENCE_REG_SANDYBRIDGE_0;
3450                 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
3451         } else {
3452                 fence_reg = FENCE_REG_965_0;
3453                 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
3454         }
3455
3456         fence_reg += reg * 8;
3457
3458         /* To w/a incoherency with non-atomic 64-bit register updates,
3459          * we split the 64-bit update into two 32-bit writes. In order
3460          * for a partial fence not to be evaluated between writes, we
3461          * precede the update with write to turn off the fence register,
3462          * and only enable the fence as the last step.
3463          *
3464          * For extra levels of paranoia, we make sure each step lands
3465          * before applying the next step.
3466          */
3467         I915_WRITE(fence_reg, 0);
3468         POSTING_READ(fence_reg);
3469
3470         if (obj) {
3471                 u32 size = i915_gem_obj_ggtt_size(obj);
3472                 uint64_t val;
3473
3474                 /* Adjust fence size to match tiled area */
3475                 if (obj->tiling_mode != I915_TILING_NONE) {
3476                         uint32_t row_size = obj->stride *
3477                                 (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
3478                         size = (size / row_size) * row_size;
3479                 }
3480
3481                 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
3482                                  0xfffff000) << 32;
3483                 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
3484                 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
3485                 if (obj->tiling_mode == I915_TILING_Y)
3486                         val |= 1 << I965_FENCE_TILING_Y_SHIFT;
3487                 val |= I965_FENCE_REG_VALID;
3488
3489                 I915_WRITE(fence_reg + 4, val >> 32);
3490                 POSTING_READ(fence_reg + 4);
3491
3492                 I915_WRITE(fence_reg + 0, val);
3493                 POSTING_READ(fence_reg);
3494         } else {
3495                 I915_WRITE(fence_reg + 4, 0);
3496                 POSTING_READ(fence_reg + 4);
3497         }
3498 }
3499
3500 static void i915_write_fence_reg(struct drm_device *dev, int reg,
3501                                  struct drm_i915_gem_object *obj)
3502 {
3503         struct drm_i915_private *dev_priv = dev->dev_private;
3504         u32 val;
3505
3506         if (obj) {
3507                 u32 size = i915_gem_obj_ggtt_size(obj);
3508                 int pitch_val;
3509                 int tile_width;
3510
3511                 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
3512                      (size & -size) != size ||
3513                      (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3514                      "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
3515                      i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
3516
3517                 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
3518                         tile_width = 128;
3519                 else
3520                         tile_width = 512;
3521
3522                 /* Note: pitch better be a power of two tile widths */
3523                 pitch_val = obj->stride / tile_width;
3524                 pitch_val = ffs(pitch_val) - 1;
3525
3526                 val = i915_gem_obj_ggtt_offset(obj);
3527                 if (obj->tiling_mode == I915_TILING_Y)
3528                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3529                 val |= I915_FENCE_SIZE_BITS(size);
3530                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3531                 val |= I830_FENCE_REG_VALID;
3532         } else
3533                 val = 0;
3534
3535         if (reg < 8)
3536                 reg = FENCE_REG_830_0 + reg * 4;
3537         else
3538                 reg = FENCE_REG_945_8 + (reg - 8) * 4;
3539
3540         I915_WRITE(reg, val);
3541         POSTING_READ(reg);
3542 }
3543
3544 static void i830_write_fence_reg(struct drm_device *dev, int reg,
3545                                 struct drm_i915_gem_object *obj)
3546 {
3547         struct drm_i915_private *dev_priv = dev->dev_private;
3548         uint32_t val;
3549
3550         if (obj) {
3551                 u32 size = i915_gem_obj_ggtt_size(obj);
3552                 uint32_t pitch_val;
3553
3554                 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
3555                      (size & -size) != size ||
3556                      (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
3557                      "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
3558                      i915_gem_obj_ggtt_offset(obj), size);
3559
3560                 pitch_val = obj->stride / 128;
3561                 pitch_val = ffs(pitch_val) - 1;
3562
3563                 val = i915_gem_obj_ggtt_offset(obj);
3564                 if (obj->tiling_mode == I915_TILING_Y)
3565                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
3566                 val |= I830_FENCE_SIZE_BITS(size);
3567                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
3568                 val |= I830_FENCE_REG_VALID;
3569         } else
3570                 val = 0;
3571
3572         I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
3573         POSTING_READ(FENCE_REG_830_0 + reg * 4);
3574 }
3575
3576 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
3577 {
3578         return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
3579 }
3580
3581 static void i915_gem_write_fence(struct drm_device *dev, int reg,
3582                                  struct drm_i915_gem_object *obj)
3583 {
3584         struct drm_i915_private *dev_priv = dev->dev_private;
3585
3586         /* Ensure that all CPU reads are completed before installing a fence
3587          * and all writes before removing the fence.
3588          */
3589         if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
3590                 mb();
3591
3592         WARN(obj && (!obj->stride || !obj->tiling_mode),
3593              "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
3594              obj->stride, obj->tiling_mode);
3595
3596         if (IS_GEN2(dev))
3597                 i830_write_fence_reg(dev, reg, obj);
3598         else if (IS_GEN3(dev))
3599                 i915_write_fence_reg(dev, reg, obj);
3600         else if (INTEL_INFO(dev)->gen >= 4)
3601                 i965_write_fence_reg(dev, reg, obj);
3602
3603         /* And similarly be paranoid that no direct access to this region
3604          * is reordered to before the fence is installed.
3605          */
3606         if (i915_gem_object_needs_mb(obj))
3607                 mb();
3608 }
3609
3610 static inline int fence_number(struct drm_i915_private *dev_priv,
3611                                struct drm_i915_fence_reg *fence)
3612 {
3613         return fence - dev_priv->fence_regs;
3614 }
3615
3616 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
3617                                          struct drm_i915_fence_reg *fence,
3618                                          bool enable)
3619 {
3620         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3621         int reg = fence_number(dev_priv, fence);
3622
3623         i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
3624
3625         if (enable) {
3626                 obj->fence_reg = reg;
3627                 fence->obj = obj;
3628                 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
3629         } else {
3630                 obj->fence_reg = I915_FENCE_REG_NONE;
3631                 fence->obj = NULL;
3632                 list_del_init(&fence->lru_list);
3633         }
3634         obj->fence_dirty = false;
3635 }
3636
3637 static int
3638 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
3639 {
3640         if (obj->last_fenced_req) {
3641                 int ret = i915_wait_request(obj->last_fenced_req);
3642                 if (ret)
3643                         return ret;
3644
3645                 i915_gem_request_assign(&obj->last_fenced_req, NULL);
3646         }
3647
3648         return 0;
3649 }
3650
3651 int
3652 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
3653 {
3654         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3655         struct drm_i915_fence_reg *fence;
3656         int ret;
3657
3658         ret = i915_gem_object_wait_fence(obj);
3659         if (ret)
3660                 return ret;
3661
3662         if (obj->fence_reg == I915_FENCE_REG_NONE)
3663                 return 0;
3664
3665         fence = &dev_priv->fence_regs[obj->fence_reg];
3666
3667         if (WARN_ON(fence->pin_count))
3668                 return -EBUSY;
3669
3670         i915_gem_object_fence_lost(obj);
3671         i915_gem_object_update_fence(obj, fence, false);
3672
3673         return 0;
3674 }
3675
3676 static struct drm_i915_fence_reg *
3677 i915_find_fence_reg(struct drm_device *dev)
3678 {
3679         struct drm_i915_private *dev_priv = dev->dev_private;
3680         struct drm_i915_fence_reg *reg, *avail;
3681         int i;
3682
3683         /* First try to find a free reg */
3684         avail = NULL;
3685         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
3686                 reg = &dev_priv->fence_regs[i];
3687                 if (!reg->obj)
3688                         return reg;
3689
3690                 if (!reg->pin_count)
3691                         avail = reg;
3692         }
3693
3694         if (avail == NULL)
3695                 goto deadlock;
3696
3697         /* None available, try to steal one or wait for a user to finish */
3698         list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
3699                 if (reg->pin_count)
3700                         continue;
3701
3702                 return reg;
3703         }
3704
3705 deadlock:
3706         /* Wait for completion of pending flips which consume fences */
3707         if (intel_has_pending_fb_unpin(dev))
3708                 return ERR_PTR(-EAGAIN);
3709
3710         return ERR_PTR(-EDEADLK);
3711 }
3712
3713 /**
3714  * i915_gem_object_get_fence - set up fencing for an object
3715  * @obj: object to map through a fence reg
3716  *
3717  * When mapping objects through the GTT, userspace wants to be able to write
3718  * to them without having to worry about swizzling if the object is tiled.
3719  * This function walks the fence regs looking for a free one for @obj,
3720  * stealing one if it can't find any.
3721  *
3722  * It then sets up the reg based on the object's properties: address, pitch
3723  * and tiling format.
3724  *
3725  * For an untiled surface, this removes any existing fence.
3726  */
3727 int
3728 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
3729 {
3730         struct drm_device *dev = obj->base.dev;
3731         struct drm_i915_private *dev_priv = dev->dev_private;
3732         bool enable = obj->tiling_mode != I915_TILING_NONE;
3733         struct drm_i915_fence_reg *reg;
3734         int ret;
3735
3736         /* Have we updated the tiling parameters upon the object and so
3737          * will need to serialise the write to the associated fence register?
3738          */
3739         if (obj->fence_dirty) {
3740                 ret = i915_gem_object_wait_fence(obj);
3741                 if (ret)
3742                         return ret;
3743         }
3744
3745         /* Just update our place in the LRU if our fence is getting reused. */
3746         if (obj->fence_reg != I915_FENCE_REG_NONE) {
3747                 reg = &dev_priv->fence_regs[obj->fence_reg];
3748                 if (!obj->fence_dirty) {
3749                         list_move_tail(&reg->lru_list,
3750                                        &dev_priv->mm.fence_list);
3751                         return 0;
3752                 }
3753         } else if (enable) {
3754                 if (WARN_ON(!obj->map_and_fenceable))
3755                         return -EINVAL;
3756
3757                 reg = i915_find_fence_reg(dev);
3758                 if (IS_ERR(reg))
3759                         return PTR_ERR(reg);
3760
3761                 if (reg->obj) {
3762                         struct drm_i915_gem_object *old = reg->obj;
3763
3764                         ret = i915_gem_object_wait_fence(old);
3765                         if (ret)
3766                                 return ret;
3767
3768                         i915_gem_object_fence_lost(old);
3769                 }
3770         } else
3771                 return 0;
3772
3773         i915_gem_object_update_fence(obj, reg, enable);
3774
3775         return 0;
3776 }
3777
3778 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3779                                      unsigned long cache_level)
3780 {
3781         struct drm_mm_node *gtt_space = &vma->node;
3782         struct drm_mm_node *other;
3783
3784         /*
3785          * On some machines we have to be careful when putting differing types
3786          * of snoopable memory together to avoid the prefetcher crossing memory
3787          * domains and dying. During vm initialisation, we decide whether or not
3788          * these constraints apply and set the drm_mm.color_adjust
3789          * appropriately.
3790          */
3791         if (vma->vm->mm.color_adjust == NULL)
3792                 return true;
3793
3794         if (!drm_mm_node_allocated(gtt_space))
3795                 return true;
3796
3797         if (list_empty(&gtt_space->node_list))
3798                 return true;
3799
3800         other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3801         if (other->allocated && !other->hole_follows && other->color != cache_level)
3802                 return false;
3803
3804         other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3805         if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3806                 return false;
3807
3808         return true;
3809 }
3810
3811 /**
3812  * Finds free space in the GTT aperture and binds the object or a view of it
3813  * there.
3814  */
3815 static struct i915_vma *
3816 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3817                            struct i915_address_space *vm,
3818                            const struct i915_ggtt_view *ggtt_view,
3819                            unsigned alignment,
3820                            uint64_t flags)
3821 {
3822         struct drm_device *dev = obj->base.dev;
3823         struct drm_i915_private *dev_priv = dev->dev_private;
3824         u32 size, fence_size, fence_alignment, unfenced_alignment;
3825         unsigned long start =
3826                 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3827         unsigned long end =
3828                 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
3829         struct i915_vma *vma;
3830         int ret;
3831
3832         if (i915_is_ggtt(vm)) {
3833                 u32 view_size;
3834
3835                 if (WARN_ON(!ggtt_view))
3836                         return ERR_PTR(-EINVAL);
3837
3838                 view_size = i915_ggtt_view_size(obj, ggtt_view);
3839
3840                 fence_size = i915_gem_get_gtt_size(dev,
3841                                                    view_size,
3842                                                    obj->tiling_mode);
3843                 fence_alignment = i915_gem_get_gtt_alignment(dev,
3844                                                              view_size,
3845                                                              obj->tiling_mode,
3846                                                              true);
3847                 unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3848                                                                 view_size,
3849                                                                 obj->tiling_mode,
3850                                                                 false);
3851                 size = flags & PIN_MAPPABLE ? fence_size : view_size;
3852         } else {
3853                 fence_size = i915_gem_get_gtt_size(dev,
3854                                                    obj->base.size,
3855                                                    obj->tiling_mode);
3856                 fence_alignment = i915_gem_get_gtt_alignment(dev,
3857                                                              obj->base.size,
3858                                                              obj->tiling_mode,
3859                                                              true);
3860                 unfenced_alignment =
3861                         i915_gem_get_gtt_alignment(dev,
3862                                                    obj->base.size,
3863                                                    obj->tiling_mode,
3864                                                    false);
3865                 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3866         }
3867
3868         if (alignment == 0)
3869                 alignment = flags & PIN_MAPPABLE ? fence_alignment :
3870                                                 unfenced_alignment;
3871         if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
3872                 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3873                           ggtt_view ? ggtt_view->type : 0,
3874                           alignment);
3875                 return ERR_PTR(-EINVAL);
3876         }
3877
3878         /* If binding the object/GGTT view requires more space than the entire
3879          * aperture has, reject it early before evicting everything in a vain
3880          * attempt to find space.
3881          */
3882         if (size > end) {
3883                 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n",
3884                           ggtt_view ? ggtt_view->type : 0,
3885                           size,
3886                           flags & PIN_MAPPABLE ? "mappable" : "total",
3887                           end);
3888                 return ERR_PTR(-E2BIG);
3889         }
3890
3891         ret = i915_gem_object_get_pages(obj);
3892         if (ret)
3893                 return ERR_PTR(ret);
3894
3895         i915_gem_object_pin_pages(obj);
3896
3897         vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3898                           i915_gem_obj_lookup_or_create_vma(obj, vm);
3899
3900         if (IS_ERR(vma))
3901                 goto err_unpin;
3902
3903 search_free:
3904         ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3905                                                   size, alignment,
3906                                                   obj->cache_level,
3907                                                   start, end,
3908                                                   DRM_MM_SEARCH_DEFAULT,
3909                                                   DRM_MM_CREATE_DEFAULT);
3910         if (ret) {
3911                 ret = i915_gem_evict_something(dev, vm, size, alignment,
3912                                                obj->cache_level,
3913                                                start, end,
3914                                                flags);
3915                 if (ret == 0)
3916                         goto search_free;
3917
3918                 goto err_free_vma;
3919         }
3920         if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
3921                 ret = -EINVAL;
3922                 goto err_remove_node;
3923         }
3924
3925         trace_i915_vma_bind(vma, flags);
3926         ret = i915_vma_bind(vma, obj->cache_level, flags);
3927         if (ret)
3928                 goto err_remove_node;
3929
3930         list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3931         list_add_tail(&vma->mm_list, &vm->inactive_list);
3932
3933         return vma;
3934
3935 err_remove_node:
3936         drm_mm_remove_node(&vma->node);
3937 err_free_vma:
3938         i915_gem_vma_destroy(vma);
3939         vma = ERR_PTR(ret);
3940 err_unpin:
3941         i915_gem_object_unpin_pages(obj);
3942         return vma;
3943 }
3944
3945 bool
3946 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3947                         bool force)
3948 {
3949         /* If we don't have a page list set up, then we're not pinned
3950          * to GPU, and we can ignore the cache flush because it'll happen
3951          * again at bind time.
3952          */
3953         if (obj->pages == NULL)
3954                 return false;
3955
3956         /*
3957          * Stolen memory is always coherent with the GPU as it is explicitly
3958          * marked as wc by the system, or the system is cache-coherent.
3959          */
3960         if (obj->stolen || obj->phys_handle)
3961                 return false;
3962
3963         /* If the GPU is snooping the contents of the CPU cache,
3964          * we do not need to manually clear the CPU cache lines.  However,
3965          * the caches are only snooped when the render cache is
3966          * flushed/invalidated.  As we always have to emit invalidations
3967          * and flushes when moving into and out of the RENDER domain, correct
3968          * snooping behaviour occurs naturally as the result of our domain
3969          * tracking.
3970          */
3971         if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3972                 obj->cache_dirty = true;
3973                 return false;
3974         }
3975
3976         trace_i915_gem_object_clflush(obj);
3977         drm_clflush_sg(obj->pages);
3978         obj->cache_dirty = false;
3979
3980         return true;
3981 }
3982
3983 /** Flushes the GTT write domain for the object if it's dirty. */
3984 static void
3985 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3986 {
3987         uint32_t old_write_domain;
3988
3989         if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3990                 return;
3991
3992         /* No actual flushing is required for the GTT write domain.  Writes
3993          * to it immediately go to main memory as far as we know, so there's
3994          * no chipset flush.  It also doesn't land in render cache.
3995          *
3996          * However, we do have to enforce the order so that all writes through
3997          * the GTT land before any writes to the device, such as updates to
3998          * the GATT itself.
3999          */
4000         wmb();
4001
4002         old_write_domain = obj->base.write_domain;
4003         obj->base.write_domain = 0;
4004
4005         intel_fb_obj_flush(obj, false);
4006
4007         trace_i915_gem_object_change_domain(obj,
4008                                             obj->base.read_domains,
4009                                             old_write_domain);
4010 }
4011
4012 /** Flushes the CPU write domain for the object if it's dirty. */
4013 static void
4014 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
4015 {
4016         uint32_t old_write_domain;
4017
4018         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4019                 return;
4020
4021         if (i915_gem_clflush_object(obj, obj->pin_display))
4022                 i915_gem_chipset_flush(obj->base.dev);
4023
4024         old_write_domain = obj->base.write_domain;
4025         obj->base.write_domain = 0;
4026
4027         intel_fb_obj_flush(obj, false);
4028
4029         trace_i915_gem_object_change_domain(obj,
4030                                             obj->base.read_domains,
4031                                             old_write_domain);
4032 }
4033
4034 /**
4035  * Moves a single object to the GTT read, and possibly write domain.
4036  *
4037  * This function returns when the move is complete, including waiting on
4038  * flushes to occur.
4039  */
4040 int
4041 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4042 {
4043         uint32_t old_write_domain, old_read_domains;
4044         struct i915_vma *vma;
4045         int ret;
4046
4047         if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4048                 return 0;
4049
4050         ret = i915_gem_object_wait_rendering(obj, !write);
4051         if (ret)
4052                 return ret;
4053
4054         /* Flush and acquire obj->pages so that we are coherent through
4055          * direct access in memory with previous cached writes through
4056          * shmemfs and that our cache domain tracking remains valid.
4057          * For example, if the obj->filp was moved to swap without us
4058          * being notified and releasing the pages, we would mistakenly
4059          * continue to assume that the obj remained out of the CPU cached
4060          * domain.
4061          */
4062         ret = i915_gem_object_get_pages(obj);
4063         if (ret)
4064                 return ret;
4065
4066         i915_gem_object_flush_cpu_write_domain(obj);
4067
4068         /* Serialise direct access to this object with the barriers for
4069          * coherent writes from the GPU, by effectively invalidating the
4070          * GTT domain upon first access.
4071          */
4072         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4073                 mb();
4074
4075         old_write_domain = obj->base.write_domain;
4076         old_read_domains = obj->base.read_domains;
4077
4078         /* It should now be out of any other write domains, and we can update
4079          * the domain values for our changes.
4080          */
4081         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4082         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4083         if (write) {
4084                 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4085                 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4086                 obj->dirty = 1;
4087         }
4088
4089         if (write)
4090                 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT);
4091
4092         trace_i915_gem_object_change_domain(obj,
4093                                             old_read_domains,
4094                                             old_write_domain);
4095
4096         /* And bump the LRU for this access */
4097         vma = i915_gem_obj_to_ggtt(obj);
4098         if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
4099                 list_move_tail(&vma->mm_list,
4100                                &to_i915(obj->base.dev)->gtt.base.inactive_list);
4101
4102         return 0;
4103 }
4104
4105 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4106                                     enum i915_cache_level cache_level)
4107 {
4108         struct drm_device *dev = obj->base.dev;
4109         struct i915_vma *vma, *next;
4110         int ret;
4111
4112         if (obj->cache_level == cache_level)
4113                 return 0;
4114
4115         if (i915_gem_obj_is_pinned(obj)) {
4116                 DRM_DEBUG("can not change the cache level of pinned objects\n");
4117                 return -EBUSY;
4118         }
4119
4120         list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4121                 if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4122                         ret = i915_vma_unbind(vma);
4123                         if (ret)
4124                                 return ret;
4125                 }
4126         }
4127
4128         if (i915_gem_obj_bound_any(obj)) {
4129                 ret = i915_gem_object_wait_rendering(obj, false);
4130                 if (ret)
4131                         return ret;
4132
4133                 i915_gem_object_finish_gtt(obj);
4134
4135                 /* Before SandyBridge, you could not use tiling or fence
4136                  * registers with snooped memory, so relinquish any fences
4137                  * currently pointing to our region in the aperture.
4138                  */
4139                 if (INTEL_INFO(dev)->gen < 6) {
4140                         ret = i915_gem_object_put_fence(obj);
4141                         if (ret)
4142                                 return ret;
4143                 }
4144
4145                 list_for_each_entry(vma, &obj->vma_list, vma_link)
4146                         if (drm_mm_node_allocated(&vma->node)) {
4147                                 ret = i915_vma_bind(vma, cache_level,
4148                                                     PIN_UPDATE);
4149                                 if (ret)
4150                                         return ret;
4151                         }
4152         }
4153
4154         list_for_each_entry(vma, &obj->vma_list, vma_link)
4155                 vma->node.color = cache_level;
4156         obj->cache_level = cache_level;
4157
4158         if (obj->cache_dirty &&
4159             obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
4160             cpu_write_needs_clflush(obj)) {
4161                 if (i915_gem_clflush_object(obj, true))
4162                         i915_gem_chipset_flush(obj->base.dev);
4163         }
4164
4165         return 0;
4166 }
4167
4168 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4169                                struct drm_file *file)
4170 {
4171         struct drm_i915_gem_caching *args = data;
4172         struct drm_i915_gem_object *obj;
4173
4174         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4175         if (&obj->base == NULL)
4176                 return -ENOENT;
4177
4178         switch (obj->cache_level) {
4179         case I915_CACHE_LLC:
4180         case I915_CACHE_L3_LLC:
4181                 args->caching = I915_CACHING_CACHED;
4182                 break;
4183
4184         case I915_CACHE_WT:
4185                 args->caching = I915_CACHING_DISPLAY;
4186                 break;
4187
4188         default:
4189                 args->caching = I915_CACHING_NONE;
4190                 break;
4191         }
4192
4193         drm_gem_object_unreference_unlocked(&obj->base);
4194         return 0;
4195 }
4196
4197 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4198                                struct drm_file *file)
4199 {
4200         struct drm_i915_gem_caching *args = data;
4201         struct drm_i915_gem_object *obj;
4202         enum i915_cache_level level;
4203         int ret;
4204
4205         switch (args->caching) {
4206         case I915_CACHING_NONE:
4207                 level = I915_CACHE_NONE;
4208                 break;
4209         case I915_CACHING_CACHED:
4210                 level = I915_CACHE_LLC;
4211                 break;
4212         case I915_CACHING_DISPLAY:
4213                 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4214                 break;
4215         default:
4216                 return -EINVAL;
4217         }
4218
4219         ret = i915_mutex_lock_interruptible(dev);
4220         if (ret)
4221                 return ret;
4222
4223         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4224         if (&obj->base == NULL) {
4225                 ret = -ENOENT;
4226                 goto unlock;
4227         }
4228
4229         ret = i915_gem_object_set_cache_level(obj, level);
4230
4231         drm_gem_object_unreference(&obj->base);
4232 unlock:
4233         mutex_unlock(&dev->struct_mutex);
4234         return ret;
4235 }
4236
4237 /*
4238  * Prepare buffer for display plane (scanout, cursors, etc).
4239  * Can be called from an uninterruptible phase (modesetting) and allows
4240  * any flushes to be pipelined (for pageflips).
4241  */
4242 int
4243 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4244                                      u32 alignment,
4245                                      struct intel_engine_cs *pipelined,
4246                                      const struct i915_ggtt_view *view)
4247 {
4248         u32 old_read_domains, old_write_domain;
4249         int ret;
4250
4251         ret = i915_gem_object_sync(obj, pipelined);
4252         if (ret)
4253                 return ret;
4254
4255         /* Mark the pin_display early so that we account for the
4256          * display coherency whilst setting up the cache domains.
4257          */
4258         obj->pin_display++;
4259
4260         /* The display engine is not coherent with the LLC cache on gen6.  As
4261          * a result, we make sure that the pinning that is about to occur is
4262          * done with uncached PTEs. This is lowest common denominator for all
4263          * chipsets.
4264          *
4265          * However for gen6+, we could do better by using the GFDT bit instead
4266          * of uncaching, which would allow us to flush all the LLC-cached data
4267          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4268          */
4269         ret = i915_gem_object_set_cache_level(obj,
4270                                               HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4271         if (ret)
4272                 goto err_unpin_display;
4273
4274         /* As the user may map the buffer once pinned in the display plane
4275          * (e.g. libkms for the bootup splash), we have to ensure that we
4276          * always use map_and_fenceable for all scanout buffers.
4277          */
4278         ret = i915_gem_object_ggtt_pin(obj, view, alignment,
4279                                        view->type == I915_GGTT_VIEW_NORMAL ?
4280                                        PIN_MAPPABLE : 0);
4281         if (ret)
4282                 goto err_unpin_display;
4283
4284         i915_gem_object_flush_cpu_write_domain(obj);
4285
4286         old_write_domain = obj->base.write_domain;
4287         old_read_domains = obj->base.read_domains;
4288
4289         /* It should now be out of any other write domains, and we can update
4290          * the domain values for our changes.
4291          */
4292         obj->base.write_domain = 0;
4293         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4294
4295         trace_i915_gem_object_change_domain(obj,
4296                                             old_read_domains,
4297                                             old_write_domain);
4298
4299         return 0;
4300
4301 err_unpin_display:
4302         obj->pin_display--;
4303         return ret;
4304 }
4305
4306 void
4307 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
4308                                          const struct i915_ggtt_view *view)
4309 {
4310         if (WARN_ON(obj->pin_display == 0))
4311                 return;
4312
4313         i915_gem_object_ggtt_unpin_view(obj, view);
4314
4315         obj->pin_display--;
4316 }
4317
4318 /**
4319  * Moves a single object to the CPU read, and possibly write domain.
4320  *
4321  * This function returns when the move is complete, including waiting on
4322  * flushes to occur.
4323  */
4324 int
4325 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4326 {
4327         uint32_t old_write_domain, old_read_domains;
4328         int ret;
4329
4330         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4331                 return 0;
4332
4333         ret = i915_gem_object_wait_rendering(obj, !write);
4334         if (ret)
4335                 return ret;
4336
4337         i915_gem_object_flush_gtt_write_domain(obj);
4338
4339         old_write_domain = obj->base.write_domain;
4340         old_read_domains = obj->base.read_domains;
4341
4342         /* Flush the CPU cache if it's still invalid. */
4343         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4344                 i915_gem_clflush_object(obj, false);
4345
4346                 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4347         }
4348
4349         /* It should now be out of any other write domains, and we can update
4350          * the domain values for our changes.
4351          */
4352         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4353
4354         /* If we're writing through the CPU, then the GPU read domains will
4355          * need to be invalidated at next use.
4356          */
4357         if (write) {
4358                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4359                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4360         }
4361
4362         if (write)
4363                 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU);
4364
4365         trace_i915_gem_object_change_domain(obj,
4366                                             old_read_domains,
4367                                             old_write_domain);
4368
4369         return 0;
4370 }
4371
4372 /* Throttle our rendering by waiting until the ring has completed our requests
4373  * emitted over 20 msec ago.
4374  *
4375  * Note that if we were to use the current jiffies each time around the loop,
4376  * we wouldn't escape the function with any frames outstanding if the time to
4377  * render a frame was over 20ms.
4378  *
4379  * This should get us reasonable parallelism between CPU and GPU but also
4380  * relatively low latency when blocking on a particular request to finish.
4381  */
4382 static int
4383 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4384 {
4385         struct drm_i915_private *dev_priv = dev->dev_private;
4386         struct drm_i915_file_private *file_priv = file->driver_priv;
4387         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
4388         struct drm_i915_gem_request *request, *target = NULL;
4389         unsigned reset_counter;
4390         int ret;
4391
4392         ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4393         if (ret)
4394                 return ret;
4395
4396         ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
4397         if (ret)
4398                 return ret;
4399
4400         spin_lock(&file_priv->mm.lock);
4401         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4402                 if (time_after_eq(request->emitted_jiffies, recent_enough))
4403                         break;
4404
4405                 target = request;
4406         }
4407         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
4408         if (target)
4409                 i915_gem_request_reference(target);
4410         spin_unlock(&file_priv->mm.lock);
4411
4412         if (target == NULL)
4413                 return 0;
4414
4415         ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
4416         if (ret == 0)
4417                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
4418
4419         i915_gem_request_unreference__unlocked(target);
4420
4421         return ret;
4422 }
4423
4424 static bool
4425 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4426 {
4427         struct drm_i915_gem_object *obj = vma->obj;
4428
4429         if (alignment &&
4430             vma->node.start & (alignment - 1))
4431                 return true;
4432
4433         if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4434                 return true;
4435
4436         if (flags & PIN_OFFSET_BIAS &&
4437             vma->node.start < (flags & PIN_OFFSET_MASK))
4438                 return true;
4439
4440         return false;
4441 }
4442
/*
 * Pin @obj into address space @vm, binding (or rebinding) it as needed.
 *
 * @obj:       object to pin
 * @vm:        target address space; must not be the aliasing ppgtt
 * @ggtt_view: view to use; must be non-NULL exactly when @vm is the GGTT
 * @alignment: required alignment of the binding, 0 for no requirement
 * @flags:     PIN_* flags; PIN_GLOBAL and PIN_MAPPABLE are only accepted
 *             for the GGTT, and PIN_MAPPABLE requires PIN_GLOBAL
 *
 * An existing VMA that does not satisfy @alignment/@flags is unbound and a
 * fresh binding is created.  On success the VMA's pin_count has been
 * incremented.
 *
 * Returns 0 on success, -ENODEV/-EINVAL for invalid argument combinations,
 * -EBUSY when the VMA is already at the maximum pin count, or the error
 * reported by the VMA lookup, unbind or bind step.
 */
static int
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
		       struct i915_address_space *vm,
		       const struct i915_ggtt_view *ggtt_view,
		       uint32_t alignment,
		       uint64_t flags)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct i915_vma *vma;
	unsigned bound;
	int ret;

	/* Argument sanity checks; each violation warns and bails out. */
	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
		return -ENODEV;

	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
		return -EINVAL;

	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
		return -EINVAL;

	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
		return -EINVAL;

	/* Find the existing VMA, if any, for this vm/view combination. */
	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
			  i915_gem_obj_to_vma(obj, vm);

	if (IS_ERR(vma))
		return PTR_ERR(vma);

	if (vma) {
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
			return -EBUSY;

		/*
		 * The current placement does not meet the request: warn if
		 * somebody still holds a pin on it, then unbind so that a
		 * suitable binding is created below.
		 */
		if (i915_vma_misplaced(vma, alignment, flags)) {
			unsigned long offset;
			offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) :
					     i915_gem_obj_offset(obj, vm);
			WARN(vma->pin_count,
			     "bo is already pinned in %s with incorrect alignment:"
			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
			     " obj->map_and_fenceable=%d\n",
			     ggtt_view ? "ggtt" : "ppgtt",
			     offset,
			     alignment,
			     !!(flags & PIN_MAPPABLE),
			     obj->map_and_fenceable);
			ret = i915_vma_unbind(vma);
			if (ret)
				return ret;

			vma = NULL;
		}
	}

	/*
	 * Remember the bind state from before this call so that a change in
	 * GLOBAL_BIND can be detected afterwards.
	 */
	bound = vma ? vma->bound : 0;
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
		/* No usable node yet: allocate space and bind. */
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
						 flags);
		if (IS_ERR(vma))
			return PTR_ERR(vma);
	} else {
		/* Node already allocated: (re)bind with the requested flags. */
		ret = i915_vma_bind(vma, obj->cache_level, flags);
		if (ret)
			return ret;
	}

	/*
	 * The normal GGTT view just changed its GLOBAL_BIND state, so
	 * recompute whether the object is both mappable and fenceable at its
	 * current location.
	 */
	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
	    (bound ^ vma->bound) & GLOBAL_BIND) {
		bool mappable, fenceable;
		u32 fence_size, fence_alignment;

		fence_size = i915_gem_get_gtt_size(obj->base.dev,
						   obj->base.size,
						   obj->tiling_mode);
		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
							     obj->base.size,
							     obj->tiling_mode,
							     true);

		/* Fenceable: node has the fence size and fence alignment. */
		fenceable = (vma->node.size == fence_size &&
			     (vma->node.start & (fence_alignment - 1)) == 0);

		/* Mappable: the fenced range ends within gtt.mappable_end. */
		mappable = (vma->node.start + fence_size <=
			    dev_priv->gtt.mappable_end);

		obj->map_and_fenceable = mappable && fenceable;

		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
	}

	vma->pin_count++;
	return 0;
}
4537
4538 int
4539 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4540                     struct i915_address_space *vm,
4541                     uint32_t alignment,
4542                     uint64_t flags)
4543 {
4544         return i915_gem_object_do_pin(obj, vm,
4545                                       i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
4546                                       alignment, flags);
4547 }
4548
4549 int
4550 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4551                          const struct i915_ggtt_view *view,
4552                          uint32_t alignment,
4553                          uint64_t flags)
4554 {
4555         if (WARN_ONCE(!view, "no view specified"))
4556                 return -EINVAL;
4557
4558         return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
4559                                       alignment, flags | PIN_GLOBAL);
4560 }
4561
4562 void
4563 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
4564                                 const struct i915_ggtt_view *view)
4565 {
4566         struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
4567
4568         BUG_ON(!vma);
4569         WARN_ON(vma->pin_count == 0);
4570         WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
4571
4572         --vma->pin_count;
4573 }
4574
4575 bool
4576 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
4577 {
4578         if (obj->fence_reg != I915_FENCE_REG_NONE) {
4579                 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4580                 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
4581
4582                 WARN_ON(!ggtt_vma ||
4583                         dev_priv->fence_regs[obj->fence_reg].pin_count >
4584                         ggtt_vma->pin_count);
4585                 dev_priv->fence_regs[obj->fence_reg].pin_count++;
4586                 return true;
4587         } else
4588                 return false;
4589 }
4590
4591 void
4592 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
4593 {
4594         if (obj->fence_reg != I915_FENCE_REG_NONE) {
4595                 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4596                 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
4597                 dev_priv->fence_regs[obj->fence_reg].pin_count--;
4598         }
4599 }
4600
4601 int
4602 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4603                     struct drm_file *file)
4604 {
4605         struct drm_i915_gem_busy *args = data;
4606         struct drm_i915_gem_object *obj;
4607         int ret;
4608
4609         ret = i915_mutex_lock_interruptible(dev);
4610         if (ret)
4611                 return ret;
4612
4613         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
4614         if (&obj->base == NULL) {
4615                 ret = -ENOENT;
4616                 goto unlock;
4617         }
4618
4619         /* Count all active objects as busy, even if they are currently not used
4620          * by the gpu. Users of this interface expect objects to eventually
4621          * become non-busy without any further actions, therefore emit any
4622          * necessary flushes here.
4623          */
4624         ret = i915_gem_object_flush_active(obj);
4625         if (ret)
4626                 goto unref;
4627
4628         BUILD_BUG_ON(I915_NUM_RINGS > 16);
4629         args->busy = obj->active << 16;
4630         if (obj->last_write_req)
4631                 args->busy |= obj->last_write_req->ring->id;
4632
4633 unref:
4634         drm_gem_object_unreference(&obj->base);
4635 unlock:
4636         mutex_unlock(&dev->struct_mutex);
4637         return ret;
4638 }
4639
/*
 * Throttle ioctl entry point.  The ioctl carries no payload that is used
 * here (@data is ignored); all work is done by i915_gem_ring_throttle(),
 * whose result is returned unchanged.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret;

	ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
4646
4647 int
4648 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4649                        struct drm_file *file_priv)
4650 {
4651         struct drm_i915_private *dev_priv = dev->dev_private;
4652         struct drm_i915_gem_madvise *args = data;
4653         struct drm_i915_gem_object *obj;
4654         int ret;
4655
4656         switch (args->madv) {
4657         case I915_MADV_DONTNEED:
4658         case I915_MADV_WILLNEED:
4659             break;
4660         default:
4661             return -EINVAL;
4662         }
4663
4664         ret = i915_mutex_lock_interruptible(dev);
4665         if (ret)
4666                 return ret;
4667
4668         obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4669         if (&obj->base == NULL) {
4670                 ret = -ENOENT;
4671                 goto unlock;
4672         }
4673
4674         if (i915_gem_obj_is_pinned(obj)) {
4675                 ret = -EINVAL;
4676                 goto out;
4677         }
4678
4679         if (obj->pages &&
4680             obj->tiling_mode != I915_TILING_NONE &&
4681             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4682                 if (obj->madv == I915_MADV_WILLNEED)
4683                         i915_gem_object_unpin_pages(obj);
4684                 if (args->madv == I915_MADV_WILLNEED)
4685                         i915_gem_object_pin_pages(obj);
4686         }
4687
4688         if (obj->madv != __I915_MADV_PURGED)
4689                 obj->madv = args->madv;
4690
4691         /* if the object is no longer attached, discard its backing storage */
4692         if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
4693                 i915_gem_object_truncate(obj);
4694
4695         args->retained = obj->madv != __I915_MADV_PURGED;
4696
4697 out:
4698         drm_gem_object_unreference(&obj->base);
4699 unlock:
4700         mutex_unlock(&dev->struct_mutex);
4701         return ret;
4702 }
4703
4704 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4705                           const struct drm_i915_gem_object_ops *ops)
4706 {
4707         int i;
4708
4709         INIT_LIST_HEAD(&obj->global_list);
4710         for (i = 0; i < I915_NUM_RINGS; i++)
4711                 INIT_LIST_HEAD(&obj->ring_list[i]);
4712         INIT_LIST_HEAD(&obj->obj_exec_link);
4713         INIT_LIST_HEAD(&obj->vma_list);
4714         INIT_LIST_HEAD(&obj->batch_pool_link);
4715
4716         obj->ops = ops;
4717
4718         obj->fence_reg = I915_FENCE_REG_NONE;
4719         obj->madv = I915_MADV_WILLNEED;
4720
4721         i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4722 }
4723
/*
 * Page get/put operations installed by i915_gem_alloc_object() on the
 * objects it creates.
 */
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
	.get_pages = i915_gem_object_get_pages_gtt,
	.put_pages = i915_gem_object_put_pages_gtt,
};
4728
4729 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4730                                                   size_t size)
4731 {
4732         struct drm_i915_gem_object *obj;
4733 #if 0
4734         struct address_space *mapping;
4735         gfp_t mask;
4736 #endif
4737
4738         obj = i915_gem_object_alloc(dev);
4739         if (obj == NULL)
4740                 return NULL;
4741
4742         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4743                 i915_gem_object_free(obj);
4744                 return NULL;
4745         }
4746
4747 #if 0
4748         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4749         if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4750                 /* 965gm cannot relocate objects above 4GiB. */
4751                 mask &= ~__GFP_HIGHMEM;
4752                 mask |= __GFP_DMA32;
4753         }
4754
4755         mapping = file_inode(obj->base.filp)->i_mapping;
4756         mapping_set_gfp_mask(mapping, mask);
4757 #endif
4758
4759         i915_gem_object_init(obj, &i915_gem_object_ops);
4760
4761         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4762         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4763
4764         if (HAS_LLC(dev)) {
4765                 /* On some devices, we can have the GPU use the LLC (the CPU
4766                  * cache) for about a 10% performance improvement
4767                  * compared to uncached.  Graphics requests other than
4768                  * display scanout are coherent with the CPU in
4769                  * accessing this cache.  This means in this mode we
4770                  * don't need to clflush on the CPU side, and on the
4771                  * GPU side we only need to flush internal caches to
4772                  * get data visible to the CPU.
4773                  *
4774                  * However, we maintain the display planes as UC, and so
4775                  * need to rebind when first used as such.
4776                  */
4777                 obj->cache_level = I915_CACHE_LLC;
4778         } else
4779                 obj->cache_level = I915_CACHE_NONE;
4780
4781         trace_i915_gem_object_create(obj);
4782
4783         return obj;
4784 }
4785
4786 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4787 {
4788         /* If we are the last user of the backing storage (be it shmemfs
4789          * pages or stolen etc), we know that the pages are going to be
4790          * immediately released. In this case, we can then skip copying
4791          * back the contents from the GPU.
4792          */
4793
4794         if (obj->madv != I915_MADV_WILLNEED)
4795                 return false;
4796
4797         if (obj->base.vm_obj == NULL)
4798                 return true;
4799
4800         /* At first glance, this looks racy, but then again so would be
4801          * userspace racing mmap against close. However, the first external
4802          * reference to the filp can only be obtained through the
4803          * i915_gem_mmap_ioctl() which safeguards us against the user
4804          * acquiring such a reference whilst we are in the middle of
4805          * freeing the object.
4806          */
4807 #if 0
4808         return atomic_long_read(&obj->base.filp->f_count) == 1;
4809 #else
4810         return false;
4811 #endif
4812 }
4813
/*
 * Final teardown of a GEM object: unbind every VMA, release page pins and
 * pages, release the mmap offset, run the optional ops->release hook,
 * release the base GEM object and free the memory.  A runtime-PM reference
 * is held for the duration.
 */
void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_vma *vma, *next;

	intel_runtime_pm_get(dev_priv);

	trace_i915_gem_object_destroy(obj);

	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
		int ret;

		/*
		 * Forcibly clear any remaining pins before unbinding.
		 * NOTE(review): presumably i915_vma_unbind() refuses pinned
		 * VMAs -- confirm against its implementation.
		 */
		vma->pin_count = 0;
		ret = i915_vma_unbind(vma);
		if (WARN_ON(ret == -ERESTARTSYS)) {
			bool was_interruptible;

			/*
			 * The unbind was interrupted: retry once with
			 * interruptible waits disabled, then restore the
			 * previous setting.
			 */
			was_interruptible = dev_priv->mm.interruptible;
			dev_priv->mm.interruptible = false;

			WARN_ON(i915_vma_unbind(vma));

			dev_priv->mm.interruptible = was_interruptible;
		}
	}

	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
	 * before progressing. */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	WARN_ON(obj->frontbuffer_bits);

	/*
	 * Drop the extra page pin held for tiled WILLNEED objects under the
	 * swizzled-pages quirk (same condition as in the madvise ioctl).
	 */
	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
	    obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_unpin_pages(obj);

	/* Any page pin still left is a leak: warn and force it to zero. */
	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	/* Mark contents as disposable when nobody else can see them. */
	if (discard_backing_storage(obj))
		obj->madv = I915_MADV_DONTNEED;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);

	/* put_pages must have detached the pages by now. */
	BUG_ON(obj->pages);

#if 0
	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);
#endif

	/* Optional backend-specific release hook. */
	if (obj->ops->release)
		obj->ops->release(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);

	intel_runtime_pm_put(dev_priv);
}
4879
4880 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4881                                      struct i915_address_space *vm)
4882 {
4883         struct i915_vma *vma;
4884         list_for_each_entry(vma, &obj->vma_list, vma_link) {
4885                 if (i915_is_ggtt(vma->vm) &&
4886                     vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4887                         continue;
4888                 if (vma->vm == vm)
4889                         return vma;
4890         }
4891         return NULL;
4892 }
4893
4894 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4895                                            const struct i915_ggtt_view *view)
4896 {
4897         struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
4898         struct i915_vma *vma;
4899
4900         if (WARN_ONCE(!view, "no view specified"))
4901                 return ERR_PTR(-EINVAL);
4902
4903         list_for_each_entry(vma, &obj->vma_list, vma_link)
4904                 if (vma->vm == ggtt &&
4905                     i915_ggtt_view_equal(&vma->ggtt_view, view))
4906                         return vma;
4907         return NULL;
4908 }
4909
4910 void i915_gem_vma_destroy(struct i915_vma *vma)
4911 {
4912         struct i915_address_space *vm = NULL;
4913         WARN_ON(vma->node.allocated);
4914
4915         /* Keep the vma as a placeholder in the execbuffer reservation lists */
4916         if (!list_empty(&vma->exec_list))
4917                 return;
4918
4919         vm = vma->vm;
4920
4921         if (!i915_is_ggtt(vm))
4922                 i915_ppgtt_put(i915_vm_to_ppgtt(vm));
4923
4924         list_del(&vma->vma_link);
4925
4926         kfree(vma);
4927 }
4928
4929 static void
4930 i915_gem_stop_ringbuffers(struct drm_device *dev)
4931 {
4932         struct drm_i915_private *dev_priv = dev->dev_private;
4933         struct intel_engine_cs *ring;
4934         int i;
4935
4936         for_each_ring(ring, dev_priv, i)
4937                 dev_priv->gt.stop_ring(ring);
4938 }
4939
4940 int
4941 i915_gem_suspend(struct drm_device *dev)
4942 {
4943         struct drm_i915_private *dev_priv = dev->dev_private;
4944         int ret = 0;
4945
4946         mutex_lock(&dev->struct_mutex);
4947         ret = i915_gpu_idle(dev);
4948         if (ret)
4949                 goto err;
4950
4951         i915_gem_retire_requests(dev);
4952
4953         i915_gem_stop_ringbuffers(dev);
4954         mutex_unlock(&dev->struct_mutex);
4955
4956         cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4957         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4958 #if 0
4959         flush_delayed_work(&dev_priv->mm.idle_work);
4960 #endif
4961
4962         /* Assert that we sucessfully flushed all the work and
4963          * reset the GPU back to its idle, low power state.
4964          */
4965         WARN_ON(dev_priv->mm.busy);
4966
4967         return 0;
4968
4969 err:
4970         mutex_unlock(&dev->struct_mutex);
4971         return ret;
4972 }
4973
4974 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice)
4975 {
4976         struct drm_device *dev = ring->dev;
4977         struct drm_i915_private *dev_priv = dev->dev_private;
4978         u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
4979         u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4980         int i, ret;
4981
4982         if (!HAS_L3_DPF(dev) || !remap_info)
4983                 return 0;
4984
4985         ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
4986         if (ret)
4987                 return ret;
4988
4989         /*
4990          * Note: We do not worry about the concurrent register cacheline hang
4991          * here because no other code should access these registers other than
4992          * at initialization time.
4993          */
4994         for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
4995                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
4996                 intel_ring_emit(ring, reg_base + i);
4997                 intel_ring_emit(ring, remap_info[i/4]);
4998         }
4999
5000         intel_ring_advance(ring);
5001
5002         return ret;
5003 }
5004
5005 void i915_gem_init_swizzling(struct drm_device *dev)
5006 {
5007         struct drm_i915_private *dev_priv = dev->dev_private;
5008
5009         if (INTEL_INFO(dev)->gen < 5 ||
5010             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5011                 return;
5012
5013         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5014                                  DISP_TILE_SURFACE_SWIZZLING);
5015
5016         if (IS_GEN5(dev))
5017                 return;
5018
5019         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5020         if (IS_GEN6(dev))
5021                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5022         else if (IS_GEN7(dev))
5023                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5024         else if (IS_GEN8(dev))
5025                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5026         else
5027                 BUG();
5028 }
5029
5030 static bool
5031 intel_enable_blt(struct drm_device *dev)
5032 {
5033         if (!HAS_BLT(dev))
5034                 return false;
5035
5036         /* The blitter was dysfunctional on early prototypes */
5037         if (IS_GEN6(dev) && dev->pdev->revision < 8) {
5038                 DRM_INFO("BLT not supported on this pre-production hardware;"
5039                          " graphics performance will be degraded.\n");
5040                 return false;
5041         }
5042
5043         return true;
5044 }
5045
5046 static void init_unused_ring(struct drm_device *dev, u32 base)
5047 {
5048         struct drm_i915_private *dev_priv = dev->dev_private;
5049
5050         I915_WRITE(RING_CTL(base), 0);
5051         I915_WRITE(RING_HEAD(base), 0);
5052         I915_WRITE(RING_TAIL(base), 0);
5053         I915_WRITE(RING_START(base), 0);
5054 }
5055
5056 static void init_unused_rings(struct drm_device *dev)
5057 {
5058         if (IS_I830(dev)) {
5059                 init_unused_ring(dev, PRB1_BASE);
5060                 init_unused_ring(dev, SRB0_BASE);
5061                 init_unused_ring(dev, SRB1_BASE);
5062                 init_unused_ring(dev, SRB2_BASE);
5063                 init_unused_ring(dev, SRB3_BASE);
5064         } else if (IS_GEN2(dev)) {
5065                 init_unused_ring(dev, SRB0_BASE);
5066                 init_unused_ring(dev, SRB1_BASE);
5067         } else if (IS_GEN3(dev)) {
5068                 init_unused_ring(dev, PRB1_BASE);
5069                 init_unused_ring(dev, PRB2_BASE);
5070         }
5071 }
5072
5073 int i915_gem_init_rings(struct drm_device *dev)
5074 {
5075         struct drm_i915_private *dev_priv = dev->dev_private;
5076         int ret;
5077
5078         ret = intel_init_render_ring_buffer(dev);
5079         if (ret)
5080                 return ret;
5081
5082         if (HAS_BSD(dev)) {
5083                 ret = intel_init_bsd_ring_buffer(dev);
5084                 if (ret)
5085                         goto cleanup_render_ring;
5086         }
5087
5088         if (intel_enable_blt(dev)) {
5089                 ret = intel_init_blt_ring_buffer(dev);
5090                 if (ret)
5091                         goto cleanup_bsd_ring;
5092         }
5093
5094         if (HAS_VEBOX(dev)) {
5095                 ret = intel_init_vebox_ring_buffer(dev);
5096                 if (ret)
5097                         goto cleanup_blt_ring;
5098         }
5099
5100         if (HAS_BSD2(dev)) {
5101                 ret = intel_init_bsd2_ring_buffer(dev);
5102                 if (ret)
5103                         goto cleanup_vebox_ring;
5104         }
5105
5106         ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
5107         if (ret)
5108                 goto cleanup_bsd2_ring;
5109
5110         return 0;
5111
5112 cleanup_bsd2_ring:
5113         intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
5114 cleanup_vebox_ring:
5115         intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
5116 cleanup_blt_ring:
5117         intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
5118 cleanup_bsd_ring:
5119         intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
5120 cleanup_render_ring:
5121         intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
5122
5123         return ret;
5124 }
5125
5126 int
5127 i915_gem_init_hw(struct drm_device *dev)
5128 {
5129         struct drm_i915_private *dev_priv = dev->dev_private;
5130         struct intel_engine_cs *ring;
5131         int ret, i;
5132
5133 #if 0
5134         if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
5135                 return -EIO;
5136 #endif
5137
5138         /* Double layer security blanket, see i915_gem_init() */
5139         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5140
5141         if (dev_priv->ellc_size)
5142                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5143
5144         if (IS_HASWELL(dev))
5145                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5146                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5147
5148         if (HAS_PCH_NOP(dev)) {
5149                 if (IS_IVYBRIDGE(dev)) {
5150                         u32 temp = I915_READ(GEN7_MSG_CTL);
5151                         temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5152                         I915_WRITE(GEN7_MSG_CTL, temp);
5153                 } else if (INTEL_INFO(dev)->gen >= 7) {
5154                         u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5155                         temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5156                         I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5157                 }
5158         }
5159
5160         i915_gem_init_swizzling(dev);
5161
5162         /*
5163          * At least 830 can leave some of the unused rings
5164          * "active" (ie. head != tail) after resume which
5165          * will prevent c3 entry. Makes sure all unused rings
5166          * are totally idle.
5167          */
5168         init_unused_rings(dev);
5169
5170         for_each_ring(ring, dev_priv, i) {
5171                 ret = ring->init_hw(ring);
5172                 if (ret)
5173                         goto out;
5174         }
5175
5176         for (i = 0; i < NUM_L3_SLICES(dev); i++)
5177                 i915_gem_l3_remap(&dev_priv->ring[RCS], i);
5178
5179         ret = i915_ppgtt_init_hw(dev);
5180         if (ret && ret != -EIO) {
5181                 DRM_ERROR("PPGTT enable failed %d\n", ret);
5182                 i915_gem_cleanup_ringbuffer(dev);
5183         }
5184
5185         ret = i915_gem_context_enable(dev_priv);
5186         if (ret && ret != -EIO) {
5187                 DRM_ERROR("Context enable failed %d\n", ret);
5188                 i915_gem_cleanup_ringbuffer(dev);
5189
5190                 goto out;
5191         }
5192
5193 out:
5194         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5195         return ret;
5196 }
5197
5198 int i915_gem_init(struct drm_device *dev)
5199 {
5200         struct drm_i915_private *dev_priv = dev->dev_private;
5201         int ret;
5202
5203         i915.enable_execlists = intel_sanitize_enable_execlists(dev,
5204                         i915.enable_execlists);
5205
5206         mutex_lock(&dev->struct_mutex);
5207
5208         if (IS_VALLEYVIEW(dev)) {
5209                 /* VLVA0 (potential hack), BIOS isn't actually waking us */
5210                 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
5211                 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
5212                               VLV_GTLC_ALLOWWAKEACK), 10))
5213                         DRM_DEBUG_DRIVER("allow wake ack timed out\n");
5214         }
5215
5216         if (!i915.enable_execlists) {
5217                 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5218                 dev_priv->gt.init_rings = i915_gem_init_rings;
5219                 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
5220                 dev_priv->gt.stop_ring = intel_stop_ring_buffer;
5221         } else {
5222                 dev_priv->gt.execbuf_submit = intel_execlists_submission;
5223                 dev_priv->gt.init_rings = intel_logical_rings_init;
5224                 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
5225                 dev_priv->gt.stop_ring = intel_logical_ring_stop;
5226         }
5227
5228         /* This is just a security blanket to placate dragons.
5229          * On some systems, we very sporadically observe that the first TLBs
5230          * used by the CS may be stale, despite us poking the TLB reset. If
5231          * we hold the forcewake during initialisation these problems
5232          * just magically go away.
5233          */
5234         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5235
5236         ret = i915_gem_init_userptr(dev);
5237         if (ret)
5238                 goto out_unlock;
5239
5240         i915_gem_init_global_gtt(dev);
5241
5242         ret = i915_gem_context_init(dev);
5243         if (ret)
5244                 goto out_unlock;
5245
5246         ret = dev_priv->gt.init_rings(dev);
5247         if (ret)
5248                 goto out_unlock;
5249
5250         ret = i915_gem_init_hw(dev);
5251         if (ret == -EIO) {
5252                 /* Allow ring initialisation to fail by marking the GPU as
5253                  * wedged. But we only want to do this where the GPU is angry,
5254                  * for all other failure, such as an allocation failure, bail.
5255                  */
5256                 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
5257                 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5258                 ret = 0;
5259         }
5260
5261 out_unlock:
5262         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5263         mutex_unlock(&dev->struct_mutex);
5264
5265         return ret;
5266 }
5267
5268 void
5269 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
5270 {
5271         struct drm_i915_private *dev_priv = dev->dev_private;
5272         struct intel_engine_cs *ring;
5273         int i;
5274
5275         for_each_ring(ring, dev_priv, i)
5276                 dev_priv->gt.cleanup_ring(ring);
5277 }
5278
5279 static void
5280 init_ring_lists(struct intel_engine_cs *ring)
5281 {
5282         INIT_LIST_HEAD(&ring->active_list);
5283         INIT_LIST_HEAD(&ring->request_list);
5284 }
5285
5286 void i915_init_vm(struct drm_i915_private *dev_priv,
5287                   struct i915_address_space *vm)
5288 {
5289         if (!i915_is_ggtt(vm))
5290                 drm_mm_init(&vm->mm, vm->start, vm->total);
5291         vm->dev = dev_priv->dev;
5292         INIT_LIST_HEAD(&vm->active_list);
5293         INIT_LIST_HEAD(&vm->inactive_list);
5294         INIT_LIST_HEAD(&vm->global_link);
5295         list_add_tail(&vm->global_link, &dev_priv->vm_list);
5296 }
5297
5298 void
5299 i915_gem_load(struct drm_device *dev)
5300 {
5301         struct drm_i915_private *dev_priv = dev->dev_private;
5302         int i;
5303
5304         INIT_LIST_HEAD(&dev_priv->vm_list);
5305         i915_init_vm(dev_priv, &dev_priv->gtt.base);
5306
5307         INIT_LIST_HEAD(&dev_priv->context_list);
5308         INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5309         INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5310         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5311         for (i = 0; i < I915_NUM_RINGS; i++)
5312                 init_ring_lists(&dev_priv->ring[i]);
5313         for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5314                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5315         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
5316                           i915_gem_retire_work_handler);
5317         INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
5318                           i915_gem_idle_work_handler);
5319         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5320
5321         dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5322
5323         if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
5324                 dev_priv->num_fence_regs = 32;
5325         else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
5326                 dev_priv->num_fence_regs = 16;
5327         else
5328                 dev_priv->num_fence_regs = 8;
5329
5330         if (intel_vgpu_active(dev))
5331                 dev_priv->num_fence_regs =
5332                                 I915_READ(vgtif_reg(avail_rs.fence_num));
5333
5334         /* Initialize fence registers to zero */
5335         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5336         i915_gem_restore_fences(dev);
5337
5338         i915_gem_detect_bit_6_swizzle(dev);
5339         init_waitqueue_head(&dev_priv->pending_flip_queue);
5340
5341         dev_priv->mm.interruptible = true;
5342
5343         i915_gem_shrinker_init(dev_priv);
5344
5345         lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE);
5346 }
5347
5348 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5349 {
5350         struct drm_i915_file_private *file_priv = file->driver_priv;
5351
5352         /* Clean up our request list when the client is going away, so that
5353          * later retire_requests won't dereference our soon-to-be-gone
5354          * file_priv.
5355          */
5356         spin_lock(&file_priv->mm.lock);
5357         while (!list_empty(&file_priv->mm.request_list)) {
5358                 struct drm_i915_gem_request *request;
5359
5360                 request = list_first_entry(&file_priv->mm.request_list,
5361                                            struct drm_i915_gem_request,
5362                                            client_list);
5363                 list_del(&request->client_list);
5364                 request->file_priv = NULL;
5365         }
5366         spin_unlock(&file_priv->mm.lock);
5367
5368         if (!list_empty(&file_priv->rps.link)) {
5369                 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE);
5370                 list_del(&file_priv->rps.link);
5371                 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE);
5372         }
5373 }
5374
5375 int
5376 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
5377     vm_ooffset_t foff, struct ucred *cred, u_short *color)
5378 {
5379         *color = 0; /* XXXKIB */
5380         return (0);
5381 }
5382
5383 void
5384 i915_gem_pager_dtor(void *handle)
5385 {
5386         struct drm_gem_object *obj;
5387         struct drm_device *dev;
5388
5389         obj = handle;
5390         dev = obj->dev;
5391
5392         mutex_lock(&dev->struct_mutex);
5393         drm_gem_free_mmap_offset(obj);
5394         i915_gem_release_mmap(to_intel_bo(obj));
5395         drm_gem_object_unreference(obj);
5396         mutex_unlock(&dev->struct_mutex);
5397 }
5398
5399 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5400 {
5401         struct drm_i915_file_private *file_priv;
5402         int ret;
5403
5404         DRM_DEBUG_DRIVER("\n");
5405
5406         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5407         if (!file_priv)
5408                 return -ENOMEM;
5409
5410         file->driver_priv = file_priv;
5411         file_priv->dev_priv = dev->dev_private;
5412         file_priv->file = file;
5413         INIT_LIST_HEAD(&file_priv->rps.link);
5414
5415         spin_init(&file_priv->mm.lock, "i915_priv");
5416         INIT_LIST_HEAD(&file_priv->mm.request_list);
5417
5418         ret = i915_gem_context_open(dev, file);
5419         if (ret)
5420                 kfree(file_priv);
5421
5422         return ret;
5423 }
5424
5425 /**
5426  * i915_gem_track_fb - update frontbuffer tracking
5427  * old: current GEM buffer for the frontbuffer slots
5428  * new: new GEM buffer for the frontbuffer slots
5429  * frontbuffer_bits: bitmask of frontbuffer slots
5430  *
5431  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5432  * from @old and setting them in @new. Both @old and @new can be NULL.
5433  */
5434 void i915_gem_track_fb(struct drm_i915_gem_object *old,
5435                        struct drm_i915_gem_object *new,
5436                        unsigned frontbuffer_bits)
5437 {
5438         if (old) {
5439                 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
5440                 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
5441                 old->frontbuffer_bits &= ~frontbuffer_bits;
5442         }
5443
5444         if (new) {
5445                 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
5446                 WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
5447                 new->frontbuffer_bits |= frontbuffer_bits;
5448         }
5449 }
5450
5451 /* All the new VM stuff */
5452 unsigned long
5453 i915_gem_obj_offset(struct drm_i915_gem_object *o,
5454                     struct i915_address_space *vm)
5455 {
5456         struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5457         struct i915_vma *vma;
5458
5459         WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
5460
5461         list_for_each_entry(vma, &o->vma_list, vma_link) {
5462                 if (i915_is_ggtt(vma->vm) &&
5463                     vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5464                         continue;
5465                 if (vma->vm == vm)
5466                         return vma->node.start;
5467         }
5468
5469         WARN(1, "%s vma for this object not found.\n",
5470              i915_is_ggtt(vm) ? "global" : "ppgtt");
5471         return -1;
5472 }
5473
5474 unsigned long
5475 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
5476                               const struct i915_ggtt_view *view)
5477 {
5478         struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
5479         struct i915_vma *vma;
5480
5481         list_for_each_entry(vma, &o->vma_list, vma_link)
5482                 if (vma->vm == ggtt &&
5483                     i915_ggtt_view_equal(&vma->ggtt_view, view))
5484                         return vma->node.start;
5485
5486         WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
5487         return -1;
5488 }
5489
5490 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5491                         struct i915_address_space *vm)
5492 {
5493         struct i915_vma *vma;
5494
5495         list_for_each_entry(vma, &o->vma_list, vma_link) {
5496                 if (i915_is_ggtt(vma->vm) &&
5497                     vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5498                         continue;
5499                 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5500                         return true;
5501         }
5502
5503         return false;
5504 }
5505
5506 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
5507                                   const struct i915_ggtt_view *view)
5508 {
5509         struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
5510         struct i915_vma *vma;
5511
5512         list_for_each_entry(vma, &o->vma_list, vma_link)
5513                 if (vma->vm == ggtt &&
5514                     i915_ggtt_view_equal(&vma->ggtt_view, view) &&
5515                     drm_mm_node_allocated(&vma->node))
5516                         return true;
5517
5518         return false;
5519 }
5520
5521 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5522 {
5523         struct i915_vma *vma;
5524
5525         list_for_each_entry(vma, &o->vma_list, vma_link)
5526                 if (drm_mm_node_allocated(&vma->node))
5527                         return true;
5528
5529         return false;
5530 }
5531
5532 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
5533                                 struct i915_address_space *vm)
5534 {
5535         struct drm_i915_private *dev_priv = o->base.dev->dev_private;
5536         struct i915_vma *vma;
5537
5538         WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
5539
5540         BUG_ON(list_empty(&o->vma_list));
5541
5542         list_for_each_entry(vma, &o->vma_list, vma_link) {
5543                 if (i915_is_ggtt(vma->vm) &&
5544                     vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5545                         continue;
5546                 if (vma->vm == vm)
5547                         return vma->node.size;
5548         }
5549         return 0;
5550 }
5551
5552 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
5553 {
5554         struct i915_vma *vma;
5555         list_for_each_entry(vma, &obj->vma_list, vma_link)
5556                 if (vma->pin_count > 0)
5557                         return true;
5558
5559         return false;
5560 }
5561