drivers/gpu/drm/vc4/vc4_gem.c

   1 /*
   2  * Copyright © 2014 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <linux/module.h>
  25 #include <linux/platform_device.h>
  26 #include <linux/pm_runtime.h>
  27 #include <linux/device.h>
  28 #include <linux/io.h>
  29
  30 #include "uapi/drm/vc4_drm.h"
  31 #include "vc4_drv.h"
  32 #include "vc4_regs.h"
  33 #include "vc4_trace.h"
  34
  35 static void
  36 vc4_queue_hangcheck(struct drm_device *dev)
  37 {
  38         struct vc4_dev *vc4 = to_vc4_dev(dev);
  39
  40         mod_timer(&vc4->hangcheck.timer,
  41                   round_jiffies_up(jiffies + msecs_to_jiffies(100)));
  42 }
  43
  44 struct vc4_hang_state {
  45         struct drm_vc4_get_hang_state user_state;
  46
  47         u32 bo_count;
  48         struct drm_gem_object **bo;
  49 };
  50
  51 static void
  52 vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
  53 {
  54         unsigned int i;
  55
  56         mutex_lock(&dev->struct_mutex);
  57         for (i = 0; i < state->user_state.bo_count; i++)
  58                 drm_gem_object_unreference(state->bo[i]);
  59         mutex_unlock(&dev->struct_mutex);
  60
  61         kfree(state);
  62 }
  63
  64 int
  65 vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
  66                          struct drm_file *file_priv)
  67 {
  68         struct drm_vc4_get_hang_state *get_state = data;
  69         struct drm_vc4_get_hang_state_bo *bo_state;
  70         struct vc4_hang_state *kernel_state;
  71         struct drm_vc4_get_hang_state *state;
  72         struct vc4_dev *vc4 = to_vc4_dev(dev);
  73         unsigned long irqflags;
  74         u32 i;
  75         int ret = 0;
  76
  77         spin_lock_irqsave(&vc4->job_lock, irqflags);
  78         kernel_state = vc4->hang_state;
  79         if (!kernel_state) {
  80                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  81                 return -ENOENT;
  82         }
  83         state = &kernel_state->user_state;
  84
  85         /* If the user's array isn't big enough, just return the
  86          * required array size.
  87          */
  88         if (get_state->bo_count < state->bo_count) {
  89                 get_state->bo_count = state->bo_count;
  90                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  91                 return 0;
  92         }
  93
  94         vc4->hang_state = NULL;
  95         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  96
  97         /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
  98         state->bo = get_state->bo;
  99         memcpy(get_state, state, sizeof(*state));
 100
 101         bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
 102         if (!bo_state) {
 103                 ret = -ENOMEM;
 104                 goto err_free;
 105         }
 106
 107         for (i = 0; i < state->bo_count; i++) {
 108                 struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
 109                 u32 handle;
 110
 111                 ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
 112                                             &handle);
 113
 114                 if (ret) {
 115                         state->bo_count = i - 1;
 116                         goto err;
 117                 }
 118                 bo_state[i].handle = handle;
 119                 bo_state[i].paddr = vc4_bo->base.paddr;
 120                 bo_state[i].size = vc4_bo->base.base.size;
 121         }
 122
 123         if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
 124                          bo_state,
 125                          state->bo_count * sizeof(*bo_state)))
 126                 ret = -EFAULT;
 127
 128         kfree(bo_state);
 129
 130 err_free:
 131
 132         vc4_free_hang_state(dev, kernel_state);
 133
 134 err:
 135         return ret;
 136 }
 137
 138 static void
 139 vc4_save_hang_state(struct drm_device *dev)
 140 {
 141         struct vc4_dev *vc4 = to_vc4_dev(dev);
 142         struct drm_vc4_get_hang_state *state;
 143         struct vc4_hang_state *kernel_state;
 144         struct vc4_exec_info *exec;
 145         struct vc4_bo *bo;
 146         unsigned long irqflags;
 147         unsigned int i, unref_list_count;
 148
 149         kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
 150         if (!kernel_state)
 151                 return;
 152
 153         state = &kernel_state->user_state;
 154
 155         spin_lock_irqsave(&vc4->job_lock, irqflags);
 156         exec = vc4_first_job(vc4);
 157         if (!exec) {
 158                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 159                 return;
 160         }
 161
 162         unref_list_count = 0;
 163         list_for_each_entry(bo, &exec->unref_list, unref_head)
 164                 unref_list_count++;
 165
 166         state->bo_count = exec->bo_count + unref_list_count;
 167         kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
 168                                    GFP_ATOMIC);
 169         if (!kernel_state->bo) {
 170                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 171                 return;
 172         }
 173
 174         for (i = 0; i < exec->bo_count; i++) {
 175                 drm_gem_object_reference(&exec->bo[i]->base);
 176                 kernel_state->bo[i] = &exec->bo[i]->base;
 177         }
 178
 179         list_for_each_entry(bo, &exec->unref_list, unref_head) {
 180                 drm_gem_object_reference(&bo->base.base);
 181                 kernel_state->bo[i] = &bo->base.base;
 182                 i++;
 183         }
 184
 185         state->start_bin = exec->ct0ca;
 186         state->start_render = exec->ct1ca;
 187
 188         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 189
 190         state->ct0ca = V3D_READ(V3D_CTNCA(0));
 191         state->ct0ea = V3D_READ(V3D_CTNEA(0));
 192
 193         state->ct1ca = V3D_READ(V3D_CTNCA(1));
 194         state->ct1ea = V3D_READ(V3D_CTNEA(1));
 195
 196         state->ct0cs = V3D_READ(V3D_CTNCS(0));
 197         state->ct1cs = V3D_READ(V3D_CTNCS(1));
 198
 199         state->ct0ra0 = V3D_READ(V3D_CT00RA0);
 200         state->ct1ra0 = V3D_READ(V3D_CT01RA0);
 201
 202         state->bpca = V3D_READ(V3D_BPCA);
 203         state->bpcs = V3D_READ(V3D_BPCS);
 204         state->bpoa = V3D_READ(V3D_BPOA);
 205         state->bpos = V3D_READ(V3D_BPOS);
 206
 207         state->vpmbase = V3D_READ(V3D_VPMBASE);
 208
 209         state->dbge = V3D_READ(V3D_DBGE);
 210         state->fdbgo = V3D_READ(V3D_FDBGO);
 211         state->fdbgb = V3D_READ(V3D_FDBGB);
 212         state->fdbgr = V3D_READ(V3D_FDBGR);
 213         state->fdbgs = V3D_READ(V3D_FDBGS);
 214         state->errstat = V3D_READ(V3D_ERRSTAT);
 215
 216         spin_lock_irqsave(&vc4->job_lock, irqflags);
 217         if (vc4->hang_state) {
 218                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 219                 vc4_free_hang_state(dev, kernel_state);
 220         } else {
 221                 vc4->hang_state = kernel_state;
 222                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 223         }
 224 }
 225
 226 static void
 227 vc4_reset(struct drm_device *dev)
 228 {
 229         struct vc4_dev *vc4 = to_vc4_dev(dev);
 230
 231         DRM_INFO("Resetting GPU.\n");
 232
 233         mutex_lock(&vc4->power_lock);
 234         if (vc4->power_refcount) {
 235                 /* Power the device off and back on the by dropping the
 236                  * reference on runtime PM.
 237                  */
 238                 pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
 239                 pm_runtime_get_sync(&vc4->v3d->pdev->dev);
 240         }
 241         mutex_unlock(&vc4->power_lock);
 242
 243         vc4_irq_reset(dev);
 244
 245         /* Rearm the hangcheck -- another job might have been waiting
 246          * for our hung one to get kicked off, and vc4_irq_reset()
 247          * would have started it.
 248          */
 249         vc4_queue_hangcheck(dev);
 250 }
 251
 252 static void
 253 vc4_reset_work(struct work_struct *work)
 254 {
 255         struct vc4_dev *vc4 =
 256                 container_of(work, struct vc4_dev, hangcheck.reset_work);
 257
 258         vc4_save_hang_state(vc4->dev);
 259
 260         vc4_reset(vc4->dev);
 261 }
 262
 263 static void
 264 vc4_hangcheck_elapsed(unsigned long data)
 265 {
 266         struct drm_device *dev = (struct drm_device *)data;
 267         struct vc4_dev *vc4 = to_vc4_dev(dev);
 268         uint32_t ct0ca, ct1ca;
 269         unsigned long irqflags;
 270         struct vc4_exec_info *exec;
 271
 272         spin_lock_irqsave(&vc4->job_lock, irqflags);
 273         exec = vc4_first_job(vc4);
 274
 275         /* If idle, we can stop watching for hangs. */
 276         if (!exec) {
 277                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 278                 return;
 279         }
 280
 281         ct0ca = V3D_READ(V3D_CTNCA(0));
 282         ct1ca = V3D_READ(V3D_CTNCA(1));
 283
 284         /* If we've made any progress in execution, rearm the timer
 285          * and wait.
 286          */
 287         if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
 288                 exec->last_ct0ca = ct0ca;
 289                 exec->last_ct1ca = ct1ca;
 290                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 291                 vc4_queue_hangcheck(dev);
 292                 return;
 293         }
 294
 295         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 296
 297         /* We've gone too long with no progress, reset.  This has to
 298          * be done from a work struct, since resetting can sleep and
 299          * this timer hook isn't allowed to.
 300          */
 301         schedule_work(&vc4->hangcheck.reset_work);
 302 }
 303
 304 static void
 305 submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
 306 {
 307         struct vc4_dev *vc4 = to_vc4_dev(dev);
 308
 309         /* Set the current and end address of the control list.
 310          * Writing the end register is what starts the job.
 311          */
 312         V3D_WRITE(V3D_CTNCA(thread), start);
 313         V3D_WRITE(V3D_CTNEA(thread), end);
 314 }
 315
 316 int
 317 vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
 318                    bool interruptible)
 319 {
 320         struct vc4_dev *vc4 = to_vc4_dev(dev);
 321         int ret = 0;
 322         unsigned long timeout_expire;
 323         DEFINE_WAIT(wait);
 324
 325         if (vc4->finished_seqno >= seqno)
 326                 return 0;
 327
 328         if (timeout_ns == 0)
 329                 return -ETIME;
 330
 331         timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
 332
 333         trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
 334         for (;;) {
 335                 prepare_to_wait(&vc4->job_wait_queue, &wait,
 336                                 interruptible ? TASK_INTERRUPTIBLE :
 337                                 TASK_UNINTERRUPTIBLE);
 338
 339                 if (interruptible && signal_pending(current)) {
 340                         ret = -ERESTARTSYS;
 341                         break;
 342                 }
 343
 344                 if (vc4->finished_seqno >= seqno)
 345                         break;
 346
 347                 if (timeout_ns != ~0ull) {
 348                         if (time_after_eq(jiffies, timeout_expire)) {
 349                                 ret = -ETIME;
 350                                 break;
 351                         }
 352                         schedule_timeout(timeout_expire - jiffies);
 353                 } else {
 354                         schedule();
 355                 }
 356         }
 357
 358         finish_wait(&vc4->job_wait_queue, &wait);
 359         trace_vc4_wait_for_seqno_end(dev, seqno);
 360
 361         return ret;
 362 }
 363
 364 static void
 365 vc4_flush_caches(struct drm_device *dev)
 366 {
 367         struct vc4_dev *vc4 = to_vc4_dev(dev);
 368
 369         /* Flush the GPU L2 caches.  These caches sit on top of system
 370          * L3 (the 128kb or so shared with the CPU), and are
 371          * non-allocating in the L3.
 372          */
 373         V3D_WRITE(V3D_L2CACTL,
 374                   V3D_L2CACTL_L2CCLR);
 375
 376         V3D_WRITE(V3D_SLCACTL,
 377                   VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
 378                   VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
 379                   VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
 380                   VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
 381 }
 382
 383 /* Sets the registers for the next job to be actually be executed in
 384  * the hardware.
 385  *
 386  * The job_lock should be held during this.
 387  */
 388 void
 389 vc4_submit_next_job(struct drm_device *dev)
 390 {
 391         struct vc4_dev *vc4 = to_vc4_dev(dev);
 392         struct vc4_exec_info *exec = vc4_first_job(vc4);
 393
 394         if (!exec)
 395                 return;
 396
 397         vc4_flush_caches(dev);
 398
 399         /* Disable the binner's pre-loaded overflow memory address */
 400         V3D_WRITE(V3D_BPOA, 0);
 401         V3D_WRITE(V3D_BPOS, 0);
 402
 403         if (exec->ct0ca != exec->ct0ea)
 404                 submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
 405         submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
 406 }
 407
 408 static void
 409 vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 410 {
 411         struct vc4_bo *bo;
 412         unsigned i;
 413
 414         for (i = 0; i < exec->bo_count; i++) {
 415                 bo = to_vc4_bo(&exec->bo[i]->base);
 416                 bo->seqno = seqno;
 417         }
 418
 419         list_for_each_entry(bo, &exec->unref_list, unref_head) {
 420                 bo->seqno = seqno;
 421         }
 422 }
 423
 424 /* Queues a struct vc4_exec_info for execution.  If no job is
 425  * currently executing, then submits it.
 426  *
 427  * Unlike most GPUs, our hardware only handles one command list at a
 428  * time.  To queue multiple jobs at once, we'd need to edit the
 429  * previous command list to have a jump to the new one at the end, and
 430  * then bump the end address.  That's a change for a later date,
 431  * though.
 432  */
 433 static void
 434 vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
 435 {
 436         struct vc4_dev *vc4 = to_vc4_dev(dev);
 437         uint64_t seqno;
 438         unsigned long irqflags;
 439
 440         spin_lock_irqsave(&vc4->job_lock, irqflags);
 441
 442         seqno = ++vc4->emit_seqno;
 443         exec->seqno = seqno;
 444         vc4_update_bo_seqnos(exec, seqno);
 445
 446         list_add_tail(&exec->head, &vc4->job_list);
 447
 448         /* If no job was executing, kick ours off.  Otherwise, it'll
 449          * get started when the previous job's frame done interrupt
 450          * occurs.
 451          */
 452         if (vc4_first_job(vc4) == exec) {
 453                 vc4_submit_next_job(dev);
 454                 vc4_queue_hangcheck(dev);
 455         }
 456
 457         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 458 }
 459
 460 /**
 461  * Looks up a bunch of GEM handles for BOs and stores the array for
 462  * use in the command validator that actually writes relocated
 463  * addresses pointing to them.
 464  */
 465 static int
 466 vc4_cl_lookup_bos(struct drm_device *dev,
 467                   struct drm_file *file_priv,
 468                   struct vc4_exec_info *exec)
 469 {
 470         struct drm_vc4_submit_cl *args = exec->args;
 471         uint32_t *handles;
 472         int ret = 0;
 473         int i;
 474
 475         exec->bo_count = args->bo_handle_count;
 476
 477         if (!exec->bo_count) {
 478                 /* See comment on bo_index for why we have to check
 479                  * this.
 480                  */
 481                 DRM_ERROR("Rendering requires BOs to validate\n");
 482                 return -EINVAL;
 483         }
 484
 485         exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
 486                            GFP_KERNEL);
 487         if (!exec->bo) {
 488                 DRM_ERROR("Failed to allocate validated BO pointers\n");
 489                 return -ENOMEM;
 490         }
 491
 492         handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
 493         if (!handles) {
 494                 DRM_ERROR("Failed to allocate incoming GEM handles\n");
 495                 goto fail;
 496         }
 497
 498         ret = copy_from_user(handles,
 499                              (void __user *)(uintptr_t)args->bo_handles,
 500                              exec->bo_count * sizeof(uint32_t));
 501         if (ret) {
 502                 DRM_ERROR("Failed to copy in GEM handles\n");
 503                 goto fail;
 504         }
 505
 506         spin_lock(&file_priv->table_lock);
 507         for (i = 0; i < exec->bo_count; i++) {
 508                 struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
 509                                                      handles[i]);
 510                 if (!bo) {
 511                         DRM_ERROR("Failed to look up GEM BO %d: %d\n",
 512                                   i, handles[i]);
 513                         ret = -EINVAL;
 514                         spin_unlock(&file_priv->table_lock);
 515                         goto fail;
 516                 }
 517                 drm_gem_object_reference(bo);
 518                 exec->bo[i] = (struct drm_gem_cma_object *)bo;
 519         }
 520         spin_unlock(&file_priv->table_lock);
 521
 522 fail:
 523         kfree(handles);
 524         return 0;
 525 }
 526
 527 static int
 528 vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 529 {
 530         struct drm_vc4_submit_cl *args = exec->args;
 531         void *temp = NULL;
 532         void *bin;
 533         int ret = 0;
 534         uint32_t bin_offset = 0;
 535         uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
 536                                              16);
 537         uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
 538         uint32_t exec_size = uniforms_offset + args->uniforms_size;
 539         uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
 540                                           args->shader_rec_count);
 541         struct vc4_bo *bo;
 542
 543         if (uniforms_offset < shader_rec_offset ||
 544             exec_size < uniforms_offset ||
 545             args->shader_rec_count >= (UINT_MAX /
 546                                           sizeof(struct vc4_shader_state)) ||
 547             temp_size < exec_size) {
 548                 DRM_ERROR("overflow in exec arguments\n");
 549                 goto fail;
 550         }
 551
 552         /* Allocate space where we'll store the copied in user command lists
 553          * and shader records.
 554          *
 555          * We don't just copy directly into the BOs because we need to
 556          * read the contents back for validation, and I think the
 557          * bo->vaddr is uncached access.
 558          */
 559         temp = kmalloc(temp_size, GFP_KERNEL);
 560         if (!temp) {
 561                 DRM_ERROR("Failed to allocate storage for copying "
 562                           "in bin/render CLs.\n");
 563                 ret = -ENOMEM;
 564                 goto fail;
 565         }
 566         bin = temp + bin_offset;
 567         exec->shader_rec_u = temp + shader_rec_offset;
 568         exec->uniforms_u = temp + uniforms_offset;
 569         exec->shader_state = temp + exec_size;
 570         exec->shader_state_size = args->shader_rec_count;
 571
 572         if (copy_from_user(bin,
 573                            (void __user *)(uintptr_t)args->bin_cl,
 574                            args->bin_cl_size)) {
 575                 ret = -EFAULT;
 576                 goto fail;
 577         }
 578
 579         if (copy_from_user(exec->shader_rec_u,
 580                            (void __user *)(uintptr_t)args->shader_rec,
 581                            args->shader_rec_size)) {
 582                 ret = -EFAULT;
 583                 goto fail;
 584         }
 585
 586         if (copy_from_user(exec->uniforms_u,
 587                            (void __user *)(uintptr_t)args->uniforms,
 588                            args->uniforms_size)) {
 589                 ret = -EFAULT;
 590                 goto fail;
 591         }
 592
 593         bo = vc4_bo_create(dev, exec_size, true);
 594         if (IS_ERR(bo)) {
 595                 DRM_ERROR("Couldn't allocate BO for binning\n");
 596                 ret = PTR_ERR(bo);
 597                 goto fail;
 598         }
 599         exec->exec_bo = &bo->base;
 600
 601         list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
 602                       &exec->unref_list);
 603
 604         exec->ct0ca = exec->exec_bo->paddr + bin_offset;
 605
 606         exec->bin_u = bin;
 607
 608         exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
 609         exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
 610         exec->shader_rec_size = args->shader_rec_size;
 611
 612         exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
 613         exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
 614         exec->uniforms_size = args->uniforms_size;
 615
 616         ret = vc4_validate_bin_cl(dev,
 617                                   exec->exec_bo->vaddr + bin_offset,
 618                                   bin,
 619                                   exec);
 620         if (ret)
 621                 goto fail;
 622
 623         ret = vc4_validate_shader_recs(dev, exec);
 624
 625 fail:
 626         kfree(temp);
 627         return ret;
 628 }
 629
 630 static void
 631 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 632 {
 633         struct vc4_dev *vc4 = to_vc4_dev(dev);
 634         unsigned i;
 635
 636         /* Need the struct lock for drm_gem_object_unreference(). */
 637         mutex_lock(&dev->struct_mutex);
 638         if (exec->bo) {
 639                 for (i = 0; i < exec->bo_count; i++)
 640                         drm_gem_object_unreference(&exec->bo[i]->base);
 641                 kfree(exec->bo);
 642         }
 643
 644         while (!list_empty(&exec->unref_list)) {
 645                 struct vc4_bo *bo = list_first_entry(&exec->unref_list,
 646                                                      struct vc4_bo, unref_head);
 647                 list_del(&bo->unref_head);
 648                 drm_gem_object_unreference(&bo->base.base);
 649         }
 650         mutex_unlock(&dev->struct_mutex);
 651
 652         mutex_lock(&vc4->power_lock);
 653         if (--vc4->power_refcount == 0)
 654                 pm_runtime_put(&vc4->v3d->pdev->dev);
 655         mutex_unlock(&vc4->power_lock);
 656
 657         kfree(exec);
 658 }
 659
 660 void
 661 vc4_job_handle_completed(struct vc4_dev *vc4)
 662 {
 663         unsigned long irqflags;
 664         struct vc4_seqno_cb *cb, *cb_temp;
 665
 666         spin_lock_irqsave(&vc4->job_lock, irqflags);
 667         while (!list_empty(&vc4->job_done_list)) {
 668                 struct vc4_exec_info *exec =
 669                         list_first_entry(&vc4->job_done_list,
 670                                          struct vc4_exec_info, head);
 671                 list_del(&exec->head);
 672
 673                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 674                 vc4_complete_exec(vc4->dev, exec);
 675                 spin_lock_irqsave(&vc4->job_lock, irqflags);
 676         }
 677
 678         list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
 679                 if (cb->seqno <= vc4->finished_seqno) {
 680                         list_del_init(&cb->work.entry);
 681                         schedule_work(&cb->work);
 682                 }
 683         }
 684
 685         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 686 }
 687
 688 static void vc4_seqno_cb_work(struct work_struct *work)
 689 {
 690         struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
 691
 692         cb->func(cb);
 693 }
 694
 695 int vc4_queue_seqno_cb(struct drm_device *dev,
 696                        struct vc4_seqno_cb *cb, uint64_t seqno,
 697                        void (*func)(struct vc4_seqno_cb *cb))
 698 {
 699         struct vc4_dev *vc4 = to_vc4_dev(dev);
 700         int ret = 0;
 701         unsigned long irqflags;
 702
 703         cb->func = func;
 704         INIT_WORK(&cb->work, vc4_seqno_cb_work);
 705
 706         spin_lock_irqsave(&vc4->job_lock, irqflags);
 707         if (seqno > vc4->finished_seqno) {
 708                 cb->seqno = seqno;
 709                 list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
 710         } else {
 711                 schedule_work(&cb->work);
 712         }
 713         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 714
 715         return ret;
 716 }
 717
 718 /* Scheduled when any job has been completed, this walks the list of
 719  * jobs that had completed and unrefs their BOs and frees their exec
 720  * structs.
 721  */
 722 static void
 723 vc4_job_done_work(struct work_struct *work)
 724 {
 725         struct vc4_dev *vc4 =
 726                 container_of(work, struct vc4_dev, job_done_work);
 727
 728         vc4_job_handle_completed(vc4);
 729 }
 730
 731 static int
 732 vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
 733                                 uint64_t seqno,
 734                                 uint64_t *timeout_ns)
 735 {
 736         unsigned long start = jiffies;
 737         int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
 738
 739         if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
 740                 uint64_t delta = jiffies_to_nsecs(jiffies - start);
 741
 742                 if (*timeout_ns >= delta)
 743                         *timeout_ns -= delta;
 744         }
 745
 746         return ret;
 747 }
 748
 749 int
 750 vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
 751                      struct drm_file *file_priv)
 752 {
 753         struct drm_vc4_wait_seqno *args = data;
 754
 755         return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
 756                                                &args->timeout_ns);
 757 }
 758
 759 int
 760 vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 761                   struct drm_file *file_priv)
 762 {
 763         int ret;
 764         struct drm_vc4_wait_bo *args = data;
 765         struct drm_gem_object *gem_obj;
 766         struct vc4_bo *bo;
 767
 768         if (args->pad != 0)
 769                 return -EINVAL;
 770
 771         gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 772         if (!gem_obj) {
 773                 DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
 774                 return -EINVAL;
 775         }
 776         bo = to_vc4_bo(gem_obj);
 777
 778         ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
 779                                               &args->timeout_ns);
 780
 781         drm_gem_object_unreference_unlocked(gem_obj);
 782         return ret;
 783 }
 784
 785 /**
 786  * Submits a command list to the VC4.
 787  *
 788  * This is what is called batchbuffer emitting on other hardware.
 789  */
 790 int
 791 vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 792                     struct drm_file *file_priv)
 793 {
 794         struct vc4_dev *vc4 = to_vc4_dev(dev);
 795         struct drm_vc4_submit_cl *args = data;
 796         struct vc4_exec_info *exec;
 797         int ret = 0;
 798
 799         if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
 800                 DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
 801                 return -EINVAL;
 802         }
 803
 804         exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
 805         if (!exec) {
 806                 DRM_ERROR("malloc failure on exec struct\n");
 807                 return -ENOMEM;
 808         }
 809
 810         mutex_lock(&vc4->power_lock);
 811         if (vc4->power_refcount++ == 0)
 812                 ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
 813         mutex_unlock(&vc4->power_lock);
 814         if (ret < 0) {
 815                 kfree(exec);
 816                 return ret;
 817         }
 818
 819         exec->args = args;
 820         INIT_LIST_HEAD(&exec->unref_list);
 821
 822         ret = vc4_cl_lookup_bos(dev, file_priv, exec);
 823         if (ret)
 824                 goto fail;
 825
 826         if (exec->args->bin_cl_size != 0) {
 827                 ret = vc4_get_bcl(dev, exec);
 828                 if (ret)
 829                         goto fail;
 830         } else {
 831                 exec->ct0ca = 0;
 832                 exec->ct0ea = 0;
 833         }
 834
 835         ret = vc4_get_rcl(dev, exec);
 836         if (ret)
 837                 goto fail;
 838
 839         /* Clear this out of the struct we'll be putting in the queue,
 840          * since it's part of our stack.
 841          */
 842         exec->args = NULL;
 843
 844         vc4_queue_submit(dev, exec);
 845
 846         /* Return the seqno for our job. */
 847         args->seqno = vc4->emit_seqno;
 848
 849         return 0;
 850
 851 fail:
 852         vc4_complete_exec(vc4->dev, exec);
 853
 854         return ret;
 855 }
 856
 857 void
 858 vc4_gem_init(struct drm_device *dev)
 859 {
 860         struct vc4_dev *vc4 = to_vc4_dev(dev);
 861
 862         INIT_LIST_HEAD(&vc4->job_list);
 863         INIT_LIST_HEAD(&vc4->job_done_list);
 864         INIT_LIST_HEAD(&vc4->seqno_cb_list);
 865         spin_lock_init(&vc4->job_lock);
 866
 867         INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
 868         setup_timer(&vc4->hangcheck.timer,
 869                     vc4_hangcheck_elapsed,
 870                     (unsigned long)dev);
 871
 872         INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
 873
 874         mutex_init(&vc4->power_lock);
 875 }
 876
 877 void
 878 vc4_gem_destroy(struct drm_device *dev)
 879 {
 880         struct vc4_dev *vc4 = to_vc4_dev(dev);
 881
 882         /* Waiting for exec to finish would need to be done before
 883          * unregistering V3D.
 884          */
 885         WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
 886
 887         /* V3D should already have disabled its interrupt and cleared
 888          * the overflow allocation registers.  Now free the object.
 889          */
 890         if (vc4->overflow_mem) {
 891                 drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
 892                 vc4->overflow_mem = NULL;
 893         }
 894
 895         vc4_bo_cache_destroy(dev);
 896
 897         if (vc4->hang_state)
 898                 vc4_free_hang_state(dev, vc4->hang_state);
 899 }