drivers/misc/vmw_vmci/vmci_context.c

   1 /*
   2  * VMware VMCI Driver
   3  *
   4  * Copyright (C) 2012 VMware, Inc. All rights reserved.
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License as published by the
   8  * Free Software Foundation version 2 and no later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * for more details.
  14  */
  15
  16 #include <linux/vmw_vmci_defs.h>
  17 #include <linux/vmw_vmci_api.h>
  18 #include <linux/highmem.h>
  19 #include <linux/kernel.h>
  20 #include <linux/module.h>
  21 #include <linux/sched.h>
  22 #include <linux/cred.h>
  23 #include <linux/slab.h>
  24
  25 #include "vmci_queue_pair.h"
  26 #include "vmci_datagram.h"
  27 #include "vmci_doorbell.h"
  28 #include "vmci_context.h"
  29 #include "vmci_driver.h"
  30 #include "vmci_event.h"
  31
  32 /*
  33  * List of current VMCI contexts.  Contexts can be added by
  34  * vmci_ctx_create() and removed via vmci_ctx_destroy().
  35  * These, along with context lookup, are protected by the
  36  * list structure's lock.
  37  */
  38 static struct {
  39         struct list_head head;
  40         spinlock_t lock; /* Spinlock for context list operations */
  41 } ctx_list = {
  42         .head = LIST_HEAD_INIT(ctx_list.head),
  43         .lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
  44 };
  45
  46 /* Used by contexts that did not set up notify flag pointers */
  47 static bool ctx_dummy_notify;
  48
  49 static void ctx_signal_notify(struct vmci_ctx *context)
  50 {
  51         *context->notify = true;
  52 }
  53
  54 static void ctx_clear_notify(struct vmci_ctx *context)
  55 {
  56         *context->notify = false;
  57 }
  58
  59 /*
  60  * If nothing requires the attention of the guest, clears both
  61  * notify flag and call.
  62  */
  63 static void ctx_clear_notify_call(struct vmci_ctx *context)
  64 {
  65         if (context->pending_datagrams == 0 &&
  66             vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
  67                 ctx_clear_notify(context);
  68 }
  69
  70 /*
  71  * Sets the context's notify flag iff datagrams are pending for this
  72  * context.  Called from vmci_setup_notify().
  73  */
  74 void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
  75 {
  76         spin_lock(&context->lock);
  77         if (context->pending_datagrams)
  78                 ctx_signal_notify(context);
  79         spin_unlock(&context->lock);
  80 }
  81
  82 /*
  83  * Allocates and initializes a VMCI context.
  84  */
  85 struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
  86                                  uintptr_t event_hnd,
  87                                  int user_version,
  88                                  const struct cred *cred)
  89 {
  90         struct vmci_ctx *context;
  91         int error;
  92
  93         if (cid == VMCI_INVALID_ID) {
  94                 pr_devel("Invalid context ID for VMCI context\n");
  95                 error = -EINVAL;
  96                 goto err_out;
  97         }
  98
  99         if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
 100                 pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
 101                          priv_flags);
 102                 error = -EINVAL;
 103                 goto err_out;
 104         }
 105
 106         if (user_version == 0) {
 107                 pr_devel("Invalid suer_version %d\n", user_version);
 108                 error = -EINVAL;
 109                 goto err_out;
 110         }
 111
 112         context = kzalloc(sizeof(*context), GFP_KERNEL);
 113         if (!context) {
 114                 pr_warn("Failed to allocate memory for VMCI context\n");
 115                 error = -EINVAL;
 116                 goto err_out;
 117         }
 118
 119         kref_init(&context->kref);
 120         spin_lock_init(&context->lock);
 121         INIT_LIST_HEAD(&context->list_item);
 122         INIT_LIST_HEAD(&context->datagram_queue);
 123         INIT_LIST_HEAD(&context->notifier_list);
 124
 125         /* Initialize host-specific VMCI context. */
 126         init_waitqueue_head(&context->host_context.wait_queue);
 127
 128         context->queue_pair_array = vmci_handle_arr_create(0);
 129         if (!context->queue_pair_array) {
 130                 error = -ENOMEM;
 131                 goto err_free_ctx;
 132         }
 133
 134         context->doorbell_array = vmci_handle_arr_create(0);
 135         if (!context->doorbell_array) {
 136                 error = -ENOMEM;
 137                 goto err_free_qp_array;
 138         }
 139
 140         context->pending_doorbell_array = vmci_handle_arr_create(0);
 141         if (!context->pending_doorbell_array) {
 142                 error = -ENOMEM;
 143                 goto err_free_db_array;
 144         }
 145
 146         context->user_version = user_version;
 147
 148         context->priv_flags = priv_flags;
 149
 150         if (cred)
 151                 context->cred = get_cred(cred);
 152
 153         context->notify = &ctx_dummy_notify;
 154         context->notify_page = NULL;
 155
 156         /*
 157          * If we collide with an existing context we generate a new
 158          * and use it instead. The VMX will determine if regeneration
 159          * is okay. Since there isn't 4B - 16 VMs running on a given
 160          * host, the below loop will terminate.
 161          */
 162         spin_lock(&ctx_list.lock);
 163
 164         while (vmci_ctx_exists(cid)) {
 165                 /* We reserve the lowest 16 ids for fixed contexts. */
 166                 cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
 167                 if (cid == VMCI_INVALID_ID)
 168                         cid = VMCI_RESERVED_CID_LIMIT;
 169         }
 170         context->cid = cid;
 171
 172         list_add_tail_rcu(&context->list_item, &ctx_list.head);
 173         spin_unlock(&ctx_list.lock);
 174
 175         return context;
 176
 177  err_free_db_array:
 178         vmci_handle_arr_destroy(context->doorbell_array);
 179  err_free_qp_array:
 180         vmci_handle_arr_destroy(context->queue_pair_array);
 181  err_free_ctx:
 182         kfree(context);
 183  err_out:
 184         return ERR_PTR(error);
 185 }
 186
 187 /*
 188  * Destroy VMCI context.
 189  */
 190 void vmci_ctx_destroy(struct vmci_ctx *context)
 191 {
 192         spin_lock(&ctx_list.lock);
 193         list_del_rcu(&context->list_item);
 194         spin_unlock(&ctx_list.lock);
 195         synchronize_rcu();
 196
 197         vmci_ctx_put(context);
 198 }
 199
 200 /*
 201  * Fire notification for all contexts interested in given cid.
 202  */
 203 static int ctx_fire_notification(u32 context_id, u32 priv_flags)
 204 {
 205         u32 i, array_size;
 206         struct vmci_ctx *sub_ctx;
 207         struct vmci_handle_arr *subscriber_array;
 208         struct vmci_handle context_handle =
 209                 vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
 210
 211         /*
 212          * We create an array to hold the subscribers we find when
 213          * scanning through all contexts.
 214          */
 215         subscriber_array = vmci_handle_arr_create(0);
 216         if (subscriber_array == NULL)
 217                 return VMCI_ERROR_NO_MEM;
 218
 219         /*
 220          * Scan all contexts to find who is interested in being
 221          * notified about given contextID.
 222          */
 223         rcu_read_lock();
 224         list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
 225                 struct vmci_handle_list *node;
 226
 227                 /*
 228                  * We only deliver notifications of the removal of
 229                  * contexts, if the two contexts are allowed to
 230                  * interact.
 231                  */
 232                 if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
 233                         continue;
 234
 235                 list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
 236                         if (!vmci_handle_is_equal(node->handle, context_handle))
 237                                 continue;
 238
 239                         vmci_handle_arr_append_entry(&subscriber_array,
 240                                         vmci_make_handle(sub_ctx->cid,
 241                                                          VMCI_EVENT_HANDLER));
 242                 }
 243         }
 244         rcu_read_unlock();
 245
 246         /* Fire event to all subscribers. */
 247         array_size = vmci_handle_arr_get_size(subscriber_array);
 248         for (i = 0; i < array_size; i++) {
 249                 int result;
 250                 struct vmci_event_ctx ev;
 251
 252                 ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
 253                 ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 254                                                   VMCI_CONTEXT_RESOURCE_ID);
 255                 ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
 256                 ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
 257                 ev.payload.context_id = context_id;
 258
 259                 result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
 260                                                 &ev.msg.hdr, false);
 261                 if (result < VMCI_SUCCESS) {
 262                         pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
 263                                  ev.msg.event_data.event,
 264                                  ev.msg.hdr.dst.context);
 265                         /* We continue to enqueue on next subscriber. */
 266                 }
 267         }
 268         vmci_handle_arr_destroy(subscriber_array);
 269
 270         return VMCI_SUCCESS;
 271 }
 272
 273 /*
 274  * Returns the current number of pending datagrams. The call may
 275  * also serve as a synchronization point for the datagram queue,
 276  * as no enqueue operations can occur concurrently.
 277  */
 278 int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
 279 {
 280         struct vmci_ctx *context;
 281
 282         context = vmci_ctx_get(cid);
 283         if (context == NULL)
 284                 return VMCI_ERROR_INVALID_ARGS;
 285
 286         spin_lock(&context->lock);
 287         if (pending)
 288                 *pending = context->pending_datagrams;
 289         spin_unlock(&context->lock);
 290         vmci_ctx_put(context);
 291
 292         return VMCI_SUCCESS;
 293 }
 294
 295 /*
 296  * Queues a VMCI datagram for the appropriate target VM context.
 297  */
 298 int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
 299 {
 300         struct vmci_datagram_queue_entry *dq_entry;
 301         struct vmci_ctx *context;
 302         struct vmci_handle dg_src;
 303         size_t vmci_dg_size;
 304
 305         vmci_dg_size = VMCI_DG_SIZE(dg);
 306         if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
 307                 pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
 308                 return VMCI_ERROR_INVALID_ARGS;
 309         }
 310
 311         /* Get the target VM's VMCI context. */
 312         context = vmci_ctx_get(cid);
 313         if (!context) {
 314                 pr_devel("Invalid context (ID=0x%x)\n", cid);
 315                 return VMCI_ERROR_INVALID_ARGS;
 316         }
 317
 318         /* Allocate guest call entry and add it to the target VM's queue. */
 319         dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
 320         if (dq_entry == NULL) {
 321                 pr_warn("Failed to allocate memory for datagram\n");
 322                 vmci_ctx_put(context);
 323                 return VMCI_ERROR_NO_MEM;
 324         }
 325         dq_entry->dg = dg;
 326         dq_entry->dg_size = vmci_dg_size;
 327         dg_src = dg->src;
 328         INIT_LIST_HEAD(&dq_entry->list_item);
 329
 330         spin_lock(&context->lock);
 331
 332         /*
 333          * We put a higher limit on datagrams from the hypervisor.  If
 334          * the pending datagram is not from hypervisor, then we check
 335          * if enqueueing it would exceed the
 336          * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
 337          * the pending datagram is from hypervisor, we allow it to be
 338          * queued at the destination side provided we don't reach the
 339          * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
 340          */
 341         if (context->datagram_queue_size + vmci_dg_size >=
 342             VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
 343             (!vmci_handle_is_equal(dg_src,
 344                                 vmci_make_handle
 345                                 (VMCI_HYPERVISOR_CONTEXT_ID,
 346                                  VMCI_CONTEXT_RESOURCE_ID)) ||
 347              context->datagram_queue_size + vmci_dg_size >=
 348              VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
 349                 spin_unlock(&context->lock);
 350                 vmci_ctx_put(context);
 351                 kfree(dq_entry);
 352                 pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
 353                 return VMCI_ERROR_NO_RESOURCES;
 354         }
 355
 356         list_add(&dq_entry->list_item, &context->datagram_queue);
 357         context->pending_datagrams++;
 358         context->datagram_queue_size += vmci_dg_size;
 359         ctx_signal_notify(context);
 360         wake_up(&context->host_context.wait_queue);
 361         spin_unlock(&context->lock);
 362         vmci_ctx_put(context);
 363
 364         return vmci_dg_size;
 365 }
 366
 367 /*
 368  * Verifies whether a context with the specified context ID exists.
 369  * FIXME: utility is dubious as no decisions can be reliably made
 370  * using this data as context can appear and disappear at any time.
 371  */
 372 bool vmci_ctx_exists(u32 cid)
 373 {
 374         struct vmci_ctx *context;
 375         bool exists = false;
 376
 377         rcu_read_lock();
 378
 379         list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
 380                 if (context->cid == cid) {
 381                         exists = true;
 382                         break;
 383                 }
 384         }
 385
 386         rcu_read_unlock();
 387         return exists;
 388 }
 389
 390 /*
 391  * Retrieves VMCI context corresponding to the given cid.
 392  */
 393 struct vmci_ctx *vmci_ctx_get(u32 cid)
 394 {
 395         struct vmci_ctx *c, *context = NULL;
 396
 397         if (cid == VMCI_INVALID_ID)
 398                 return NULL;
 399
 400         rcu_read_lock();
 401         list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
 402                 if (c->cid == cid) {
 403                         /*
 404                          * The context owner drops its own reference to the
 405                          * context only after removing it from the list and
 406                          * waiting for RCU grace period to expire. This
 407                          * means that we are not about to increase the
 408                          * reference count of something that is in the
 409                          * process of being destroyed.
 410                          */
 411                         context = c;
 412                         kref_get(&context->kref);
 413                         break;
 414                 }
 415         }
 416         rcu_read_unlock();
 417
 418         return context;
 419 }
 420
 421 /*
 422  * Deallocates all parts of a context data structure. This
 423  * function doesn't lock the context, because it assumes that
 424  * the caller was holding the last reference to context.
 425  */
 426 static void ctx_free_ctx(struct kref *kref)
 427 {
 428         struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
 429         struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
 430         struct vmci_handle temp_handle;
 431         struct vmci_handle_list *notifier, *tmp;
 432
 433         /*
 434          * Fire event to all contexts interested in knowing this
 435          * context is dying.
 436          */
 437         ctx_fire_notification(context->cid, context->priv_flags);
 438
 439         /*
 440          * Cleanup all queue pair resources attached to context.  If
 441          * the VM dies without cleaning up, this code will make sure
 442          * that no resources are leaked.
 443          */
 444         temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
 445         while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
 446                 if (vmci_qp_broker_detach(temp_handle,
 447                                           context) < VMCI_SUCCESS) {
 448                         /*
 449                          * When vmci_qp_broker_detach() succeeds it
 450                          * removes the handle from the array.  If
 451                          * detach fails, we must remove the handle
 452                          * ourselves.
 453                          */
 454                         vmci_handle_arr_remove_entry(context->queue_pair_array,
 455                                                      temp_handle);
 456                 }
 457                 temp_handle =
 458                     vmci_handle_arr_get_entry(context->queue_pair_array, 0);
 459         }
 460
 461         /*
 462          * It is fine to destroy this without locking the callQueue, as
 463          * this is the only thread having a reference to the context.
 464          */
 465         list_for_each_entry_safe(dq_entry, dq_entry_tmp,
 466                                  &context->datagram_queue, list_item) {
 467                 WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
 468                 list_del(&dq_entry->list_item);
 469                 kfree(dq_entry->dg);
 470                 kfree(dq_entry);
 471         }
 472
 473         list_for_each_entry_safe(notifier, tmp,
 474                                  &context->notifier_list, node) {
 475                 list_del(&notifier->node);
 476                 kfree(notifier);
 477         }
 478
 479         vmci_handle_arr_destroy(context->queue_pair_array);
 480         vmci_handle_arr_destroy(context->doorbell_array);
 481         vmci_handle_arr_destroy(context->pending_doorbell_array);
 482         vmci_ctx_unset_notify(context);
 483         if (context->cred)
 484                 put_cred(context->cred);
 485         kfree(context);
 486 }
 487
 488 /*
 489  * Drops reference to VMCI context. If this is the last reference to
 490  * the context it will be deallocated. A context is created with
 491  * a reference count of one, and on destroy, it is removed from
 492  * the context list before its reference count is decremented. Thus,
 493  * if we reach zero, we are sure that nobody else are about to increment
 494  * it (they need the entry in the context list for that), and so there
 495  * is no need for locking.
 496  */
 497 void vmci_ctx_put(struct vmci_ctx *context)
 498 {
 499         kref_put(&context->kref, ctx_free_ctx);
 500 }
 501
 502 /*
 503  * Dequeues the next datagram and returns it to caller.
 504  * The caller passes in a pointer to the max size datagram
 505  * it can handle and the datagram is only unqueued if the
 506  * size is less than max_size. If larger max_size is set to
 507  * the size of the datagram to give the caller a chance to
 508  * set up a larger buffer for the guestcall.
 509  */
 510 int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
 511                               size_t *max_size,
 512                               struct vmci_datagram **dg)
 513 {
 514         struct vmci_datagram_queue_entry *dq_entry;
 515         struct list_head *list_item;
 516         int rv;
 517
 518         /* Dequeue the next datagram entry. */
 519         spin_lock(&context->lock);
 520         if (context->pending_datagrams == 0) {
 521                 ctx_clear_notify_call(context);
 522                 spin_unlock(&context->lock);
 523                 pr_devel("No datagrams pending\n");
 524                 return VMCI_ERROR_NO_MORE_DATAGRAMS;
 525         }
 526
 527         list_item = context->datagram_queue.next;
 528
 529         dq_entry =
 530             list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
 531
 532         /* Check size of caller's buffer. */
 533         if (*max_size < dq_entry->dg_size) {
 534                 *max_size = dq_entry->dg_size;
 535                 spin_unlock(&context->lock);
 536                 pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
 537                          (u32) *max_size);
 538                 return VMCI_ERROR_NO_MEM;
 539         }
 540
 541         list_del(list_item);
 542         context->pending_datagrams--;
 543         context->datagram_queue_size -= dq_entry->dg_size;
 544         if (context->pending_datagrams == 0) {
 545                 ctx_clear_notify_call(context);
 546                 rv = VMCI_SUCCESS;
 547         } else {
 548                 /*
 549                  * Return the size of the next datagram.
 550                  */
 551                 struct vmci_datagram_queue_entry *next_entry;
 552
 553                 list_item = context->datagram_queue.next;
 554                 next_entry =
 555                     list_entry(list_item, struct vmci_datagram_queue_entry,
 556                                list_item);
 557
 558                 /*
 559                  * The following size_t -> int truncation is fine as
 560                  * the maximum size of a (routable) datagram is 68KB.
 561                  */
 562                 rv = (int)next_entry->dg_size;
 563         }
 564         spin_unlock(&context->lock);
 565
 566         /* Caller must free datagram. */
 567         *dg = dq_entry->dg;
 568         dq_entry->dg = NULL;
 569         kfree(dq_entry);
 570
 571         return rv;
 572 }
 573
 574 /*
 575  * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
 576  * page mapped/locked by vmci_setup_notify().
 577  */
 578 void vmci_ctx_unset_notify(struct vmci_ctx *context)
 579 {
 580         struct page *notify_page;
 581
 582         spin_lock(&context->lock);
 583
 584         notify_page = context->notify_page;
 585         context->notify = &ctx_dummy_notify;
 586         context->notify_page = NULL;
 587
 588         spin_unlock(&context->lock);
 589
 590         if (notify_page) {
 591                 kunmap(notify_page);
 592                 put_page(notify_page);
 593         }
 594 }
 595
 596 /*
 597  * Add remote_cid to list of contexts current contexts wants
 598  * notifications from/about.
 599  */
 600 int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
 601 {
 602         struct vmci_ctx *context;
 603         struct vmci_handle_list *notifier, *n;
 604         int result;
 605         bool exists = false;
 606
 607         context = vmci_ctx_get(context_id);
 608         if (!context)
 609                 return VMCI_ERROR_NOT_FOUND;
 610
 611         if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
 612                 pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
 613                          context_id, remote_cid);
 614                 result = VMCI_ERROR_DST_UNREACHABLE;
 615                 goto out;
 616         }
 617
 618         if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
 619                 result = VMCI_ERROR_NO_ACCESS;
 620                 goto out;
 621         }
 622
 623         notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
 624         if (!notifier) {
 625                 result = VMCI_ERROR_NO_MEM;
 626                 goto out;
 627         }
 628
 629         INIT_LIST_HEAD(&notifier->node);
 630         notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
 631
 632         spin_lock(&context->lock);
 633
 634         list_for_each_entry(n, &context->notifier_list, node) {
 635                 if (vmci_handle_is_equal(n->handle, notifier->handle)) {
 636                         exists = true;
 637                         break;
 638                 }
 639         }
 640
 641         if (exists) {
 642                 kfree(notifier);
 643                 result = VMCI_ERROR_ALREADY_EXISTS;
 644         } else {
 645                 list_add_tail_rcu(&notifier->node, &context->notifier_list);
 646                 context->n_notifiers++;
 647                 result = VMCI_SUCCESS;
 648         }
 649
 650         spin_unlock(&context->lock);
 651
 652  out:
 653         vmci_ctx_put(context);
 654         return result;
 655 }
 656
 657 /*
 658  * Remove remote_cid from current context's list of contexts it is
 659  * interested in getting notifications from/about.
 660  */
 661 int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
 662 {
 663         struct vmci_ctx *context;
 664         struct vmci_handle_list *notifier, *tmp;
 665         struct vmci_handle handle;
 666         bool found = false;
 667
 668         context = vmci_ctx_get(context_id);
 669         if (!context)
 670                 return VMCI_ERROR_NOT_FOUND;
 671
 672         handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
 673
 674         spin_lock(&context->lock);
 675         list_for_each_entry_safe(notifier, tmp,
 676                                  &context->notifier_list, node) {
 677                 if (vmci_handle_is_equal(notifier->handle, handle)) {
 678                         list_del_rcu(&notifier->node);
 679                         context->n_notifiers--;
 680                         found = true;
 681                         break;
 682                 }
 683         }
 684         spin_unlock(&context->lock);
 685
 686         if (found) {
 687                 synchronize_rcu();
 688                 kfree(notifier);
 689         }
 690
 691         vmci_ctx_put(context);
 692
 693         return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
 694 }
 695
 696 static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
 697                                         u32 *buf_size, void **pbuf)
 698 {
 699         u32 *notifiers;
 700         size_t data_size;
 701         struct vmci_handle_list *entry;
 702         int i = 0;
 703
 704         if (context->n_notifiers == 0) {
 705                 *buf_size = 0;
 706                 *pbuf = NULL;
 707                 return VMCI_SUCCESS;
 708         }
 709
 710         data_size = context->n_notifiers * sizeof(*notifiers);
 711         if (*buf_size < data_size) {
 712                 *buf_size = data_size;
 713                 return VMCI_ERROR_MORE_DATA;
 714         }
 715
 716         notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
 717         if (!notifiers)
 718                 return VMCI_ERROR_NO_MEM;
 719
 720         list_for_each_entry(entry, &context->notifier_list, node)
 721                 notifiers[i++] = entry->handle.context;
 722
 723         *buf_size = data_size;
 724         *pbuf = notifiers;
 725         return VMCI_SUCCESS;
 726 }
 727
 728 static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
 729                                         u32 *buf_size, void **pbuf)
 730 {
 731         struct dbell_cpt_state *dbells;
 732         size_t n_doorbells;
 733         int i;
 734
 735         n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
 736         if (n_doorbells > 0) {
 737                 size_t data_size = n_doorbells * sizeof(*dbells);
 738                 if (*buf_size < data_size) {
 739                         *buf_size = data_size;
 740                         return VMCI_ERROR_MORE_DATA;
 741                 }
 742
 743                 dbells = kmalloc(data_size, GFP_ATOMIC);
 744                 if (!dbells)
 745                         return VMCI_ERROR_NO_MEM;
 746
 747                 for (i = 0; i < n_doorbells; i++)
 748                         dbells[i].handle = vmci_handle_arr_get_entry(
 749                                                 context->doorbell_array, i);
 750
 751                 *buf_size = data_size;
 752                 *pbuf = dbells;
 753         } else {
 754                 *buf_size = 0;
 755                 *pbuf = NULL;
 756         }
 757
 758         return VMCI_SUCCESS;
 759 }
 760
 761 /*
 762  * Get current context's checkpoint state of given type.
 763  */
 764 int vmci_ctx_get_chkpt_state(u32 context_id,
 765                              u32 cpt_type,
 766                              u32 *buf_size,
 767                              void **pbuf)
 768 {
 769         struct vmci_ctx *context;
 770         int result;
 771
 772         context = vmci_ctx_get(context_id);
 773         if (!context)
 774                 return VMCI_ERROR_NOT_FOUND;
 775
 776         spin_lock(&context->lock);
 777
 778         switch (cpt_type) {
 779         case VMCI_NOTIFICATION_CPT_STATE:
 780                 result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
 781                 break;
 782
 783         case VMCI_WELLKNOWN_CPT_STATE:
 784                 /*
 785                  * For compatibility with VMX'en with VM to VM communication, we
 786                  * always return zero wellknown handles.
 787                  */
 788
 789                 *buf_size = 0;
 790                 *pbuf = NULL;
 791                 result = VMCI_SUCCESS;
 792                 break;
 793
 794         case VMCI_DOORBELL_CPT_STATE:
 795                 result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
 796                 break;
 797
 798         default:
 799                 pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
 800                 result = VMCI_ERROR_INVALID_ARGS;
 801                 break;
 802         }
 803
 804         spin_unlock(&context->lock);
 805         vmci_ctx_put(context);
 806
 807         return result;
 808 }
 809
 810 /*
 811  * Set current context's checkpoint state of given type.
 812  */
 813 int vmci_ctx_set_chkpt_state(u32 context_id,
 814                              u32 cpt_type,
 815                              u32 buf_size,
 816                              void *cpt_buf)
 817 {
 818         u32 i;
 819         u32 current_id;
 820         int result = VMCI_SUCCESS;
 821         u32 num_ids = buf_size / sizeof(u32);
 822
 823         if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
 824                 /*
 825                  * We would end up here if VMX with VM to VM communication
 826                  * attempts to restore a checkpoint with wellknown handles.
 827                  */
 828                 pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
 829                 return VMCI_ERROR_OBSOLETE;
 830         }
 831
 832         if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
 833                 pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
 834                 return VMCI_ERROR_INVALID_ARGS;
 835         }
 836
 837         for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
 838                 current_id = ((u32 *)cpt_buf)[i];
 839                 result = vmci_ctx_add_notification(context_id, current_id);
 840                 if (result != VMCI_SUCCESS)
 841                         break;
 842         }
 843         if (result != VMCI_SUCCESS)
 844                 pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
 845                          cpt_type, result);
 846
 847         return result;
 848 }
 849
 850 /*
 851  * Retrieves the specified context's pending notifications in the
 852  * form of a handle array. The handle arrays returned are the
 853  * actual data - not a copy and should not be modified by the
 854  * caller. They must be released using
 855  * vmci_ctx_rcv_notifications_release.
 856  */
 857 int vmci_ctx_rcv_notifications_get(u32 context_id,
 858                                    struct vmci_handle_arr **db_handle_array,
 859                                    struct vmci_handle_arr **qp_handle_array)
 860 {
 861         struct vmci_ctx *context;
 862         int result = VMCI_SUCCESS;
 863
 864         context = vmci_ctx_get(context_id);
 865         if (context == NULL)
 866                 return VMCI_ERROR_NOT_FOUND;
 867
 868         spin_lock(&context->lock);
 869
 870         *db_handle_array = context->pending_doorbell_array;
 871         context->pending_doorbell_array = vmci_handle_arr_create(0);
 872         if (!context->pending_doorbell_array) {
 873                 context->pending_doorbell_array = *db_handle_array;
 874                 *db_handle_array = NULL;
 875                 result = VMCI_ERROR_NO_MEM;
 876         }
 877         *qp_handle_array = NULL;
 878
 879         spin_unlock(&context->lock);
 880         vmci_ctx_put(context);
 881
 882         return result;
 883 }
 884
 885 /*
 886  * Releases handle arrays with pending notifications previously
 887  * retrieved using vmci_ctx_rcv_notifications_get. If the
 888  * notifications were not successfully handed over to the guest,
 889  * success must be false.
 890  */
 891 void vmci_ctx_rcv_notifications_release(u32 context_id,
 892                                         struct vmci_handle_arr *db_handle_array,
 893                                         struct vmci_handle_arr *qp_handle_array,
 894                                         bool success)
 895 {
 896         struct vmci_ctx *context = vmci_ctx_get(context_id);
 897
 898         spin_lock(&context->lock);
 899         if (!success) {
 900                 struct vmci_handle handle;
 901
 902                 /*
 903                  * New notifications may have been added while we were not
 904                  * holding the context lock, so we transfer any new pending
 905                  * doorbell notifications to the old array, and reinstate the
 906                  * old array.
 907                  */
 908
 909                 handle = vmci_handle_arr_remove_tail(
 910                                         context->pending_doorbell_array);
 911                 while (!vmci_handle_is_invalid(handle)) {
 912                         if (!vmci_handle_arr_has_entry(db_handle_array,
 913                                                        handle)) {
 914                                 vmci_handle_arr_append_entry(
 915                                                 &db_handle_array, handle);
 916                         }
 917                         handle = vmci_handle_arr_remove_tail(
 918                                         context->pending_doorbell_array);
 919                 }
 920                 vmci_handle_arr_destroy(context->pending_doorbell_array);
 921                 context->pending_doorbell_array = db_handle_array;
 922                 db_handle_array = NULL;
 923         } else {
 924                 ctx_clear_notify_call(context);
 925         }
 926         spin_unlock(&context->lock);
 927         vmci_ctx_put(context);
 928
 929         if (db_handle_array)
 930                 vmci_handle_arr_destroy(db_handle_array);
 931
 932         if (qp_handle_array)
 933                 vmci_handle_arr_destroy(qp_handle_array);
 934 }
 935
 936 /*
 937  * Registers that a new doorbell handle has been allocated by the
 938  * context. Only doorbell handles registered can be notified.
 939  */
 940 int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
 941 {
 942         struct vmci_ctx *context;
 943         int result;
 944
 945         if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
 946                 return VMCI_ERROR_INVALID_ARGS;
 947
 948         context = vmci_ctx_get(context_id);
 949         if (context == NULL)
 950                 return VMCI_ERROR_NOT_FOUND;
 951
 952         spin_lock(&context->lock);
 953         if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
 954                 vmci_handle_arr_append_entry(&context->doorbell_array, handle);
 955                 result = VMCI_SUCCESS;
 956         } else {
 957                 result = VMCI_ERROR_DUPLICATE_ENTRY;
 958         }
 959
 960         spin_unlock(&context->lock);
 961         vmci_ctx_put(context);
 962
 963         return result;
 964 }
 965
 966 /*
 967  * Unregisters a doorbell handle that was previously registered
 968  * with vmci_ctx_dbell_create.
 969  */
 970 int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
 971 {
 972         struct vmci_ctx *context;
 973         struct vmci_handle removed_handle;
 974
 975         if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
 976                 return VMCI_ERROR_INVALID_ARGS;
 977
 978         context = vmci_ctx_get(context_id);
 979         if (context == NULL)
 980                 return VMCI_ERROR_NOT_FOUND;
 981
 982         spin_lock(&context->lock);
 983         removed_handle =
 984             vmci_handle_arr_remove_entry(context->doorbell_array, handle);
 985         vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
 986         spin_unlock(&context->lock);
 987
 988         vmci_ctx_put(context);
 989
 990         return vmci_handle_is_invalid(removed_handle) ?
 991             VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
 992 }
 993
 994 /*
 995  * Unregisters all doorbell handles that were previously
 996  * registered with vmci_ctx_dbell_create.
 997  */
 998 int vmci_ctx_dbell_destroy_all(u32 context_id)
 999 {
1000         struct vmci_ctx *context;
1001         struct vmci_handle handle;
1002
1003         if (context_id == VMCI_INVALID_ID)
1004                 return VMCI_ERROR_INVALID_ARGS;
1005
1006         context = vmci_ctx_get(context_id);
1007         if (context == NULL)
1008                 return VMCI_ERROR_NOT_FOUND;
1009
1010         spin_lock(&context->lock);
1011         do {
1012                 struct vmci_handle_arr *arr = context->doorbell_array;
1013                 handle = vmci_handle_arr_remove_tail(arr);
1014         } while (!vmci_handle_is_invalid(handle));
1015         do {
1016                 struct vmci_handle_arr *arr = context->pending_doorbell_array;
1017                 handle = vmci_handle_arr_remove_tail(arr);
1018         } while (!vmci_handle_is_invalid(handle));
1019         spin_unlock(&context->lock);
1020
1021         vmci_ctx_put(context);
1022
1023         return VMCI_SUCCESS;
1024 }
1025
1026 /*
1027  * Registers a notification of a doorbell handle initiated by the
1028  * specified source context. The notification of doorbells are
1029  * subject to the same isolation rules as datagram delivery. To
1030  * allow host side senders of notifications a finer granularity
1031  * of sender rights than those assigned to the sending context
1032  * itself, the host context is required to specify a different
1033  * set of privilege flags that will override the privileges of
1034  * the source context.
1035  */
1036 int vmci_ctx_notify_dbell(u32 src_cid,
1037                           struct vmci_handle handle,
1038                           u32 src_priv_flags)
1039 {
1040         struct vmci_ctx *dst_context;
1041         int result;
1042
1043         if (vmci_handle_is_invalid(handle))
1044                 return VMCI_ERROR_INVALID_ARGS;
1045
1046         /* Get the target VM's VMCI context. */
1047         dst_context = vmci_ctx_get(handle.context);
1048         if (!dst_context) {
1049                 pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1050                 return VMCI_ERROR_NOT_FOUND;
1051         }
1052
1053         if (src_cid != handle.context) {
1054                 u32 dst_priv_flags;
1055
1056                 if (VMCI_CONTEXT_IS_VM(src_cid) &&
1057                     VMCI_CONTEXT_IS_VM(handle.context)) {
1058                         pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1059                                  src_cid, handle.context);
1060                         result = VMCI_ERROR_DST_UNREACHABLE;
1061                         goto out;
1062                 }
1063
1064                 result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1065                 if (result < VMCI_SUCCESS) {
1066                         pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1067                                 handle.context, handle.resource);
1068                         goto out;
1069                 }
1070
1071                 if (src_cid != VMCI_HOST_CONTEXT_ID ||
1072                     src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1073                         src_priv_flags = vmci_context_get_priv_flags(src_cid);
1074                 }
1075
1076                 if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1077                         result = VMCI_ERROR_NO_ACCESS;
1078                         goto out;
1079                 }
1080         }
1081
1082         if (handle.context == VMCI_HOST_CONTEXT_ID) {
1083                 result = vmci_dbell_host_context_notify(src_cid, handle);
1084         } else {
1085                 spin_lock(&dst_context->lock);
1086
1087                 if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1088                                                handle)) {
1089                         result = VMCI_ERROR_NOT_FOUND;
1090                 } else {
1091                         if (!vmci_handle_arr_has_entry(
1092                                         dst_context->pending_doorbell_array,
1093                                         handle)) {
1094                                 vmci_handle_arr_append_entry(
1095                                         &dst_context->pending_doorbell_array,
1096                                         handle);
1097
1098                                 ctx_signal_notify(dst_context);
1099                                 wake_up(&dst_context->host_context.wait_queue);
1100
1101                         }
1102                         result = VMCI_SUCCESS;
1103                 }
1104                 spin_unlock(&dst_context->lock);
1105         }
1106
1107  out:
1108         vmci_ctx_put(dst_context);
1109
1110         return result;
1111 }
1112
1113 bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1114 {
1115         return context && context->user_version >= VMCI_VERSION_HOSTQP;
1116 }
1117
1118 /*
1119  * Registers that a new queue pair handle has been allocated by
1120  * the context.
1121  */
1122 int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1123 {
1124         int result;
1125
1126         if (context == NULL || vmci_handle_is_invalid(handle))
1127                 return VMCI_ERROR_INVALID_ARGS;
1128
1129         if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
1130                 vmci_handle_arr_append_entry(&context->queue_pair_array,
1131                                              handle);
1132                 result = VMCI_SUCCESS;
1133         } else {
1134                 result = VMCI_ERROR_DUPLICATE_ENTRY;
1135         }
1136
1137         return result;
1138 }
1139
1140 /*
1141  * Unregisters a queue pair handle that was previously registered
1142  * with vmci_ctx_qp_create.
1143  */
1144 int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1145 {
1146         struct vmci_handle hndl;
1147
1148         if (context == NULL || vmci_handle_is_invalid(handle))
1149                 return VMCI_ERROR_INVALID_ARGS;
1150
1151         hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1152
1153         return vmci_handle_is_invalid(hndl) ?
1154                 VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1155 }
1156
1157 /*
1158  * Determines whether a given queue pair handle is registered
1159  * with the given context.
1160  */
1161 bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1162 {
1163         if (context == NULL || vmci_handle_is_invalid(handle))
1164                 return false;
1165
1166         return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1167 }
1168
1169 /*
1170  * vmci_context_get_priv_flags() - Retrieve privilege flags.
1171  * @context_id: The context ID of the VMCI context.
1172  *
1173  * Retrieves privilege flags of the given VMCI context ID.
1174  */
1175 u32 vmci_context_get_priv_flags(u32 context_id)
1176 {
1177         if (vmci_host_code_active()) {
1178                 u32 flags;
1179                 struct vmci_ctx *context;
1180
1181                 context = vmci_ctx_get(context_id);
1182                 if (!context)
1183                         return VMCI_LEAST_PRIVILEGE_FLAGS;
1184
1185                 flags = context->priv_flags;
1186                 vmci_ctx_put(context);
1187                 return flags;
1188         }
1189         return VMCI_NO_PRIVILEGE_FLAGS;
1190 }
1191 EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1192
1193 /*
1194  * vmci_is_context_owner() - Determimnes if user is the context owner
1195  * @context_id: The context ID of the VMCI context.
1196  * @uid:        The host user id (real kernel value).
1197  *
1198  * Determines whether a given UID is the owner of given VMCI context.
1199  */
1200 bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1201 {
1202         bool is_owner = false;
1203
1204         if (vmci_host_code_active()) {
1205                 struct vmci_ctx *context = vmci_ctx_get(context_id);
1206                 if (context) {
1207                         if (context->cred)
1208                                 is_owner = uid_eq(context->cred->uid, uid);
1209                         vmci_ctx_put(context);
1210                 }
1211         }
1212
1213         return is_owner;
1214 }
1215 EXPORT_SYMBOL_GPL(vmci_is_context_owner);