drivers/infiniband/ulp/ipoib/ipoib_ib.c

   1 /*
   2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
   3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   5  * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  *
  35  * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $
  36  */
  37
  38 #include <linux/delay.h>
  39 #include <linux/dma-mapping.h>
  40
  41 #include <rdma/ib_cache.h>
  42
  43 #include "ipoib.h"
  44
  45 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
  46 static int data_debug_level;
  47
  48 module_param(data_debug_level, int, 0644);
  49 MODULE_PARM_DESC(data_debug_level,
  50                  "Enable data path debug tracing if > 0");
  51 #endif
  52
  53 #define IPOIB_OP_RECV   (1ul << 31)
  54
  55 static DEFINE_MUTEX(pkey_mutex);
  56
  57 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
  58                                  struct ib_pd *pd, struct ib_ah_attr *attr)
  59 {
  60         struct ipoib_ah *ah;
  61
  62         ah = kmalloc(sizeof *ah, GFP_KERNEL);
  63         if (!ah)
  64                 return NULL;
  65
  66         ah->dev       = dev;
  67         ah->last_send = 0;
  68         kref_init(&ah->ref);
  69
  70         ah->ah = ib_create_ah(pd, attr);
  71         if (IS_ERR(ah->ah)) {
  72                 kfree(ah);
  73                 ah = NULL;
  74         } else
  75                 ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
  76
  77         return ah;
  78 }
  79
  80 void ipoib_free_ah(struct kref *kref)
  81 {
  82         struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
  83         struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
  84
  85         unsigned long flags;
  86
  87         spin_lock_irqsave(&priv->lock, flags);
  88         list_add_tail(&ah->list, &priv->dead_ahs);
  89         spin_unlock_irqrestore(&priv->lock, flags);
  90 }
  91
  92 static int ipoib_ib_post_receive(struct net_device *dev, int id)
  93 {
  94         struct ipoib_dev_priv *priv = netdev_priv(dev);
  95         struct ib_sge list;
  96         struct ib_recv_wr param;
  97         struct ib_recv_wr *bad_wr;
  98         int ret;
  99
 100         list.addr     = priv->rx_ring[id].mapping;
 101         list.length   = IPOIB_BUF_SIZE;
 102         list.lkey     = priv->mr->lkey;
 103
 104         param.next    = NULL;
 105         param.wr_id   = id | IPOIB_OP_RECV;
 106         param.sg_list = &list;
 107         param.num_sge = 1;
 108
 109         ret = ib_post_recv(priv->qp, &param, &bad_wr);
 110         if (unlikely(ret)) {
 111                 ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
 112                 dma_unmap_single(priv->ca->dma_device,
 113                                  priv->rx_ring[id].mapping,
 114                                  IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 115                 dev_kfree_skb_any(priv->rx_ring[id].skb);
 116                 priv->rx_ring[id].skb = NULL;
 117         }
 118
 119         return ret;
 120 }
 121
 122 static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
 123 {
 124         struct ipoib_dev_priv *priv = netdev_priv(dev);
 125         struct sk_buff *skb;
 126         dma_addr_t addr;
 127
 128         skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
 129         if (!skb)
 130                 return -ENOMEM;
 131
 132         /*
 133          * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
 134          * header.  So we need 4 more bytes to get to 48 and align the
 135          * IP header to a multiple of 16.
 136          */
 137         skb_reserve(skb, 4);
 138
 139         addr = dma_map_single(priv->ca->dma_device,
 140                               skb->data, IPOIB_BUF_SIZE,
 141                               DMA_FROM_DEVICE);
 142         if (unlikely(dma_mapping_error(addr))) {
 143                 dev_kfree_skb_any(skb);
 144                 return -EIO;
 145         }
 146
 147         priv->rx_ring[id].skb     = skb;
 148         priv->rx_ring[id].mapping = addr;
 149
 150         return 0;
 151 }
 152
 153 static int ipoib_ib_post_receives(struct net_device *dev)
 154 {
 155         struct ipoib_dev_priv *priv = netdev_priv(dev);
 156         int i;
 157
 158         for (i = 0; i < ipoib_recvq_size; ++i) {
 159                 if (ipoib_alloc_rx_skb(dev, i)) {
 160                         ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 161                         return -ENOMEM;
 162                 }
 163                 if (ipoib_ib_post_receive(dev, i)) {
 164                         ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
 165                         return -EIO;
 166                 }
 167         }
 168
 169         return 0;
 170 }
 171
 172 static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 173 {
 174         struct ipoib_dev_priv *priv = netdev_priv(dev);
 175         unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
 176         struct sk_buff *skb;
 177         dma_addr_t addr;
 178
 179         ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
 180                        wr_id, wc->opcode, wc->status);
 181
 182         if (unlikely(wr_id >= ipoib_recvq_size)) {
 183                 ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
 184                            wr_id, ipoib_recvq_size);
 185                 return;
 186         }
 187
 188         skb  = priv->rx_ring[wr_id].skb;
 189         addr = priv->rx_ring[wr_id].mapping;
 190
 191         if (unlikely(wc->status != IB_WC_SUCCESS)) {
 192                 if (wc->status != IB_WC_WR_FLUSH_ERR)
 193                         ipoib_warn(priv, "failed recv event "
 194                                    "(status=%d, wrid=%d vend_err %x)\n",
 195                                    wc->status, wr_id, wc->vendor_err);
 196                 dma_unmap_single(priv->ca->dma_device, addr,
 197                                  IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 198                 dev_kfree_skb_any(skb);
 199                 priv->rx_ring[wr_id].skb = NULL;
 200                 return;
 201         }
 202
 203         /*
 204          * If we can't allocate a new RX buffer, dump
 205          * this packet and reuse the old buffer.
 206          */
 207         if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
 208                 ++priv->stats.rx_dropped;
 209                 goto repost;
 210         }
 211
 212         ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 213                        wc->byte_len, wc->slid);
 214
 215         dma_unmap_single(priv->ca->dma_device, addr,
 216                          IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 217
 218         skb_put(skb, wc->byte_len);
 219         skb_pull(skb, IB_GRH_BYTES);
 220
 221         if (wc->slid != priv->local_lid ||
 222             wc->src_qp != priv->qp->qp_num) {
 223                 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
 224                 skb->mac.raw = skb->data;
 225                 skb_pull(skb, IPOIB_ENCAP_LEN);
 226
 227                 dev->last_rx = jiffies;
 228                 ++priv->stats.rx_packets;
 229                 priv->stats.rx_bytes += skb->len;
 230
 231                 skb->dev = dev;
 232                 /* XXX get correct PACKET_ type here */
 233                 skb->pkt_type = PACKET_HOST;
 234                 netif_rx_ni(skb);
 235         } else {
 236                 ipoib_dbg_data(priv, "dropping loopback packet\n");
 237                 dev_kfree_skb_any(skb);
 238         }
 239
 240 repost:
 241         if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
 242                 ipoib_warn(priv, "ipoib_ib_post_receive failed "
 243                            "for buf %d\n", wr_id);
 244 }
 245
 246 static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 247 {
 248         struct ipoib_dev_priv *priv = netdev_priv(dev);
 249         unsigned int wr_id = wc->wr_id;
 250         struct ipoib_tx_buf *tx_req;
 251         unsigned long flags;
 252
 253         ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
 254                        wr_id, wc->opcode, wc->status);
 255
 256         if (unlikely(wr_id >= ipoib_sendq_size)) {
 257                 ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
 258                            wr_id, ipoib_sendq_size);
 259                 return;
 260         }
 261
 262         tx_req = &priv->tx_ring[wr_id];
 263
 264         dma_unmap_single(priv->ca->dma_device,
 265                          pci_unmap_addr(tx_req, mapping),
 266                          tx_req->skb->len,
 267                          DMA_TO_DEVICE);
 268
 269         ++priv->stats.tx_packets;
 270         priv->stats.tx_bytes += tx_req->skb->len;
 271
 272         dev_kfree_skb_any(tx_req->skb);
 273
 274         spin_lock_irqsave(&priv->tx_lock, flags);
 275         ++priv->tx_tail;
 276         if (netif_queue_stopped(dev) &&
 277             test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
 278             priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
 279                 netif_wake_queue(dev);
 280         spin_unlock_irqrestore(&priv->tx_lock, flags);
 281
 282         if (wc->status != IB_WC_SUCCESS &&
 283             wc->status != IB_WC_WR_FLUSH_ERR)
 284                 ipoib_warn(priv, "failed send event "
 285                            "(status=%d, wrid=%d vend_err %x)\n",
 286                            wc->status, wr_id, wc->vendor_err);
 287 }
 288
 289 static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
 290 {
 291         if (wc->wr_id & IPOIB_OP_RECV)
 292                 ipoib_ib_handle_rx_wc(dev, wc);
 293         else
 294                 ipoib_ib_handle_tx_wc(dev, wc);
 295 }
 296
 297 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 298 {
 299         struct net_device *dev = (struct net_device *) dev_ptr;
 300         struct ipoib_dev_priv *priv = netdev_priv(dev);
 301         int n, i;
 302
 303         ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 304         do {
 305                 n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);
 306                 for (i = 0; i < n; ++i)
 307                         ipoib_ib_handle_wc(dev, priv->ibwc + i);
 308         } while (n == IPOIB_NUM_WC);
 309 }
 310
 311 static inline int post_send(struct ipoib_dev_priv *priv,
 312                             unsigned int wr_id,
 313                             struct ib_ah *address, u32 qpn,
 314                             dma_addr_t addr, int len)
 315 {
 316         struct ib_send_wr *bad_wr;
 317
 318         priv->tx_sge.addr             = addr;
 319         priv->tx_sge.length           = len;
 320
 321         priv->tx_wr.wr_id             = wr_id;
 322         priv->tx_wr.wr.ud.remote_qpn  = qpn;
 323         priv->tx_wr.wr.ud.ah          = address;
 324
 325         return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 326 }
 327
 328 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 329                 struct ipoib_ah *address, u32 qpn)
 330 {
 331         struct ipoib_dev_priv *priv = netdev_priv(dev);
 332         struct ipoib_tx_buf *tx_req;
 333         dma_addr_t addr;
 334
 335         if (skb->len > dev->mtu + INFINIBAND_ALEN) {
 336                 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
 337                            skb->len, dev->mtu + INFINIBAND_ALEN);
 338                 ++priv->stats.tx_dropped;
 339                 ++priv->stats.tx_errors;
 340                 dev_kfree_skb_any(skb);
 341                 return;
 342         }
 343
 344         ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
 345                        skb->len, address, qpn);
 346
 347         /*
 348          * We put the skb into the tx_ring _before_ we call post_send()
 349          * because it's entirely possible that the completion handler will
 350          * run before we execute anything after the post_send().  That
 351          * means we have to make sure everything is properly recorded and
 352          * our state is consistent before we call post_send().
 353          */
 354         tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 355         tx_req->skb = skb;
 356         addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
 357                               DMA_TO_DEVICE);
 358         pci_unmap_addr_set(tx_req, mapping, addr);
 359
 360         if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 361                                address->ah, qpn, addr, skb->len))) {
 362                 ipoib_warn(priv, "post_send failed\n");
 363                 ++priv->stats.tx_errors;
 364                 dma_unmap_single(priv->ca->dma_device, addr, skb->len,
 365                                  DMA_TO_DEVICE);
 366                 dev_kfree_skb_any(skb);
 367         } else {
 368                 dev->trans_start = jiffies;
 369
 370                 address->last_send = priv->tx_head;
 371                 ++priv->tx_head;
 372
 373                 if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
 374                         ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 375                         netif_stop_queue(dev);
 376                 }
 377         }
 378 }
 379
 380 static void __ipoib_reap_ah(struct net_device *dev)
 381 {
 382         struct ipoib_dev_priv *priv = netdev_priv(dev);
 383         struct ipoib_ah *ah, *tah;
 384         LIST_HEAD(remove_list);
 385
 386         spin_lock_irq(&priv->tx_lock);
 387         spin_lock(&priv->lock);
 388         list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
 389                 if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
 390                         list_del(&ah->list);
 391                         ib_destroy_ah(ah->ah);
 392                         kfree(ah);
 393                 }
 394         spin_unlock(&priv->lock);
 395         spin_unlock_irq(&priv->tx_lock);
 396 }
 397
 398 void ipoib_reap_ah(void *dev_ptr)
 399 {
 400         struct net_device *dev = dev_ptr;
 401         struct ipoib_dev_priv *priv = netdev_priv(dev);
 402
 403         __ipoib_reap_ah(dev);
 404
 405         if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
 406                 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
 407 }
 408
 409 int ipoib_ib_dev_open(struct net_device *dev)
 410 {
 411         struct ipoib_dev_priv *priv = netdev_priv(dev);
 412         int ret;
 413
 414         ret = ipoib_init_qp(dev);
 415         if (ret) {
 416                 ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
 417                 return -1;
 418         }
 419
 420         ret = ipoib_ib_post_receives(dev);
 421         if (ret) {
 422                 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
 423                 ipoib_ib_dev_stop(dev);
 424                 return -1;
 425         }
 426
 427         clear_bit(IPOIB_STOP_REAPER, &priv->flags);
 428         queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
 429
 430         set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 431
 432         return 0;
 433 }
 434
 435 static void ipoib_pkey_dev_check_presence(struct net_device *dev)
 436 {
 437         struct ipoib_dev_priv *priv = netdev_priv(dev);
 438         u16 pkey_index = 0;
 439
 440         if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
 441                 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 442         else
 443                 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 444 }
 445
 446 int ipoib_ib_dev_up(struct net_device *dev)
 447 {
 448         struct ipoib_dev_priv *priv = netdev_priv(dev);
 449
 450         ipoib_pkey_dev_check_presence(dev);
 451
 452         if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
 453                 ipoib_dbg(priv, "PKEY is not assigned.\n");
 454                 return 0;
 455         }
 456
 457         set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 458
 459         return ipoib_mcast_start_thread(dev);
 460 }
 461
 462 int ipoib_ib_dev_down(struct net_device *dev, int flush)
 463 {
 464         struct ipoib_dev_priv *priv = netdev_priv(dev);
 465
 466         ipoib_dbg(priv, "downing ib_dev\n");
 467
 468         clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 469         netif_carrier_off(dev);
 470
 471         /* Shutdown the P_Key thread if still active */
 472         if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
 473                 mutex_lock(&pkey_mutex);
 474                 set_bit(IPOIB_PKEY_STOP, &priv->flags);
 475                 cancel_delayed_work(&priv->pkey_task);
 476                 mutex_unlock(&pkey_mutex);
 477                 if (flush)
 478                         flush_workqueue(ipoib_workqueue);
 479         }
 480
 481         ipoib_mcast_stop_thread(dev, flush);
 482         ipoib_mcast_dev_flush(dev);
 483
 484         ipoib_flush_paths(dev);
 485
 486         return 0;
 487 }
 488
 489 static int recvs_pending(struct net_device *dev)
 490 {
 491         struct ipoib_dev_priv *priv = netdev_priv(dev);
 492         int pending = 0;
 493         int i;
 494
 495         for (i = 0; i < ipoib_recvq_size; ++i)
 496                 if (priv->rx_ring[i].skb)
 497                         ++pending;
 498
 499         return pending;
 500 }
 501
 502 int ipoib_ib_dev_stop(struct net_device *dev)
 503 {
 504         struct ipoib_dev_priv *priv = netdev_priv(dev);
 505         struct ib_qp_attr qp_attr;
 506         unsigned long begin;
 507         struct ipoib_tx_buf *tx_req;
 508         int i;
 509
 510         clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 511
 512         /*
 513          * Move our QP to the error state and then reinitialize in
 514          * when all work requests have completed or have been flushed.
 515          */
 516         qp_attr.qp_state = IB_QPS_ERR;
 517         if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
 518                 ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
 519
 520         /* Wait for all sends and receives to complete */
 521         begin = jiffies;
 522
 523         while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
 524                 if (time_after(jiffies, begin + 5 * HZ)) {
 525                         ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
 526                                    priv->tx_head - priv->tx_tail, recvs_pending(dev));
 527
 528                         /*
 529                          * assume the HW is wedged and just free up
 530                          * all our pending work requests.
 531                          */
 532                         while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 533                                 tx_req = &priv->tx_ring[priv->tx_tail &
 534                                                         (ipoib_sendq_size - 1)];
 535                                 dma_unmap_single(priv->ca->dma_device,
 536                                                  pci_unmap_addr(tx_req, mapping),
 537                                                  tx_req->skb->len,
 538                                                  DMA_TO_DEVICE);
 539                                 dev_kfree_skb_any(tx_req->skb);
 540                                 ++priv->tx_tail;
 541                         }
 542
 543                         for (i = 0; i < ipoib_recvq_size; ++i)
 544                                 if (priv->rx_ring[i].skb) {
 545                                         dma_unmap_single(priv->ca->dma_device,
 546                                                          pci_unmap_addr(&priv->rx_ring[i],
 547                                                                         mapping),
 548                                                          IPOIB_BUF_SIZE,
 549                                                          DMA_FROM_DEVICE);
 550                                         dev_kfree_skb_any(priv->rx_ring[i].skb);
 551                                         priv->rx_ring[i].skb = NULL;
 552                                 }
 553
 554                         goto timeout;
 555                 }
 556
 557                 msleep(1);
 558         }
 559
 560         ipoib_dbg(priv, "All sends and receives done.\n");
 561
 562 timeout:
 563         qp_attr.qp_state = IB_QPS_RESET;
 564         if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
 565                 ipoib_warn(priv, "Failed to modify QP to RESET state\n");
 566
 567         /* Wait for all AHs to be reaped */
 568         set_bit(IPOIB_STOP_REAPER, &priv->flags);
 569         cancel_delayed_work(&priv->ah_reap_task);
 570         flush_workqueue(ipoib_workqueue);
 571
 572         begin = jiffies;
 573
 574         while (!list_empty(&priv->dead_ahs)) {
 575                 __ipoib_reap_ah(dev);
 576
 577                 if (time_after(jiffies, begin + HZ)) {
 578                         ipoib_warn(priv, "timing out; will leak address handles\n");
 579                         break;
 580                 }
 581
 582                 msleep(1);
 583         }
 584
 585         return 0;
 586 }
 587
 588 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 589 {
 590         struct ipoib_dev_priv *priv = netdev_priv(dev);
 591
 592         priv->ca = ca;
 593         priv->port = port;
 594         priv->qp = NULL;
 595
 596         if (ipoib_transport_dev_init(dev, ca)) {
 597                 printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
 598                 return -ENODEV;
 599         }
 600
 601         if (dev->flags & IFF_UP) {
 602                 if (ipoib_ib_dev_open(dev)) {
 603                         ipoib_transport_dev_cleanup(dev);
 604                         return -ENODEV;
 605                 }
 606         }
 607
 608         return 0;
 609 }
 610
 611 void ipoib_ib_dev_flush(void *_dev)
 612 {
 613         struct net_device *dev = (struct net_device *)_dev;
 614         struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv;
 615
 616         if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
 617                 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
 618                 return;
 619         }
 620
 621         if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
 622                 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
 623                 return;
 624         }
 625
 626         ipoib_dbg(priv, "flushing\n");
 627
 628         ipoib_ib_dev_down(dev, 0);
 629
 630         /*
 631          * The device could have been brought down between the start and when
 632          * we get here, don't bring it back up if it's not configured up
 633          */
 634         if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
 635                 ipoib_ib_dev_up(dev);
 636                 ipoib_mcast_restart_task(dev);
 637         }
 638
 639         mutex_lock(&priv->vlan_mutex);
 640
 641         /* Flush any child interfaces too */
 642         list_for_each_entry(cpriv, &priv->child_intfs, list)
 643                 ipoib_ib_dev_flush(cpriv->dev);
 644
 645         mutex_unlock(&priv->vlan_mutex);
 646 }
 647
 648 void ipoib_ib_dev_cleanup(struct net_device *dev)
 649 {
 650         struct ipoib_dev_priv *priv = netdev_priv(dev);
 651
 652         ipoib_dbg(priv, "cleaning up ib_dev\n");
 653
 654         ipoib_mcast_stop_thread(dev, 1);
 655         ipoib_mcast_dev_flush(dev);
 656
 657         ipoib_transport_dev_cleanup(dev);
 658 }
 659
 660 /*
 661  * Delayed P_Key Assigment Interim Support
 662  *
 663  * The following is initial implementation of delayed P_Key assigment
 664  * mechanism. It is using the same approach implemented for the multicast
 665  * group join. The single goal of this implementation is to quickly address
 666  * Bug #2507. This implementation will probably be removed when the P_Key
 667  * change async notification is available.
 668  */
 669
 670 void ipoib_pkey_poll(void *dev_ptr)
 671 {
 672         struct net_device *dev = dev_ptr;
 673         struct ipoib_dev_priv *priv = netdev_priv(dev);
 674
 675         ipoib_pkey_dev_check_presence(dev);
 676
 677         if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
 678                 ipoib_open(dev);
 679         else {
 680                 mutex_lock(&pkey_mutex);
 681                 if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
 682                         queue_delayed_work(ipoib_workqueue,
 683                                            &priv->pkey_task,
 684                                            HZ);
 685                 mutex_unlock(&pkey_mutex);
 686         }
 687 }
 688
 689 int ipoib_pkey_dev_delay_open(struct net_device *dev)
 690 {
 691         struct ipoib_dev_priv *priv = netdev_priv(dev);
 692
 693         /* Look for the interface pkey value in the IB Port P_Key table and */
 694         /* set the interface pkey assigment flag                            */
 695         ipoib_pkey_dev_check_presence(dev);
 696
 697         /* P_Key value not assigned yet - start polling */
 698         if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
 699                 mutex_lock(&pkey_mutex);
 700                 clear_bit(IPOIB_PKEY_STOP, &priv->flags);
 701                 queue_delayed_work(ipoib_workqueue,
 702                                    &priv->pkey_task,
 703                                    HZ);
 704                 mutex_unlock(&pkey_mutex);
 705                 return 1;
 706         }
 707
 708         return 0;
 709 }