linux/drivers/infiniband/hw/mlx4/srq.c
Linus Torvalds 048ccca8c1 Initial roundup of 4.5 merge window patches
- Remove usage of ib_query_device and instead store attributes in
   ib_device struct
 - Move iopoll out of block and into lib, rename to irqpoll, and use
   in several places in the rdma stack as our new completion queue
   polling library mechanism.  Update the other block drivers that
   already used iopoll to use the new mechanism too.
 - Replace the per-entry GID table locks with a single GID table lock
 - IPoIB multicast cleanup
 - Cleanups to the IB MR facility
 - Add support for 64bit extended IB counters
 - Fix for netlink oops while parsing RDMA nl messages
 - RoCEv2 support for the core IB code
 - mlx4 RoCEv2 support
 - mlx5 RoCEv2 support
 - Cross Channel support for mlx5
 - Timestamp support for mlx5
 - Atomic support for mlx5
 - Raw QP support for mlx5
 - MAINTAINERS update for mlx4/mlx5
 - Misc ocrdma, qib, nes, usNIC, cxgb3, cxgb4, mlx4, mlx5 updates
 - Add support for remote invalidate to the iSER driver (pushed through the
   RDMA tree due to dependencies, acknowledged by nab)
 - Update to NFSoRDMA (pushed through the RDMA tree due to dependencies,
   acknowledged by Bruce)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJWoSygAAoJELgmozMOVy/dDjsP/2vbTda2MvQfkfkGEZBQdJSg
 095RN0gQgCJdg78lAl8yuaK8r4VN/7uefpDtFdudH1I/Pei7X0wxN9R1UzFNG4KR
 AD53lz92IVPs15328SbPR2kvNWISR9aBFQo3rlElq3Grqlp0EMn2Ou1vtu87rekF
 aMllxr8Nl0uZhP+eWusOsYpJUUtwirLgRnrAyfqo2UxZh/TMIroT0TCx1KXjVcAg
 dhDARiZAdu3OgSc6OsWqmH+DELEq6dFVA5F+DDBGAb8bFZqlJc7cuMHWInwNsNXT
 so4bnEQ835alTbsdYtqs5DUNS8heJTAJP4Uz0ehkTh/uNCcvnKeUTw1c2P/lXI1k
 7s33gMM+0FXj0swMBw0kKwAF2d9Hhus9UAN7NwjBuOyHcjGRd5q7SAnfWkvKx000
 s9jVW19slb2I38gB58nhjOh8s+vXUArgxnV1+kTia1+bJSR5swvVoWRicRXdF0vh
 TvLX/BjbSIU73g1TnnLNYoBTV3ybFKQ6bVdQW7fzSTDs54dsI1vvdHXi3bYZCpnL
 HVwQTZRfEzkvb0AdKbcvf8p/TlaAHem3ODqtO1eHvO4if1QJBSn+SptTEeJVYYdK
 n4B3l/dMoBH4JXJUmEHB9jwAvYOpv/YLAFIvdL7NFwbqGNsC3nfXFcmkVORB1W3B
 KEMcM2we4bz+uyKMjEAD
 =5oO7
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma updates from Doug Ledford:
 "Initial roundup of 4.5 merge window patches

   - Remove usage of ib_query_device and instead store attributes in
     ib_device struct

   - Move iopoll out of block and into lib, rename to irqpoll, and use
     in several places in the rdma stack as our new completion queue
     polling library mechanism.  Update the other block drivers that
     already used iopoll to use the new mechanism too.

   - Replace the per-entry GID table locks with a single GID table lock

   - IPoIB multicast cleanup

   - Cleanups to the IB MR facility

   - Add support for 64bit extended IB counters

   - Fix for netlink oops while parsing RDMA nl messages

   - RoCEv2 support for the core IB code

   - mlx4 RoCEv2 support

   - mlx5 RoCEv2 support

   - Cross Channel support for mlx5

   - Timestamp support for mlx5

   - Atomic support for mlx5

   - Raw QP support for mlx5

   - MAINTAINERS update for mlx4/mlx5

   - Misc ocrdma, qib, nes, usNIC, cxgb3, cxgb4, mlx4, mlx5 updates

   - Add support for remote invalidate to the iSER driver (pushed
     through the RDMA tree due to dependencies, acknowledged by nab)

   - Update to NFSoRDMA (pushed through the RDMA tree due to
     dependencies, acknowledged by Bruce)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (169 commits)
  IB/mlx5: Unify CQ create flags check
  IB/mlx5: Expose Raw Packet QP to user space consumers
  {IB, net}/mlx5: Move the modify QP operation table to mlx5_ib
  IB/mlx5: Support setting Ethernet priority for Raw Packet QPs
  IB/mlx5: Add Raw Packet QP query functionality
  IB/mlx5: Add create and destroy functionality for Raw Packet QP
  IB/mlx5: Refactor mlx5_ib_qp to accommodate other QP types
  IB/mlx5: Allocate a Transport Domain for each ucontext
  net/mlx5_core: Warn on unsupported events of QP/RQ/SQ
  net/mlx5_core: Add RQ and SQ event handling
  net/mlx5_core: Export transport objects
  IB/mlx5: Expose CQE version to user-space
  IB/mlx5: Add CQE version 1 support to user QPs and SRQs
  IB/mlx5: Fix data validation in mlx5_ib_alloc_ucontext
  IB/sa: Fix netlink local service GFP crash
  IB/srpt: Remove redundant wc array
  IB/qib: Improve ipoib UD performance
  IB/mlx4: Advertise RoCE v2 support
  IB/mlx4: Create and use another QP1 for RoCEv2
  IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers
  ...
2016-01-23 18:45:06 -08:00

384 lines
9.4 KiB
C

/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "mlx4_ib.h"
#include "user.h"
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
}
static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
{
struct ib_event event;
struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
if (ibsrq->event_handler) {
event.device = ibsrq->device;
event.element.srq = ibsrq;
switch (type) {
case MLX4_EVENT_TYPE_SRQ_LIMIT:
event.event = IB_EVENT_SRQ_LIMIT_REACHED;
break;
case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
event.event = IB_EVENT_SRQ_ERR;
break;
default:
pr_warn("Unexpected event type %d "
"on SRQ %06x\n", type, srq->srqn);
return;
}
ibsrq->event_handler(&event, ibsrq->srq_context);
}
}
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_srq *srq;
struct mlx4_wqe_srq_next_seg *next;
struct mlx4_wqe_data_seg *scatter;
u32 cqn;
u16 xrcdn;
int desc_size;
int buf_size;
int err;
int i;
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
return ERR_PTR(-EINVAL);
srq = kmalloc(sizeof *srq, GFP_KERNEL);
if (!srq)
return ERR_PTR(-ENOMEM);
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
srq->msrq.max_gs = init_attr->attr.max_sge;
desc_size = max(32UL,
roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
srq->msrq.max_gs *
sizeof (struct mlx4_wqe_data_seg)));
srq->msrq.wqe_shift = ilog2(desc_size);
buf_size = srq->msrq.max * desc_size;
if (pd->uobject) {
struct mlx4_ib_create_srq ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
goto err_srq;
}
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
buf_size, 0, 0);
if (IS_ERR(srq->umem)) {
err = PTR_ERR(srq->umem);
goto err_srq;
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
ilog2(srq->umem->page_size), &srq->mtt);
if (err)
goto err_buf;
err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
if (err)
goto err_mtt;
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &srq->db);
if (err)
goto err_mtt;
} else {
err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
if (err)
goto err_srq;
*srq->db.db = 0;
if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
GFP_KERNEL)) {
err = -ENOMEM;
goto err_db;
}
srq->head = 0;
srq->tail = srq->msrq.max - 1;
srq->wqe_ctr = 0;
for (i = 0; i < srq->msrq.max; ++i) {
next = get_wqe(srq, i);
next->next_wqe_index =
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
for (scatter = (void *) (next + 1);
(void *) scatter < (void *) next + desc_size;
++scatter)
scatter->lkey = cpu_to_be32(MLX4_INVALID_LKEY);
}
err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
&srq->mtt);
if (err)
goto err_buf;
err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
if (err)
goto err_mtt;
srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
GFP_KERNEL | __GFP_NOWARN);
if (!srq->wrid) {
srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
GFP_KERNEL, PAGE_KERNEL);
if (!srq->wrid) {
err = -ENOMEM;
goto err_mtt;
}
}
}
cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
(u16) dev->dev->caps.reserved_xrcds;
err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
srq->db.dma, &srq->msrq);
if (err)
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
if (pd->uobject)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
}
init_attr->attr.max_wr = srq->msrq.max - 1;
return &srq->ibsrq;
err_wrid:
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else
kvfree(srq->wrid);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
if (pd->uobject)
ib_umem_release(srq->umem);
else
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
err_db:
if (!pd->uobject)
mlx4_db_free(dev->dev, &srq->db);
err_srq:
kfree(srq);
return ERR_PTR(err);
}
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
struct mlx4_ib_srq *srq = to_msrq(ibsrq);
int ret;
/* We don't support resizing SRQs (yet?) */
if (attr_mask & IB_SRQ_MAX_WR)
return -EINVAL;
if (attr_mask & IB_SRQ_LIMIT) {
if (attr->srq_limit >= srq->msrq.max)
return -EINVAL;
mutex_lock(&srq->mutex);
ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
mutex_unlock(&srq->mutex);
if (ret)
return ret;
}
return 0;
}
int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
struct mlx4_ib_srq *srq = to_msrq(ibsrq);
int ret;
int limit_watermark;
ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);
if (ret)
return ret;
srq_attr->srq_limit = limit_watermark;
srq_attr->max_wr = srq->msrq.max - 1;
srq_attr->max_sge = srq->msrq.max_gs;
return 0;
}
int mlx4_ib_destroy_srq(struct ib_srq *srq)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
mlx4_srq_free(dev->dev, &msrq->msrq);
mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
if (srq->uobject) {
mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
}
kfree(msrq);
return 0;
}
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
{
struct mlx4_wqe_srq_next_seg *next;
/* always called with interrupts disabled. */
spin_lock(&srq->lock);
next = get_wqe(srq, srq->tail);
next->next_wqe_index = cpu_to_be16(wqe_index);
srq->tail = wqe_index;
spin_unlock(&srq->lock);
}
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mlx4_ib_srq *srq = to_msrq(ibsrq);
struct mlx4_wqe_srq_next_seg *next;
struct mlx4_wqe_data_seg *scat;
unsigned long flags;
int err = 0;
int nreq;
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibsrq->device);
spin_lock_irqsave(&srq->lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
goto out;
}
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
err = -EINVAL;
*bad_wr = wr;
break;
}
if (unlikely(srq->head == srq->tail)) {
err = -ENOMEM;
*bad_wr = wr;
break;
}
srq->wrid[srq->head] = wr->wr_id;
next = get_wqe(srq, srq->head);
srq->head = be16_to_cpu(next->next_wqe_index);
scat = (struct mlx4_wqe_data_seg *) (next + 1);
for (i = 0; i < wr->num_sge; ++i) {
scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
}
if (i < srq->msrq.max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
scat[i].addr = 0;
}
}
if (likely(nreq)) {
srq->wqe_ctr += nreq;
/*
* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
out:
spin_unlock_irqrestore(&srq->lock, flags);
return err;
}