2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-11-27 12:04:22 +08:00

5.1 Merge Window Pull Request

This has been a slightly more active cycle than normal with ongoing core
 changes and quite a lot of collected driver updates.
 
 - Various driver fixes for bnxt_re, cxgb4, hns, mlx5, pvrdma, rxe
 
 - A new data transfer mode for HFI1 giving higher performance
 
 - Significant functional and bug fix update to the mlx5 On-Demand-Paging MR
   feature
 
 - A chip hang reset recovery system for hns
 
 - Change mm->pinned_vm to an atomic64
 
 - Update bnxt_re to support a new 57500 chip
 
 - A sane netlink 'rdma link add' method for creating rxe devices and fixing
   the various unregistration race conditions in rxe's unregister flow
 
 - Allow lookup up objects by an ID over netlink
 
 - Various reworking of the core to driver interface:
   * Drivers should not assume umem SGLs are in PAGE_SIZE chunks
   * ucontext is accessed via udata not other means
   * Start to make the core code responsible for object memory
     allocation
   * Drivers should convert struct device to struct ib_device
     via a helper
   * Drivers have more tools to avoid use after unregister problems
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEfB7FMLh+8QxL+6i3OG33FX4gmxoFAlyAJYYACgkQOG33FX4g
 mxrWwQ/+OyAx4Moru7Aix0C6GWxTJp/wKgw21CS3reZxgLai6x81xNYG/s2wCNjo
 IccObVd7mvzyqPdxOeyHBsJBbQDqWvoD6O2duH8cqGMgBRgh3CSdUep2zLvPpSAx
 2W1SvWYCLDnCuarboFrCA8c4AN3eCZiqD7z9lHyFQGjy3nTUWzk1uBaOP46uaiMv
 w89N8EMdXJ/iY6ONzihvE05NEYbMA8fuvosKLLNdghRiHIjbMQU8SneY23pvyPDd
 ZziPu9NcO3Hw9OVbkwtJp47U3KCBgvKHmnixyZKkikjiD+HVoABw2IMwcYwyBZwP
 Bic/ddONJUvAxMHpKRnQaW7znAiHARk21nDG28UAI7FWXH/wMXgicMp6LRcNKqKF
 vqXdxHTKJb0QUR4xrYI+eA8ihstss7UUpgSgByuANJ0X729xHiJtlEvPb1DPo1Dz
 9CB4OHOVRl5O8sA5Jc6PSusZiKEpvWoyWbdmw0IiwDF5pe922VLl5Nv88ta+sJ38
 v2Ll5AgYcluk7F3599Uh9D7gwp5hxW2Ph3bNYyg2j3HP4/dKsL9XvIJPXqEthgCr
 3KQS9rOZfI/7URieT+H+Mlf+OWZhXsZilJG7No0fYgIVjgJ00h3SF1/299YIq6Qp
 9W7ZXBfVSwLYA2AEVSvGFeZPUxgBwHrSZ62wya4uFeB1jyoodPk=
 =p12E
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a slightly more active cycle than normal with ongoing
  core changes and quite a lot of collected driver updates.

   - Various driver fixes for bnxt_re, cxgb4, hns, mlx5, pvrdma, rxe

   - A new data transfer mode for HFI1 giving higher performance

   - Significant functional and bug fix update to the mlx5
     On-Demand-Paging MR feature

   - A chip hang reset recovery system for hns

   - Change mm->pinned_vm to an atomic64

   - Update bnxt_re to support a new 57500 chip

   - A sane netlink 'rdma link add' method for creating rxe devices and
     fixing the various unregistration race conditions in rxe's
     unregister flow

   - Allow lookup up objects by an ID over netlink

   - Various reworking of the core to driver interface:
       - drivers should not assume umem SGLs are in PAGE_SIZE chunks
       - ucontext is accessed via udata not other means
       - start to make the core code responsible for object memory
         allocation
       - drivers should convert struct device to struct ib_device via a
         helper
       - drivers have more tools to avoid use after unregister problems"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (280 commits)
  net/mlx5: ODP support for XRC transport is not enabled by default in FW
  IB/hfi1: Close race condition on user context disable and close
  RDMA/umem: Revert broken 'off by one' fix
  RDMA/umem: minor bug fix in error handling path
  RDMA/hns: Use GFP_ATOMIC in hns_roce_v2_modify_qp
  cxgb4: kfree mhp after the debug print
  IB/rdmavt: Fix concurrency panics in QP post_send and modify to error
  IB/rdmavt: Fix loopback send with invalidate ordering
  IB/iser: Fix dma_nents type definition
  IB/mlx5: Set correct write permissions for implicit ODP MR
  bnxt_re: Clean cq for kernel consumers only
  RDMA/uverbs: Don't do double free of allocated PD
  RDMA: Handle ucontext allocations by IB/core
  RDMA/core: Fix a WARN() message
  bnxt_re: fix the regression due to changes in alloc_pbl
  IB/mlx4: Increase the timeout for CM cache
  IB/core: Abort page fault handler silently during owning process exit
  IB/mlx5: Validate correct PD before prefetch MR
  IB/mlx5: Protect against prefetch of invalid MR
  RDMA/uverbs: Store PR pointer before it is overwritten
  ...
This commit is contained in:
Linus Torvalds 2019-03-09 15:53:03 -08:00
commit a50243b1dd
264 changed files with 16724 additions and 5028 deletions

View File

@ -240,6 +240,7 @@ ForEachMacros:
- 'for_each_set_bit'
- 'for_each_set_bit_from'
- 'for_each_sg'
- 'for_each_sg_dma_page'
- 'for_each_sg_page'
- 'for_each_sibling_event'
- '__for_each_thread'
@ -360,6 +361,7 @@ ForEachMacros:
- 'radix_tree_for_each_slot'
- 'radix_tree_for_each_tagged'
- 'rbtree_postorder_for_each_entry_safe'
- 'rdma_for_each_port'
- 'resource_list_for_each_entry'
- 'resource_list_for_each_entry_safe'
- 'rhl_for_each_entry_rcu'

View File

@ -46,11 +46,11 @@ Memory pinning
I/O targets be kept resident at the same physical address. The
ib_uverbs module manages pinning and unpinning memory regions via
get_user_pages() and put_page() calls. It also accounts for the
amount of memory pinned in the process's locked_vm, and checks that
amount of memory pinned in the process's pinned_vm, and checks that
unprivileged processes do not exceed their RLIMIT_MEMLOCK limit.
Pages that are pinned multiple times are counted each time they are
pinned, so the value of locked_vm may be an overestimate of the
pinned, so the value of pinned_vm may be an overestimate of the
number of pages pinned by a process.
/dev files

View File

@ -311,7 +311,13 @@ static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter)
static dma_addr_t __vmw_piter_sg_addr(struct vmw_piter *viter)
{
return sg_page_iter_dma_address(&viter->iter);
/*
* FIXME: This driver wrongly mixes DMA and CPU SG list iteration and
* needs revision. See
* https://lore.kernel.org/lkml/20190104223531.GA1705@ziepe.ca/
*/
return sg_page_iter_dma_address(
container_of(&viter->iter, struct sg_dma_page_iter, base));
}

View File

@ -89,6 +89,7 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS
This allows the user to config the default GID type that the CM
uses for each device, when initiaing new connections.
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
@ -101,6 +102,12 @@ source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
endif
source "drivers/infiniband/ulp/ipoib/Kconfig"
@ -111,13 +118,5 @@ source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
source "drivers/infiniband/ulp/opa_vnic/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
endif # INFINIBAND

View File

@ -15,8 +15,6 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
nldev.o restrack.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_cm-y := cm.o
@ -39,3 +37,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
uverbs_uapi.o uverbs_std_types_device.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o

View File

@ -185,7 +185,7 @@ EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
{
return device->cache.ports[port - rdma_start_port(device)].gid;
return device->port_data[port].cache.gid;
}
static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
@ -547,21 +547,19 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
unsigned long mask;
int ret;
if (ib_dev->ops.get_netdev) {
idev = ib_dev->ops.get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;
idev = ib_device_get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;
/* Adding default GIDs in not permitted */
make_default_gid(idev, &default_gid);
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
dev_put(idev);
return -EPERM;
}
}
if (idev)
/* Adding default GIDs is not permitted */
make_default_gid(idev, &default_gid);
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
dev_put(idev);
return -EPERM;
}
}
if (idev)
dev_put(idev);
mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
@ -765,7 +763,7 @@ err_free_table:
return NULL;
}
static void release_gid_table(struct ib_device *device, u8 port,
static void release_gid_table(struct ib_device *device,
struct ib_gid_table *table)
{
bool leak = false;
@ -863,31 +861,27 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static void gid_table_release_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
u8 port;
unsigned int p;
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
table = ib_dev->cache.ports[port].gid;
release_gid_table(ib_dev, port, table);
ib_dev->cache.ports[port].gid = NULL;
rdma_for_each_port (ib_dev, p) {
release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
ib_dev->port_data[p].cache.gid = NULL;
}
}
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
struct ib_gid_table *table;
unsigned int rdma_port;
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
table = alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
rdma_for_each_port (ib_dev, rdma_port) {
table = alloc_gid_table(
ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
if (!table)
goto rollback_table_setup;
gid_table_reserve_default(ib_dev, rdma_port, table);
ib_dev->cache.ports[port].gid = table;
ib_dev->port_data[rdma_port].cache.gid = table;
}
return 0;
@ -898,14 +892,11 @@ rollback_table_setup:
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
u8 port;
unsigned int p;
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
table);
}
rdma_for_each_port (ib_dev, p)
cleanup_gid_table_port(ib_dev, p,
ib_dev->port_data[p].cache.gid);
}
static int gid_table_setup_one(struct ib_device *ib_dev)
@ -983,17 +974,17 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
u8 p;
unsigned int p;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
for (p = 0; p < device->phys_port_cnt; p++) {
rdma_for_each_port(device, p) {
struct ib_gid_table *table;
unsigned long flags;
int index;
table = device->cache.ports[p].gid;
table = device->port_data[p].cache.gid;
read_lock_irqsave(&table->rwlock, flags);
index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
if (index >= 0) {
@ -1025,7 +1016,7 @@ int ib_get_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
cache = device->port_data[port_num].cache.pkey;
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
@ -1043,14 +1034,12 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
u64 *sn_pfx)
{
unsigned long flags;
int p;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
p = port_num - rdma_start_port(device);
read_lock_irqsave(&device->cache.lock, flags);
*sn_pfx = device->cache.ports[p].subnet_prefix;
*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
read_unlock_irqrestore(&device->cache.lock, flags);
return 0;
@ -1073,7 +1062,7 @@ int ib_find_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
cache = device->port_data[port_num].cache.pkey;
*index = -1;
@ -1113,7 +1102,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
cache = device->port_data[port_num].cache.pkey;
*index = -1;
@ -1141,7 +1130,7 @@ int ib_get_cached_lmc(struct ib_device *device,
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
*lmc = device->port_data[port_num].cache.lmc;
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
@ -1159,8 +1148,7 @@ int ib_get_cached_port_state(struct ib_device *device,
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
*port_state = device->cache.ports[port_num
- rdma_start_port(device)].port_state;
*port_state = device->port_data[port_num].cache.port_state;
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
@ -1361,16 +1349,13 @@ static void ib_cache_update(struct ib_device *device,
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.ports[port -
rdma_start_port(device)].pkey;
old_pkey_cache = device->port_data[port].cache.pkey;
device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
device->cache.ports[port - rdma_start_port(device)].port_state =
tprops->state;
device->port_data[port].cache.pkey = pkey_cache;
device->port_data[port].cache.lmc = tprops->lmc;
device->port_data[port].cache.port_state = tprops->state;
device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
tprops->subnet_prefix;
device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
write_unlock_irq(&device->cache.lock);
if (enforce_security)
@ -1428,27 +1413,17 @@ static void ib_cache_event(struct ib_event_handler *handler,
int ib_cache_setup_one(struct ib_device *device)
{
int p;
unsigned int p;
int err;
rwlock_init(&device->cache.lock);
device->cache.ports =
kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
sizeof(*device->cache.ports),
GFP_KERNEL);
if (!device->cache.ports)
return -ENOMEM;
err = gid_table_setup_one(device);
if (err) {
kfree(device->cache.ports);
device->cache.ports = NULL;
if (err)
return err;
}
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device), true);
rdma_for_each_port (device, p)
ib_cache_update(device, p, true);
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
@ -1458,7 +1433,7 @@ int ib_cache_setup_one(struct ib_device *device)
void ib_cache_release_one(struct ib_device *device)
{
int p;
unsigned int p;
/*
* The release function frees all the cache elements.
@ -1466,11 +1441,10 @@ void ib_cache_release_one(struct ib_device *device)
* all the device's resources when the cache could no
* longer be accessed.
*/
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
kfree(device->cache.ports[p].pkey);
rdma_for_each_port (device, p)
kfree(device->port_data[p].cache.pkey);
gid_table_release_one(device);
kfree(device->cache.ports);
}
void ib_cache_cleanup_one(struct ib_device *device)

View File

@ -21,12 +21,11 @@
* Register with the rdma cgroup. Should be called before
* exposing rdma device to user space applications to avoid
* resource accounting leak.
* Returns 0 on success or otherwise failure code.
*/
int ib_device_register_rdmacg(struct ib_device *device)
void ib_device_register_rdmacg(struct ib_device *device)
{
device->cg_device.name = device->name;
return rdmacg_register_device(&device->cg_device);
rdmacg_register_device(&device->cg_device);
}
/**

View File

@ -4052,8 +4052,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
atomic_long_inc(&port->counter_group[CM_RECV].
counter[attr_id - CM_ATTR_ID_OFFSET]);
work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
GFP_KERNEL);
work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
if (!work) {
ib_free_recv_mad(mad_recv_wc);
return;

View File

@ -659,7 +659,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
struct cma_device *cma_dev;
enum ib_gid_type gid_type;
int ret = -ENODEV;
u8 port;
unsigned int port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
@ -673,8 +673,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = rdma_start_port(cma_dev->device);
port <= rdma_end_port(cma_dev->device); port++) {
rdma_for_each_port (cma_dev->device, port) {
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
gid_type = cma_dev->default_gid_type[port - 1];
@ -888,6 +887,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
id_priv->tos_set = false;
id_priv->timeout_set = false;
id_priv->gid_type = IB_GID_TYPE_IB;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
@ -1130,6 +1130,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
} else
ret = -ENOSYS;
if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
qp_attr->timeout = id_priv->timeout;
return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
@ -2410,6 +2413,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
return PTR_ERR(id);
id->tos = id_priv->tos;
id->tos_set = id_priv->tos_set;
id_priv->cm_id.iw = id;
memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
@ -2462,6 +2466,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
dev_id_priv->tos_set = id_priv->tos_set;
dev_id_priv->tos = id_priv->tos;
ret = rdma_listen(id, id_priv->backlog);
if (ret)
@ -2490,6 +2496,34 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
}
EXPORT_SYMBOL(rdma_set_service_type);
/**
* rdma_set_ack_timeout() - Set the ack timeout of QP associated
* with a connection identifier.
* @id: Communication identifier to associated with service type.
* @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
*
* This function should be called before rdma_connect() on active side,
* and on passive side before rdma_accept(). It is applicable to primary
* path only. The timeout will affect the local side of the QP, it is not
* negotiated with remote side and zero disables the timer.
*
* Return: 0 for success
*/
int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
{
struct rdma_id_private *id_priv;
if (id->qp_type != IB_QPT_RC)
return -EINVAL;
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->timeout = timeout;
id_priv->timeout_set = true;
return 0;
}
EXPORT_SYMBOL(rdma_set_ack_timeout);
static void cma_query_handler(int status, struct sa_path_rec *path_rec,
void *context)
{
@ -2966,13 +3000,22 @@ static void addr_handler(int status, struct sockaddr *src_addr,
{
struct rdma_id_private *id_priv = context;
struct rdma_cm_event event = {};
struct sockaddr *addr;
struct sockaddr_storage old_addr;
mutex_lock(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
RDMA_CM_ADDR_RESOLVED))
goto out;
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
/*
* Store the previous src address, so that if we fail to acquire
* matching rdma device, old address can be restored back, which helps
* to cancel the cma listen operation correctly.
*/
addr = cma_src_addr(id_priv);
memcpy(&old_addr, addr, rdma_addr_size(addr));
memcpy(addr, src_addr, rdma_addr_size(src_addr));
if (!status && !id_priv->cma_dev) {
status = cma_acquire_dev_by_src_ip(id_priv);
if (status)
@ -2983,6 +3026,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
}
if (status) {
memcpy(addr, &old_addr,
rdma_addr_size((struct sockaddr *)&old_addr));
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
RDMA_CM_ADDR_BOUND))
goto out;
@ -3798,6 +3843,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
return PTR_ERR(cm_id);
cm_id->tos = id_priv->tos;
cm_id->tos_set = id_priv->tos_set;
id_priv->cm_id.iw = cm_id;
memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
@ -4501,7 +4547,7 @@ static void cma_add_one(struct ib_device *device)
if (!cma_dev->default_roce_tos)
goto free_gid_type;
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
rdma_for_each_port (device, i) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
@ -4605,85 +4651,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
kfree(cma_dev);
}
static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlmsghdr *nlh;
struct rdma_cm_id_stats *id_stats;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id = NULL;
struct cma_device *cma_dev;
int i_dev = 0, i_id = 0;
/*
* We export all of the IDs as a sequence of messages. Each
* ID gets its own netlink message.
*/
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list) {
if (i_dev < cb->args[0]) {
i_dev++;
continue;
}
i_id = 0;
list_for_each_entry(id_priv, &cma_dev->id_list, list) {
if (i_id < cb->args[1]) {
i_id++;
continue;
}
id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
sizeof *id_stats, RDMA_NL_RDMA_CM,
RDMA_NL_RDMA_CM_ID_STATS,
NLM_F_MULTI);
if (!id_stats)
goto out;
memset(id_stats, 0, sizeof *id_stats);
id = &id_priv->id;
id_stats->node_type = id->route.addr.dev_addr.dev_type;
id_stats->port_num = id->port_num;
id_stats->bound_dev_if =
id->route.addr.dev_addr.bound_dev_if;
if (ibnl_put_attr(skb, nlh,
rdma_addr_size(cma_src_addr(id_priv)),
cma_src_addr(id_priv),
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
goto out;
if (ibnl_put_attr(skb, nlh,
rdma_addr_size(cma_dst_addr(id_priv)),
cma_dst_addr(id_priv),
RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
goto out;
id_stats->pid = task_pid_vnr(id_priv->res.task);
id_stats->port_space = id->ps;
id_stats->cm_state = id_priv->state;
id_stats->qp_num = id_priv->qp_num;
id_stats->qp_type = id->qp_type;
i_id++;
nlmsg_end(skb, nlh);
}
cb->args[1] = 0;
i_dev++;
}
out:
mutex_unlock(&lock);
cb->args[0] = i_dev;
cb->args[1] = i_id;
return skb->len;
}
static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
};
static int cma_init_net(struct net *net)
{
struct cma_pernet *pernet = cma_pernet(net);
@ -4732,7 +4699,6 @@ static int __init cma_init(void)
if (ret)
goto err;
rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
cma_configfs_init();
return 0;
@ -4748,7 +4714,6 @@ err_wq:
static void __exit cma_cleanup(void)
{
cma_configfs_exit();
rdma_nl_unregister(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
@ -4756,7 +4721,5 @@ static void __exit cma_cleanup(void)
destroy_workqueue(cma_wq);
}
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);
module_init(cma_init);
module_exit(cma_cleanup);

View File

@ -84,9 +84,11 @@ struct rdma_id_private {
u32 options;
u8 srq;
u8 tos;
bool tos_set;
u8 tos_set:1;
u8 timeout_set:1;
u8 reuseaddr;
u8 afonly;
u8 timeout;
enum ib_gid_type gid_type;
/*

View File

@ -54,9 +54,9 @@ struct pkey_index_qp_list {
struct list_head qp_list;
};
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
extern const struct attribute_group ib_dev_attr_group;
int ib_device_register_sysfs(struct ib_device *device);
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_device_rename(struct ib_device *ibdev, const char *name);
@ -66,6 +66,9 @@ typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
unsigned int port);
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
void *filter_cookie,
@ -117,7 +120,7 @@ void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
#ifdef CONFIG_CGROUP_RDMA
int ib_device_register_rdmacg(struct ib_device *device);
void ib_device_register_rdmacg(struct ib_device *device);
void ib_device_unregister_rdmacg(struct ib_device *device);
int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
@ -128,21 +131,26 @@ void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
struct ib_device *device,
enum rdmacg_resource_type resource_index);
#else
static inline int ib_device_register_rdmacg(struct ib_device *device)
{ return 0; }
static inline void ib_device_register_rdmacg(struct ib_device *device)
{
}
static inline void ib_device_unregister_rdmacg(struct ib_device *device)
{ }
{
}
static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
struct ib_device *device,
enum rdmacg_resource_type resource_index)
{ return 0; }
{
return 0;
}
static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
struct ib_device *device,
enum rdmacg_resource_type resource_index)
{ }
{
}
#endif
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
@ -178,7 +186,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
void ib_security_destroy_port_pkey_list(struct ib_device *device);
void ib_security_release_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
u8 port_num,
@ -199,8 +207,9 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
enum ib_qp_type qp_type);
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
void ib_mad_agent_security_change(void);
#else
static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
static inline void ib_security_release_port_pkey_list(struct ib_device *device)
{
}
@ -264,6 +273,10 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
{
return 0;
}
static inline void ib_mad_agent_security_change(void)
{
}
#endif
struct ib_device *ib_device_get_by_index(u32 ifindex);

File diff suppressed because it is too large Load Diff

View File

@ -87,7 +87,8 @@ static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb},
[RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb}
};
static struct workqueue_struct *iwcm_wq;
@ -504,7 +505,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
const char *devname = dev_name(&cm_id->device->dev);
const char *ifname = cm_id->device->iwcm->ifname;
struct iwpm_dev_data pm_reg_msg;
struct iwpm_dev_data pm_reg_msg = {};
struct iwpm_sa_data pm_msg;
int status;
@ -515,8 +516,8 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
cm_id->m_local_addr = cm_id->local_addr;
cm_id->m_remote_addr = cm_id->remote_addr;
strncpy(pm_reg_msg.dev_name, devname, sizeof(pm_reg_msg.dev_name));
strncpy(pm_reg_msg.if_name, ifname, sizeof(pm_reg_msg.if_name));
strcpy(pm_reg_msg.dev_name, devname);
strcpy(pm_reg_msg.if_name, ifname);
if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
!iwpm_valid_pid())
@ -525,6 +526,8 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
cm_id->mapped = true;
pm_msg.loc_addr = cm_id->local_addr;
pm_msg.rem_addr = cm_id->remote_addr;
pm_msg.flags = (cm_id->device->iwcm->driver_flags & IW_F_NO_PORT_MAP) ?
IWPM_FLAGS_NO_PORT_MAP : 0;
if (active)
status = iwpm_add_and_query_mapping(&pm_msg,
RDMA_NL_IWCM);
@ -543,7 +546,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
return iwpm_create_mapinfo(&cm_id->local_addr,
&cm_id->m_local_addr,
RDMA_NL_IWCM);
RDMA_NL_IWCM, pm_msg.flags);
}
/*

View File

@ -34,18 +34,25 @@
#include "iwpm_util.h"
static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser";
static int iwpm_ulib_version = 3;
u16 iwpm_ulib_version = IWPM_UABI_VERSION_MIN;
static int iwpm_user_pid = IWPM_PID_UNDEFINED;
static atomic_t echo_nlmsg_seq;
/**
* iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
*
* Returns true if the pid is greater than zero, otherwise returns false
*/
int iwpm_valid_pid(void)
{
return iwpm_user_pid > 0;
}
/*
* iwpm_register_pid - Send a netlink query to user space
* for the iwarp port mapper pid
/**
* iwpm_register_pid - Send a netlink query to userspace
* to get the iwarp port mapper pid
* @pm_msg: Contains driver info to send to the userspace port mapper
* @nl_client: The index of the netlink client
*
* nlmsg attributes:
* [IWPM_NLA_REG_PID_SEQ]
@ -124,12 +131,19 @@ pid_query_error:
return ret;
}
/*
* iwpm_add_mapping - Send a netlink add mapping message
* to the port mapper
/**
* iwpm_add_mapping - Send a netlink add mapping request to
* the userspace port mapper
* @pm_msg: Contains the local ip/tcp address info to send
* @nl_client: The index of the netlink client
*
* nlmsg attributes:
* [IWPM_NLA_MANAGE_MAPPING_SEQ]
* [IWPM_NLA_MANAGE_ADDR]
* [IWPM_NLA_MANAGE_FLAGS]
*
* If the request is successful, the pm_msg stores
* the port mapper response (mapped address info)
*/
int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
{
@ -173,6 +187,18 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
if (ret)
goto add_mapping_error;
/* If flags are required and we're not V4, then return a quiet error */
if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) {
ret = -EINVAL;
goto add_mapping_error_nowarn;
}
if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags,
IWPM_NLA_MANAGE_FLAGS);
if (ret)
goto add_mapping_error;
}
nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
@ -187,6 +213,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
return ret;
add_mapping_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
add_mapping_error_nowarn:
if (skb)
dev_kfree_skb(skb);
if (nlmsg_request)
@ -194,13 +221,17 @@ add_mapping_error:
return ret;
}
/*
* iwpm_add_and_query_mapping - Send a netlink add and query
* mapping message to the port mapper
/**
* iwpm_add_and_query_mapping - Process the port mapper response to
* iwpm_add_and_query_mapping request
* @pm_msg: Contains the local ip/tcp address info to send
* @nl_client: The index of the netlink client
*
* nlmsg attributes:
* [IWPM_NLA_QUERY_MAPPING_SEQ]
* [IWPM_NLA_QUERY_LOCAL_ADDR]
* [IWPM_NLA_QUERY_REMOTE_ADDR]
* [IWPM_NLA_QUERY_FLAGS]
*/
int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
{
@ -251,6 +282,18 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
if (ret)
goto query_mapping_error;
/* If flags are required and we're not V4, then return a quite error */
if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) {
ret = -EINVAL;
goto query_mapping_error_nowarn;
}
if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags,
IWPM_NLA_QUERY_FLAGS);
if (ret)
goto query_mapping_error;
}
nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
@ -264,6 +307,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
return ret;
query_mapping_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
query_mapping_error_nowarn:
if (skb)
dev_kfree_skb(skb);
if (nlmsg_request)
@ -271,9 +315,13 @@ query_mapping_error:
return ret;
}
/*
* iwpm_remove_mapping - Send a netlink remove mapping message
* to the port mapper
/**
* iwpm_remove_mapping - Send a netlink remove mapping request
* to the userspace port mapper
*
* @local_addr: Local ip/tcp address to remove
* @nl_client: The index of the netlink client
*
* nlmsg attributes:
* [IWPM_NLA_MANAGE_MAPPING_SEQ]
* [IWPM_NLA_MANAGE_ADDR]
@ -344,9 +392,14 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
[IWPM_NLA_RREG_PID_ERR] = { .type = NLA_U16 }
};
/*
* iwpm_register_pid_cb - Process a port mapper response to
* iwpm_register_pid()
/**
* iwpm_register_pid_cb - Process the port mapper response to
* iwpm_register_pid query
* @skb:
* @cb: Contains the received message (payload and netlink header)
*
* If successful, the function receives the userspace port mapper pid
* which is used in future communication with the port mapper
*/
int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@ -379,7 +432,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
/* check device name, ulib name and version */
if (strcmp(pm_msg->dev_name, dev_name) ||
strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version != iwpm_ulib_version) {
iwpm_version < IWPM_UABI_VERSION_MIN) {
pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
__func__, dev_name, iwpm_name, iwpm_version);
@ -387,6 +440,10 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
goto register_pid_response_exit;
}
iwpm_user_pid = cb->nlh->nlmsg_pid;
iwpm_ulib_version = iwpm_version;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
__func__, iwpm_user_pid);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
__func__, iwpm_user_pid);
@ -403,15 +460,19 @@ register_pid_response_exit:
/* netlink attribute policy for the received response to add mapping request */
static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
[IWPM_NLA_MANAGE_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_MANAGE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 }
[IWPM_NLA_RMANAGE_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_RMANAGE_ADDR] = {
.len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR] = {
.len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 }
};
/*
* iwpm_add_mapping_cb - Process a port mapper response to
* iwpm_add_mapping()
/**
* iwpm_add_mapping_cb - Process the port mapper response to
* iwpm_add_mapping request
* @skb:
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@ -430,7 +491,7 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
msg_seq = nla_get_u32(nltb[IWPM_NLA_RMANAGE_MAPPING_SEQ]);
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
pr_info("%s: Could not find a matching request (seq = %u)\n",
@ -439,9 +500,9 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
}
pm_msg = nlmsg_request->req_buffer;
local_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
nla_data(nltb[IWPM_NLA_RMANAGE_ADDR]);
mapped_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
nla_data(nltb[IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR]);
if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
@ -472,17 +533,23 @@ add_mapping_response_exit:
/* netlink attribute policy for the response to add and query mapping request
* and response with remote address info */
static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
[IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_QUERY_REMOTE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_RQUERY_LOCAL_ADDR] = {
.len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_REMOTE_ADDR] = {
.len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = {
.len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = {
.len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPING_ERR] = { .type = NLA_U16 }
};
/*
* iwpm_add_and_query_mapping_cb - Process a port mapper response to
* iwpm_add_and_query_mapping()
/**
* iwpm_add_and_query_mapping_cb - Process the port mapper response to
* iwpm_add_and_query_mapping request
* @skb:
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
struct netlink_callback *cb)
@ -502,7 +569,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
return -EINVAL;
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
msg_seq = nla_get_u32(nltb[IWPM_NLA_RQUERY_MAPPING_SEQ]);
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
pr_info("%s: Could not find a matching request (seq = %u)\n",
@ -511,9 +578,9 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
}
pm_msg = nlmsg_request->req_buffer;
local_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]);
remote_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]);
mapped_loc_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
mapped_rem_sockaddr = (struct sockaddr_storage *)
@ -560,9 +627,13 @@ query_mapping_response_exit:
return 0;
}
/*
* iwpm_remote_info_cb - Process a port mapper message, containing
* the remote connecting peer address info
/**
* iwpm_remote_info_cb - Process remote connecting peer address info, which
* the port mapper has received from the connecting peer
* @skb:
* @cb: Contains the received message (payload and netlink header)
*
* Stores the IPv4/IPv6 address info in a hash table
*/
int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@ -588,9 +659,9 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
local_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]);
remote_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]);
mapped_loc_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
mapped_rem_sockaddr = (struct sockaddr_storage *)
@ -635,8 +706,14 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
[IWPM_NLA_MAPINFO_ULIB_VER] = { .type = NLA_U16 }
};
/*
* iwpm_mapping_info_cb - Process a port mapper request for mapping info
/**
* iwpm_mapping_info_cb - Process a notification that the userspace
* port mapper daemon is started
* @skb:
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send all the local mapping
* info records to the userspace port mapper
*/
int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@ -655,7 +732,7 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
if (strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version != iwpm_ulib_version) {
iwpm_version < IWPM_UABI_VERSION_MIN) {
pr_info("%s: Invalid port mapper name = %s version = %d\n",
__func__, iwpm_name, iwpm_version);
return ret;
@ -669,6 +746,11 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_user_pid = cb->nlh->nlmsg_pid;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
__func__, iwpm_user_pid);
if (!iwpm_mapinfo_available())
return 0;
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
@ -684,9 +766,11 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
[IWPM_NLA_MAPINFO_ACK_NUM] = { .type = NLA_U32 }
};
/*
* iwpm_ack_mapping_info_cb - Process a port mapper ack for
* the provided mapping info records
/**
* iwpm_ack_mapping_info_cb - Process the port mapper ack for
* the provided local mapping info records
* @skb:
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@ -712,8 +796,11 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
[IWPM_NLA_ERR_CODE] = { .type = NLA_U16 },
};
/*
* iwpm_mapping_error_cb - Process a port mapper error message
/**
* iwpm_mapping_error_cb - Process port mapper notification for error
*
* @skb:
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@ -748,3 +835,46 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
up(&nlmsg_request->sem);
return 0;
}
/* netlink attribute policy for the received hello request */
static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
[IWPM_NLA_HELLO_ABI_VERSION] = { .type = NLA_U16 }
};
/**
* iwpm_hello_cb - Process a hello message from iwpmd
*
* @skb:
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send the kernel's abi_version
* after adjusting it to support the iwpmd version.
*/
int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlattr *nltb[IWPM_NLA_HELLO_MAX];
const char *msg_type = "Hello request";
u8 nl_client;
u16 abi_version;
int ret = -EINVAL;
if (iwpm_parse_nlmsg(cb, IWPM_NLA_HELLO_MAX, hello_policy, nltb,
msg_type)) {
pr_info("%s: Unable to parse nlmsg\n", __func__);
return ret;
}
abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]);
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
if (!iwpm_valid_client(nl_client)) {
pr_info("%s: Invalid port mapper client = %d\n",
__func__, nl_client);
return ret;
}
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version);
pr_debug("Using ABI version %u\n", iwpm_ulib_version);
iwpm_user_pid = cb->nlh->nlmsg_pid;
ret = iwpm_send_hello(nl_client, iwpm_user_pid, iwpm_ulib_version);
return ret;
}

View File

@ -51,6 +51,12 @@ static DEFINE_SPINLOCK(iwpm_reminfo_lock);
static DEFINE_MUTEX(iwpm_admin_lock);
static struct iwpm_admin_data iwpm_admin;
/**
* iwpm_init - Allocate resources for the iwarp port mapper
* @nl_client: The index of the netlink client
*
* Should be called when network interface goes up.
*/
int iwpm_init(u8 nl_client)
{
int ret = 0;
@ -87,6 +93,12 @@ init_exit:
static void free_hash_bucket(void);
static void free_reminfo_bucket(void);
/**
* iwpm_exit - Deallocate resources for the iwarp port mapper
* @nl_client: The index of the netlink client
*
* Should be called when network interface goes down.
*/
int iwpm_exit(u8 nl_client)
{
@ -112,9 +124,17 @@ int iwpm_exit(u8 nl_client)
static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
struct sockaddr_storage *);
/**
* iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
* info in a hash table
* @local_addr: Local ip/tcp address
* @mapped_addr: Mapped local ip/tcp address
* @nl_client: The index of the netlink client
* @map_flags: IWPM mapping flags
*/
int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_sockaddr,
u8 nl_client)
u8 nl_client, u32 map_flags)
{
struct hlist_head *hash_bucket_head = NULL;
struct iwpm_mapping_info *map_info;
@ -132,6 +152,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
sizeof(struct sockaddr_storage));
map_info->nl_client = nl_client;
map_info->map_flags = map_flags;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
if (iwpm_hash_bucket) {
@ -150,6 +171,15 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
return ret;
}
/**
* iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
* info from the hash table
* @local_addr: Local ip/tcp address
* @mapped_local_addr: Mapped local ip/tcp address
*
* Returns err code if mapping info is not found in the hash table,
* otherwise returns 0
*/
int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_local_addr)
{
@ -250,6 +280,17 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info)
spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
}
/**
* iwpm_get_remote_info - Get the remote connecting peer address info
*
* @mapped_loc_addr: Mapped local address of the listening peer
* @mapped_rem_addr: Mapped remote address of the connecting peer
* @remote_addr: To store the remote address of the connecting peer
* @nl_client: The index of the netlink client
*
* The remote address info is retrieved and provided to the client in
* the remote_addr. After that it is removed from the hash table
*/
int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
struct sockaddr_storage *mapped_rem_addr,
struct sockaddr_storage *remote_addr,
@ -686,6 +727,14 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
if (ret)
goto send_mapping_info_unlock;
if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
ret = ibnl_put_attr(skb, nlh, sizeof(u32),
&map_info->map_flags,
IWPM_NLA_MAPINFO_FLAGS);
if (ret)
goto send_mapping_info_unlock;
}
nlmsg_end(skb, nlh);
iwpm_print_sockaddr(&map_info->local_sockaddr,
@ -754,3 +803,38 @@ int iwpm_mapinfo_available(void)
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return full_bucket;
}
int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
const char *err_str = "";
int ret = -EINVAL;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client);
if (!skb) {
err_str = "Unable to create a nlmsg";
goto hello_num_error;
}
nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
err_str = "Unable to put attribute of abi_version into nlmsg";
ret = ibnl_put_attr(skb, nlh, sizeof(u16), &abi_version,
IWPM_NLA_HELLO_ABI_VERSION);
if (ret)
goto hello_num_error;
nlmsg_end(skb, nlh);
ret = rdma_nl_unicast(skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
goto hello_num_error;
}
pr_debug("%s: Sent hello abi_version = %u\n", __func__, abi_version);
return 0;
hello_num_error:
pr_info("%s: %s\n", __func__, err_str);
if (skb)
dev_kfree_skb(skb);
return ret;
}

View File

@ -78,6 +78,7 @@ struct iwpm_mapping_info {
struct sockaddr_storage local_sockaddr;
struct sockaddr_storage mapped_sockaddr;
u8 nl_client;
u32 map_flags;
};
struct iwpm_remote_info {
@ -266,4 +267,15 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
* @msg: Message to print
*/
void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
/**
* iwpm_send_hello - Send hello response to iwpmd
*
* @nl_client: The index of the netlink client
* @abi_version: The kernel's abi_version
*
* Returns 0 on success or a negative error code
*/
int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version);
extern u16 iwpm_ulib_version;
#endif

View File

@ -3326,9 +3326,9 @@ error:
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
{
int i;
unsigned int i;
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
rdma_for_each_port (device, i) {
if (!rdma_cap_ib_mad(device, i))
continue;

View File

@ -56,7 +56,6 @@ EXPORT_SYMBOL(rdma_nl_chk_listeners);
static bool is_nl_msg_valid(unsigned int type, unsigned int op)
{
static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = {
[RDMA_NL_RDMA_CM] = RDMA_NL_RDMA_CM_NUM_OPS,
[RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS,
[RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS,
[RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS,
@ -181,8 +180,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
/* FIXME: Convert IWCM to properly handle doit callbacks */
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM ||
index == RDMA_NL_IWCM) {
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
struct netlink_dump_control c = {
.dump = cb_table[op].dump,
};

View File

@ -33,12 +33,14 @@
#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
@ -107,6 +109,13 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
[RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@ -262,9 +271,7 @@ static int fill_port_info(struct sk_buff *msg,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
return -EMSGSIZE;
if (device->ops.get_netdev)
netdev = device->ops.get_netdev(device, port);
netdev = ib_device_get_netdev(device, port);
if (netdev && net_eq(dev_net(netdev), net)) {
ret = nla_put_u32(msg,
RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
@ -314,7 +321,6 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_CTX] = "ctx",
};
struct rdma_restrack_root *res = &device->res;
struct nlattr *table_attr;
int ret, i, curr;
@ -328,7 +334,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
if (!names[i])
continue;
curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
curr = rdma_restrack_count(device, i,
task_active_pid_ns(current));
ret = fill_res_info_entry(msg, names[i], curr);
if (ret)
goto err;
@ -361,13 +368,20 @@ static int fill_res_name_pid(struct sk_buff *msg,
return 0;
}
static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
struct rdma_restrack_entry *res)
{
if (!dev->ops.fill_res_entry)
return false;
return dev->ops.fill_res_entry(msg, res);
}
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_qp *qp = container_of(res, struct ib_qp, res);
struct rdma_restrack_root *resroot = &qp->device->res;
struct ib_device *dev = qp->device;
struct ib_qp_init_attr qp_init_attr;
struct nlattr *entry_attr;
struct ib_qp_attr qp_attr;
int ret;
@ -376,11 +390,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
return ret;
if (port && port != qp_attr.port_num)
return 0;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
if (!entry_attr)
goto out;
return -EAGAIN;
/* In create_qp() port is not set yet */
if (qp_attr.port_num &&
@ -412,38 +422,32 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
if (fill_res_entry(dev, msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
err: return -EMSGSIZE;
}
static int fill_res_cm_id_entry(struct sk_buff *msg,
struct netlink_callback *cb,
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct rdma_id_private *id_priv =
container_of(res, struct rdma_id_private, res);
struct rdma_restrack_root *resroot = &id_priv->id.device->res;
struct ib_device *dev = id_priv->id.device;
struct rdma_cm_id *cm_id = &id_priv->id;
struct nlattr *entry_attr;
if (port && port != cm_id->port_num)
return 0;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
if (!entry_attr)
goto out;
if (cm_id->port_num &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
goto err;
@ -472,31 +476,25 @@ static int fill_res_cm_id_entry(struct sk_buff *msg,
&cm_id->route.addr.dst_addr))
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
if (fill_res_entry(dev, msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
err: return -EMSGSIZE;
}
static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_cq *cq = container_of(res, struct ib_cq, res);
struct rdma_restrack_root *resroot = &cq->device->res;
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
if (!entry_attr)
goto out;
struct ib_device *dev = cq->device;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
goto err;
@ -509,33 +507,31 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
goto err;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
cq->uobject->context->res.id))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
if (fill_res_entry(dev, msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
err: return -EMSGSIZE;
}
static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_mr *mr = container_of(res, struct ib_mr, res);
struct rdma_restrack_root *resroot = &mr->pd->device->res;
struct nlattr *entry_attr;
struct ib_device *dev = mr->pd->device;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
if (!entry_attr)
goto out;
if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
@ -546,33 +542,31 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
RDMA_NLDEV_ATTR_PAD))
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
goto err;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
if (fill_res_entry(dev, msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
err: return -EMSGSIZE;
}
static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
struct rdma_restrack_root *resroot = &pd->device->res;
struct nlattr *entry_attr;
struct ib_device *dev = pd->device;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
if (!entry_attr)
goto out;
if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
pd->local_dma_lkey))
goto err;
@ -585,19 +579,23 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
goto err;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
pd->uobject->context->res.id))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
if (fill_res_entry(dev, msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
err: return -EMSGSIZE;
}
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@ -777,7 +775,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
u32 idx = 0;
u32 ifindex;
int err;
u32 p;
unsigned int p;
err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, NULL);
@ -789,7 +787,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
if (!device)
return -EINVAL;
for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
rdma_for_each_port (device, p) {
/*
* The dumpit function returns all information from specific
* index. This specific index is taken from the netlink
@ -905,10 +903,17 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
}
struct nldev_fill_res_entry {
int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, u32 port);
enum rdma_nldev_attr nldev_attr;
enum rdma_nldev_command nldev_cmd;
u8 flags;
u32 entry;
u32 id;
};
enum nldev_res_flags {
NLDEV_PER_DEV = 1 << 0,
};
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
@ -916,29 +921,136 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
.fill_res_func = fill_res_qp_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_LQPN,
},
[RDMA_RESTRACK_CM_ID] = {
.fill_res_func = fill_res_cm_id_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
},
[RDMA_RESTRACK_CQ] = {
.fill_res_func = fill_res_cq_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CQN,
},
[RDMA_RESTRACK_MR] = {
.fill_res_func = fill_res_mr_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_MRN,
},
[RDMA_RESTRACK_PD] = {
.fill_res_func = fill_res_pd_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
};
static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res)
{
/*
* 1. Kern resources should be visible in init name space only
* 2. Present only resources visible in the current namespace
*/
if (rdma_is_kernel_res(res))
return task_active_pid_ns(current) == &init_pid_ns;
return task_active_pid_ns(current) == task_active_pid_ns(res->task);
}
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
enum rdma_restrack_type res_type)
{
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct rdma_restrack_entry *res;
struct ib_device *device;
u32 index, id, port = 0;
bool has_cap_net_admin;
struct sk_buff *msg;
int ret;
ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
if (!device)
return -EINVAL;
if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
if (!rdma_is_port_valid(device, port)) {
ret = -EINVAL;
goto err;
}
}
if ((port && fe->flags & NLDEV_PER_DEV) ||
(!port && ~fe->flags & NLDEV_PER_DEV)) {
ret = -EINVAL;
goto err;
}
id = nla_get_u32(tb[fe->id]);
res = rdma_restrack_get_byid(device, res_type, id);
if (IS_ERR(res)) {
ret = PTR_ERR(res);
goto err;
}
if (!is_visible_in_pid_ns(res)) {
ret = -ENOENT;
goto err_get;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
goto err;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
0, 0);
if (fill_nldev_handle(msg, device)) {
ret = -EMSGSIZE;
goto err_free;
}
has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
rdma_restrack_put(res);
if (ret)
goto err_free;
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err_get:
rdma_restrack_put(res);
err:
ib_device_put(device);
return ret;
}
static int res_get_common_dumpit(struct sk_buff *skb,
struct netlink_callback *cb,
enum rdma_restrack_type res_type)
@ -946,11 +1058,15 @@ static int res_get_common_dumpit(struct sk_buff *skb,
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct rdma_restrack_entry *res;
struct rdma_restrack_root *rt;
int err, ret = 0, idx = 0;
struct nlattr *table_attr;
struct nlattr *entry_attr;
struct ib_device *device;
int start = cb->args[0];
bool has_cap_net_admin;
struct nlmsghdr *nlh;
unsigned long id;
u32 index, port = 0;
bool filled = false;
@ -998,55 +1114,51 @@ static int res_get_common_dumpit(struct sk_buff *skb,
goto err;
}
down_read(&device->res.rwsem);
hash_for_each_possible(device->res.hash, res, node, res_type) {
if (idx < start)
has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
rt = &device->res[res_type];
xa_lock(&rt->xa);
/*
* FIXME: if the skip ahead is something common this loop should
* use xas_for_each & xas_pause to optimize, we can have a lot of
* objects.
*/
xa_for_each(&rt->xa, id, res) {
if (!is_visible_in_pid_ns(res))
continue;
if (idx < start || !rdma_restrack_get(res))
goto next;
if ((rdma_is_kernel_res(res) &&
task_active_pid_ns(current) != &init_pid_ns) ||
(!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
task_active_pid_ns(res->task)))
/*
* 1. Kern resources should be visible in init
* namspace only
* 2. Present only resources visible in the current
* namespace
*/
goto next;
if (!rdma_restrack_get(res))
/*
* Resource is under release now, but we are not
* relesing lock now, so it will be released in
* our next pass, once we will get ->next pointer.
*/
goto next;
xa_unlock(&rt->xa);
filled = true;
up_read(&device->res.rwsem);
ret = fe->fill_res_func(skb, cb, res, port);
down_read(&device->res.rwsem);
/*
* Return resource back, but it won't be released till
* the &device->res.rwsem will be released for write.
*/
entry_attr = nla_nest_start(skb, fe->entry);
if (!entry_attr) {
ret = -EMSGSIZE;
rdma_restrack_put(res);
goto msg_full;
}
ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
rdma_restrack_put(res);
if (ret == -EMSGSIZE)
/*
* There is a chance to optimize here.
* It can be done by using list_prepare_entry
* and list_for_each_entry_continue afterwards.
*/
break;
if (ret)
if (ret) {
nla_nest_cancel(skb, entry_attr);
if (ret == -EMSGSIZE)
goto msg_full;
if (ret == -EAGAIN)
goto again;
goto res_err;
}
nla_nest_end(skb, entry_attr);
again: xa_lock(&rt->xa);
next: idx++;
}
up_read(&device->res.rwsem);
xa_unlock(&rt->xa);
msg_full:
nla_nest_end(skb, table_attr);
nlmsg_end(skb, nlh);
cb->args[0] = idx;
@ -1063,7 +1175,6 @@ next: idx++;
res_err:
nla_nest_cancel(skb, table_attr);
up_read(&device->res.rwsem);
err:
nlmsg_cancel(skb, nlh);
@ -1073,34 +1184,132 @@ err_index:
return ret;
}
static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
#define RES_GET_FUNCS(name, type) \
static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
struct netlink_callback *cb) \
{ \
return res_get_common_dumpit(skb, cb, type); \
} \
static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
struct nlmsghdr *nlh, \
struct netlink_ext_ack *extack) \
{ \
return res_get_common_doit(skb, nlh, extack, type); \
}
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
static const struct rdma_link_ops *link_ops_get(const char *type)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
const struct rdma_link_ops *ops;
list_for_each_entry(ops, &link_ops, list) {
if (!strcmp(ops->type, type))
goto out;
}
ops = NULL;
out:
return ops;
}
static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
void rdma_link_register(struct rdma_link_ops *ops)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
down_write(&link_ops_rwsem);
if (WARN_ON_ONCE(link_ops_get(ops->type)))
goto out;
list_add(&ops->list, &link_ops);
out:
up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_register);
void rdma_link_unregister(struct rdma_link_ops *ops)
{
down_write(&link_ops_rwsem);
list_del(&ops->list);
up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
char ibdev_name[IB_DEVICE_NAME_MAX];
const struct rdma_link_ops *ops;
char ndev_name[IFNAMSIZ];
struct net_device *ndev;
char type[IFNAMSIZ];
int err;
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
!tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
return -EINVAL;
nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
if (strchr(ibdev_name, '%'))
return -EINVAL;
nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
sizeof(ndev_name));
ndev = dev_get_by_name(&init_net, ndev_name);
if (!ndev)
return -ENODEV;
down_read(&link_ops_rwsem);
ops = link_ops_get(type);
#ifdef CONFIG_MODULES
if (!ops) {
up_read(&link_ops_rwsem);
request_module("rdma-link-%s", type);
down_read(&link_ops_rwsem);
ops = link_ops_get(type);
}
#endif
err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
up_read(&link_ops_rwsem);
dev_put(ndev);
return err;
}
static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
}
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
u32 index;
int err;
static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
}
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
if (!device)
return -EINVAL;
if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
ib_device_put(device);
return -EINVAL;
}
ib_unregister_device_and_put(device);
return 0;
}
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
@ -1112,6 +1321,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_set_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_NEWLINK] = {
.doit = nldev_newlink,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_DELLINK] = {
.doit = nldev_dellink,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_PORT_GET] = {
.doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit,
@ -1121,28 +1338,23 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.dump = nldev_res_get_dumpit,
},
[RDMA_NLDEV_CMD_RES_QP_GET] = {
.doit = nldev_res_get_qp_doit,
.dump = nldev_res_get_qp_dumpit,
/*
* .doit is not implemented yet for two reasons:
* 1. It is not needed yet.
* 2. There is a need to provide identifier, while it is easy
* for the QPs (device index + port index + LQPN), it is not
* the case for the rest of resources (PD and CQ). Because it
* is better to provide similar interface for all resources,
* let's wait till we will have other resources implemented
* too.
*/
},
[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
.doit = nldev_res_get_cm_id_doit,
.dump = nldev_res_get_cm_id_dumpit,
},
[RDMA_NLDEV_CMD_RES_CQ_GET] = {
.doit = nldev_res_get_cq_doit,
.dump = nldev_res_get_cq_dumpit,
},
[RDMA_NLDEV_CMD_RES_MR_GET] = {
.doit = nldev_res_get_mr_doit,
.dump = nldev_res_get_mr_dumpit,
},
[RDMA_NLDEV_CMD_RES_PD_GET] = {
.doit = nldev_res_get_pd_doit,
.dump = nldev_res_get_pd_dumpit,
},
};

View File

@ -438,6 +438,38 @@ free:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type,
u32 object_id,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile,
object_id, UVERBS_LOOKUP_READ);
if (IS_ERR(uobj))
return uobj;
attrs->context = uobj->context;
return uobj;
}
struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type,
u32 object_id,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile,
object_id, UVERBS_LOOKUP_WRITE);
if (IS_ERR(uobj))
return uobj;
attrs->context = uobj->context;
return uobj;
}
static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
@ -801,6 +833,7 @@ void uverbs_close_fd(struct file *f)
/* Pairs with filp->private_data in alloc_begin_fd_uobject */
uverbs_uobject_put(uobj);
}
EXPORT_SYMBOL(uverbs_close_fd);
/*
* Drop the ucontext off the ufile and completely disconnect it from the
@ -811,7 +844,6 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
{
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *ib_dev = ucontext->device;
int ret;
/*
* If we are closing the FD then the user mmap VMAs must have
@ -829,12 +861,8 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
rdma_restrack_del(&ucontext->res);
/*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return.
*/
ret = ib_dev->ops.dealloc_ucontext(ucontext);
WARN_ON(ret);
ib_dev->ops.dealloc_ucontext(ucontext);
kfree(ucontext);
ufile->ucontext = NULL;
}

View File

@ -11,17 +11,51 @@
#include <linux/pid_namespace.h>
#include "cma_priv.h"
#include "restrack.h"
static int fill_res_noop(struct sk_buff *msg,
struct rdma_restrack_entry *entry)
static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
u32 *next)
{
return 0;
int err;
*id = *next;
if (*next == U32_MAX)
*id = 0;
xa_lock(xa);
err = __xa_alloc(xa, id, U32_MAX, entry, GFP_KERNEL);
if (err && *next != U32_MAX) {
*id = 0;
err = __xa_alloc(xa, id, *next, entry, GFP_KERNEL);
}
if (!err)
*next = *id + 1;
xa_unlock(xa);
return err;
}
void rdma_restrack_init(struct rdma_restrack_root *res)
/**
* rdma_restrack_init() - initialize and allocate resource tracking
* @dev: IB device
*
* Return: 0 on success
*/
int rdma_restrack_init(struct ib_device *dev)
{
init_rwsem(&res->rwsem);
res->fill_res_entry = fill_res_noop;
struct rdma_restrack_root *rt;
int i;
dev->res = kcalloc(RDMA_RESTRACK_MAX, sizeof(*rt), GFP_KERNEL);
if (!dev->res)
return -ENOMEM;
rt = dev->res;
for (i = 0; i < RDMA_RESTRACK_MAX; i++)
xa_init_flags(&rt[i].xa, XA_FLAGS_ALLOC);
return 0;
}
static const char *type2str(enum rdma_restrack_type type)
@ -38,55 +72,79 @@ static const char *type2str(enum rdma_restrack_type type)
return names[type];
};
void rdma_restrack_clean(struct rdma_restrack_root *res)
/**
* rdma_restrack_clean() - clean resource tracking
* @dev: IB device
*/
void rdma_restrack_clean(struct ib_device *dev)
{
struct rdma_restrack_root *rt = dev->res;
struct rdma_restrack_entry *e;
char buf[TASK_COMM_LEN];
struct ib_device *dev;
bool found = false;
const char *owner;
int bkt;
int i;
if (hash_empty(res->hash))
return;
for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
struct xarray *xa = &dev->res[i].xa;
dev = container_of(res, struct ib_device, res);
pr_err("restrack: %s", CUT_HERE);
dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
hash_for_each(res->hash, bkt, e, node) {
if (rdma_is_kernel_res(e)) {
owner = e->kern_name;
} else {
/*
* There is no need to call get_task_struct here,
* because we can be here only if there are more
* get_task_struct() call than put_task_struct().
*/
get_task_comm(buf, e->task);
owner = buf;
if (!xa_empty(xa)) {
unsigned long index;
if (!found) {
pr_err("restrack: %s", CUT_HERE);
dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
}
xa_for_each(xa, index, e) {
if (rdma_is_kernel_res(e)) {
owner = e->kern_name;
} else {
/*
* There is no need to call get_task_struct here,
* because we can be here only if there are more
* get_task_struct() call than put_task_struct().
*/
get_task_comm(buf, e->task);
owner = buf;
}
pr_err("restrack: %s %s object allocated by %s is not freed\n",
rdma_is_kernel_res(e) ? "Kernel" :
"User",
type2str(e->type), owner);
}
found = true;
}
pr_err("restrack: %s %s object allocated by %s is not freed\n",
rdma_is_kernel_res(e) ? "Kernel" : "User",
type2str(e->type), owner);
xa_destroy(xa);
}
pr_err("restrack: %s", CUT_HERE);
if (found)
pr_err("restrack: %s", CUT_HERE);
kfree(rt);
}
int rdma_restrack_count(struct rdma_restrack_root *res,
enum rdma_restrack_type type,
/**
* rdma_restrack_count() - the current usage of specific object
* @dev: IB device
* @type: actual type of object to operate
* @ns: PID namespace
*/
int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
struct pid_namespace *ns)
{
struct rdma_restrack_root *rt = &dev->res[type];
struct rdma_restrack_entry *e;
XA_STATE(xas, &rt->xa, 0);
u32 cnt = 0;
down_read(&res->rwsem);
hash_for_each_possible(res->hash, e, node, type) {
xa_lock(&rt->xa);
xas_for_each(&xas, e, U32_MAX) {
if (ns == &init_pid_ns ||
(!rdma_is_kernel_res(e) &&
ns == task_active_pid_ns(e->task)))
cnt++;
}
up_read(&res->rwsem);
xa_unlock(&rt->xa);
return cnt;
}
EXPORT_SYMBOL(rdma_restrack_count);
@ -157,28 +215,28 @@ EXPORT_SYMBOL(rdma_restrack_set_task);
static void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
struct rdma_restrack_root *rt;
int ret;
if (!dev)
return;
if (res->type != RDMA_RESTRACK_CM_ID || rdma_is_kernel_res(res))
res->task = NULL;
if (!rdma_is_kernel_res(res)) {
if (!res->task)
rdma_restrack_set_task(res, NULL);
res->kern_name = NULL;
} else {
set_kern_name(res);
}
rt = &dev->res[res->type];
kref_init(&res->kref);
init_completion(&res->comp);
res->valid = true;
if (res->type != RDMA_RESTRACK_QP)
ret = rt_xa_alloc_cyclic(&rt->xa, &res->id, res, &rt->next_id);
else {
/* Special case to ensure that LQPN points to right QP */
struct ib_qp *qp = container_of(res, struct ib_qp, res);
down_write(&dev->res.rwsem);
hash_add(dev->res.hash, &res->node, res->type);
up_write(&dev->res.rwsem);
ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL);
res->id = ret ? 0 : qp->qp_num;
}
if (!ret)
res->valid = true;
}
/**
@ -187,6 +245,8 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
*/
void rdma_restrack_kadd(struct rdma_restrack_entry *res)
{
res->task = NULL;
set_kern_name(res);
res->user = false;
rdma_restrack_add(res);
}
@ -198,6 +258,13 @@ EXPORT_SYMBOL(rdma_restrack_kadd);
*/
void rdma_restrack_uadd(struct rdma_restrack_entry *res)
{
if (res->type != RDMA_RESTRACK_CM_ID)
res->task = NULL;
if (!res->task)
rdma_restrack_set_task(res, NULL);
res->kern_name = NULL;
res->user = true;
rdma_restrack_add(res);
}
@ -209,6 +276,31 @@ int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_restrack_get);
/**
* rdma_restrack_get_byid() - translate from ID to restrack object
* @dev: IB device
* @type: resource track type
* @id: ID to take a look
*
* Return: Pointer to restrack entry or -ENOENT in case of error.
*/
struct rdma_restrack_entry *
rdma_restrack_get_byid(struct ib_device *dev,
enum rdma_restrack_type type, u32 id)
{
struct rdma_restrack_root *rt = &dev->res[type];
struct rdma_restrack_entry *res;
xa_lock(&rt->xa);
res = xa_load(&rt->xa, id);
if (!res || !rdma_restrack_get(res))
res = ERR_PTR(-ENOENT);
xa_unlock(&rt->xa);
return res;
}
EXPORT_SYMBOL(rdma_restrack_get_byid);
static void restrack_release(struct kref *kref)
{
struct rdma_restrack_entry *res;
@ -225,23 +317,25 @@ EXPORT_SYMBOL(rdma_restrack_put);
void rdma_restrack_del(struct rdma_restrack_entry *res)
{
struct rdma_restrack_entry *old;
struct rdma_restrack_root *rt;
struct ib_device *dev;
if (!res->valid)
goto out;
dev = res_to_dev(res);
if (!dev)
if (WARN_ON(!dev))
return;
rdma_restrack_put(res);
rt = &dev->res[res->type];
wait_for_completion(&res->comp);
down_write(&dev->res.rwsem);
hash_del(&res->node);
old = xa_erase(&rt->xa, res->id);
WARN_ON(old != res);
res->valid = false;
up_write(&dev->res.rwsem);
rdma_restrack_put(res);
wait_for_completion(&res->comp);
out:
if (res->task) {

View File

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved.
*/
#ifndef _RDMA_CORE_RESTRACK_H_
#define _RDMA_CORE_RESTRACK_H_
#include <linux/mutex.h>
/**
* struct rdma_restrack_root - main resource tracking management
* entity, per-device
*/
struct rdma_restrack_root {
/**
* @xa: Array of XArray structure to hold restrack entries.
*/
struct xarray xa;
/**
* @next_id: Next ID to support cyclic allocation
*/
u32 next_id;
};
int rdma_restrack_init(struct ib_device *dev);
void rdma_restrack_clean(struct ib_device *dev);
#endif /* _RDMA_CORE_RESTRACK_H_ */

View File

@ -179,7 +179,6 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
struct scatterlist *sg, u32 sg_cnt, u32 offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
qp->max_read_sge;
struct ib_sge *sge;
@ -209,8 +208,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
rdma_wr->wr.sg_list = sge;
for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
sge->addr = ib_sg_dma_address(dev, sg) + offset;
sge->length = ib_sg_dma_len(dev, sg) - offset;
sge->addr = sg_dma_address(sg) + offset;
sge->length = sg_dma_len(sg) - offset;
sge->lkey = qp->pd->local_dma_lkey;
total_len += sge->length;
@ -236,14 +235,13 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
ctx->nr_ops = 1;
ctx->single.sge.lkey = qp->pd->local_dma_lkey;
ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset;
ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset;
ctx->single.sge.addr = sg_dma_address(sg) + offset;
ctx->single.sge.length = sg_dma_len(sg) - offset;
memset(rdma_wr, 0, sizeof(*rdma_wr));
if (dir == DMA_TO_DEVICE)
@ -294,7 +292,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
* Skip to the S/G entry that sg_offset falls into:
*/
for (;;) {
u32 len = ib_sg_dma_len(dev, sg);
u32 len = sg_dma_len(sg);
if (sg_offset < len)
break;

View File

@ -2342,9 +2342,7 @@ static void ib_sa_add_one(struct ib_device *device)
s = rdma_start_port(device);
e = rdma_end_port(device);
sa_dev = kzalloc(sizeof *sa_dev +
(e - s + 1) * sizeof (struct ib_sa_port),
GFP_KERNEL);
sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
if (!sa_dev)
return;

View File

@ -39,22 +39,25 @@
#include "core_priv.h"
#include "mad_priv.h"
static LIST_HEAD(mad_agent_list);
/* Lock to protect mad_agent_list */
static DEFINE_SPINLOCK(mad_agent_list_lock);
static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp)
{
struct pkey_index_qp_list *pkey = NULL;
struct pkey_index_qp_list *tmp_pkey;
struct ib_device *dev = pp->sec->dev;
spin_lock(&dev->port_pkey_list[pp->port_num].list_lock);
list_for_each_entry(tmp_pkey,
&dev->port_pkey_list[pp->port_num].pkey_list,
pkey_index_list) {
spin_lock(&dev->port_data[pp->port_num].pkey_list_lock);
list_for_each_entry (tmp_pkey, &dev->port_data[pp->port_num].pkey_list,
pkey_index_list) {
if (tmp_pkey->pkey_index == pp->pkey_index) {
pkey = tmp_pkey;
break;
}
}
spin_unlock(&dev->port_pkey_list[pp->port_num].list_lock);
spin_unlock(&dev->port_data[pp->port_num].pkey_list_lock);
return pkey;
}
@ -259,12 +262,12 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp)
if (!pkey)
return -ENOMEM;
spin_lock(&dev->port_pkey_list[port_num].list_lock);
spin_lock(&dev->port_data[port_num].pkey_list_lock);
/* Check for the PKey again. A racing process may
* have created it.
*/
list_for_each_entry(tmp_pkey,
&dev->port_pkey_list[port_num].pkey_list,
&dev->port_data[port_num].pkey_list,
pkey_index_list) {
if (tmp_pkey->pkey_index == pp->pkey_index) {
kfree(pkey);
@ -279,9 +282,9 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp)
spin_lock_init(&pkey->qp_list_lock);
INIT_LIST_HEAD(&pkey->qp_list);
list_add(&pkey->pkey_index_list,
&dev->port_pkey_list[port_num].pkey_list);
&dev->port_data[port_num].pkey_list);
}
spin_unlock(&dev->port_pkey_list[port_num].list_lock);
spin_unlock(&dev->port_data[port_num].pkey_list_lock);
}
spin_lock(&pkey->qp_list_lock);
@ -418,12 +421,15 @@ void ib_close_shared_qp_security(struct ib_qp_security *sec)
int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
{
u8 i = rdma_start_port(dev);
unsigned int i;
bool is_ib = false;
int ret;
while (i <= rdma_end_port(dev) && !is_ib)
rdma_for_each_port (dev, i) {
is_ib = rdma_protocol_ib(dev, i++);
if (is_ib)
break;
}
/* If this isn't an IB device don't create the security context */
if (!is_ib)
@ -544,9 +550,8 @@ void ib_security_cache_change(struct ib_device *device,
{
struct pkey_index_qp_list *pkey;
list_for_each_entry(pkey,
&device->port_pkey_list[port_num].pkey_list,
pkey_index_list) {
list_for_each_entry (pkey, &device->port_data[port_num].pkey_list,
pkey_index_list) {
check_pkey_qps(pkey,
device,
port_num,
@ -554,21 +559,19 @@ void ib_security_cache_change(struct ib_device *device,
}
}
void ib_security_destroy_port_pkey_list(struct ib_device *device)
void ib_security_release_port_pkey_list(struct ib_device *device)
{
struct pkey_index_qp_list *pkey, *tmp_pkey;
int i;
unsigned int i;
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
spin_lock(&device->port_pkey_list[i].list_lock);
rdma_for_each_port (device, i) {
list_for_each_entry_safe(pkey,
tmp_pkey,
&device->port_pkey_list[i].pkey_list,
&device->port_data[i].pkey_list,
pkey_index_list) {
list_del(&pkey->pkey_index_list);
kfree(pkey);
}
spin_unlock(&device->port_pkey_list[i].list_lock);
}
}
@ -676,19 +679,18 @@ static int ib_security_pkey_access(struct ib_device *dev,
return security_ib_pkey_access(sec, subnet_prefix, pkey);
}
static int ib_mad_agent_security_change(struct notifier_block *nb,
unsigned long event,
void *data)
void ib_mad_agent_security_change(void)
{
struct ib_mad_agent *ag = container_of(nb, struct ib_mad_agent, lsm_nb);
struct ib_mad_agent *ag;
if (event != LSM_POLICY_CHANGE)
return NOTIFY_DONE;
ag->smp_allowed = !security_ib_endport_manage_subnet(
ag->security, dev_name(&ag->device->dev), ag->port_num);
return NOTIFY_OK;
spin_lock(&mad_agent_list_lock);
list_for_each_entry(ag,
&mad_agent_list,
mad_agent_sec_list)
WRITE_ONCE(ag->smp_allowed,
!security_ib_endport_manage_subnet(ag->security,
dev_name(&ag->device->dev), ag->port_num));
spin_unlock(&mad_agent_list_lock);
}
int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
@ -699,6 +701,8 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
if (!rdma_protocol_ib(agent->device, agent->port_num))
return 0;
INIT_LIST_HEAD(&agent->mad_agent_sec_list);
ret = security_ib_alloc_security(&agent->security);
if (ret)
return ret;
@ -706,20 +710,22 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
if (qp_type != IB_QPT_SMI)
return 0;
spin_lock(&mad_agent_list_lock);
ret = security_ib_endport_manage_subnet(agent->security,
dev_name(&agent->device->dev),
agent->port_num);
if (ret)
return ret;
goto free_security;
agent->lsm_nb.notifier_call = ib_mad_agent_security_change;
ret = register_lsm_notifier(&agent->lsm_nb);
if (ret)
return ret;
agent->smp_allowed = true;
agent->lsm_nb_reg = true;
WRITE_ONCE(agent->smp_allowed, true);
list_add(&agent->mad_agent_sec_list, &mad_agent_list);
spin_unlock(&mad_agent_list_lock);
return 0;
free_security:
spin_unlock(&mad_agent_list_lock);
security_ib_free_security(agent->security);
return ret;
}
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
@ -727,9 +733,13 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
if (!rdma_protocol_ib(agent->device, agent->port_num))
return;
if (agent->qp->qp_type == IB_QPT_SMI) {
spin_lock(&mad_agent_list_lock);
list_del(&agent->mad_agent_sec_list);
spin_unlock(&mad_agent_list_lock);
}
security_ib_free_security(agent->security);
if (agent->lsm_nb_reg)
unregister_lsm_notifier(&agent->lsm_nb);
}
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
@ -738,7 +748,7 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
return 0;
if (map->agent.qp->qp_type == IB_QPT_SMI) {
if (!map->agent.smp_allowed)
if (!READ_ONCE(map->agent.smp_allowed))
return -EACCES;
return 0;
}

View File

@ -1015,9 +1015,7 @@ err_free_stats:
return;
}
static int add_port(struct ib_device *device, int port_num,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
static int add_port(struct ib_device *device, int port_num)
{
struct ib_port *p;
struct ib_port_attr attr;
@ -1113,8 +1111,8 @@ static int add_port(struct ib_device *device, int port_num,
if (ret)
goto err_free_pkey;
if (port_callback) {
ret = port_callback(device, port_num, &p->kobj);
if (device->ops.init_port) {
ret = device->ops.init_port(device, port_num, &p->kobj);
if (ret)
goto err_remove_pkey;
}
@ -1189,7 +1187,7 @@ err_put:
static ssize_t node_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device *dev = rdma_device_to_ibdev(device);
switch (dev->node_type) {
case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
@ -1206,7 +1204,7 @@ static DEVICE_ATTR_RO(node_type);
static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device *dev = rdma_device_to_ibdev(device);
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
@ -1219,7 +1217,7 @@ static DEVICE_ATTR_RO(sys_image_guid);
static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device *dev = rdma_device_to_ibdev(device);
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
@ -1232,7 +1230,7 @@ static DEVICE_ATTR_RO(node_guid);
static ssize_t node_desc_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device *dev = rdma_device_to_ibdev(device);
return sprintf(buf, "%.64s\n", dev->node_desc);
}
@ -1241,7 +1239,7 @@ static ssize_t node_desc_store(struct device *device,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device *dev = rdma_device_to_ibdev(device);
struct ib_device_modify desc = {};
int ret;
@ -1260,7 +1258,7 @@ static DEVICE_ATTR_RW(node_desc);
static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device *dev = rdma_device_to_ibdev(device);
ib_get_device_fw_str(dev, buf);
strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
@ -1277,21 +1275,21 @@ static struct attribute *ib_dev_attrs[] = {
NULL,
};
static const struct attribute_group dev_attr_group = {
const struct attribute_group ib_dev_attr_group = {
.attrs = ib_dev_attrs,
};
static void free_port_list_attributes(struct ib_device *device)
static void ib_free_port_attrs(struct ib_device *device)
{
struct kobject *p, *t;
list_for_each_entry_safe(p, t, &device->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
list_del(&p->entry);
if (port->hw_stats) {
kfree(port->hw_stats);
if (port->hw_stats_ag)
free_hsag(&port->kobj, port->hw_stats_ag);
}
kfree(port->hw_stats);
if (port->pma_table)
sysfs_remove_group(p, port->pma_table);
@ -1308,62 +1306,47 @@ static void free_port_list_attributes(struct ib_device *device)
kobject_put(device->ports_kobj);
}
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
static int ib_setup_port_attrs(struct ib_device *device)
{
struct device *class_dev = &device->dev;
unsigned int port;
int ret;
int i;
device->groups[0] = &dev_attr_group;
class_dev->groups = device->groups;
device->ports_kobj = kobject_create_and_add("ports", &device->dev.kobj);
if (!device->ports_kobj)
return -ENOMEM;
ret = device_add(class_dev);
if (ret)
goto err;
device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj);
if (!device->ports_kobj) {
ret = -ENOMEM;
goto err_put;
}
if (rdma_cap_ib_switch(device)) {
ret = add_port(device, 0, port_callback);
rdma_for_each_port (device, port) {
ret = add_port(device, port);
if (ret)
goto err_put;
} else {
for (i = 1; i <= device->phys_port_cnt; ++i) {
ret = add_port(device, i, port_callback);
if (ret)
goto err_put;
}
}
return 0;
err_put:
ib_free_port_attrs(device);
return ret;
}
int ib_device_register_sysfs(struct ib_device *device)
{
int ret;
ret = ib_setup_port_attrs(device);
if (ret)
return ret;
if (device->ops.alloc_hw_stats)
setup_hw_stats(device, NULL, 0);
return 0;
err_put:
free_port_list_attributes(device);
device_del(class_dev);
err:
return ret;
}
void ib_device_unregister_sysfs(struct ib_device *device)
{
/* Hold device until ib_dealloc_device() */
get_device(&device->dev);
free_port_list_attributes(device);
if (device->hw_stats) {
kfree(device->hw_stats);
if (device->hw_stats_ag)
free_hsag(&device->dev.kobj, device->hw_stats_ag);
}
kfree(device->hw_stats);
device_unregister(&device->dev);
ib_free_port_attrs(device);
}

View File

@ -1236,6 +1236,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
}
ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
break;
case RDMA_OPTION_ID_ACK_TIMEOUT:
if (optlen != sizeof(u8)) {
ret = -EINVAL;
break;
}
ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
break;
default:
ret = -ENOSYS;
}

View File

@ -72,15 +72,16 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
* If access flags indicate ODP memory, avoid pinning. Instead, stores
* the mm for future page fault handling in conjunction with MMU notifiers.
*
* @context: userspace context to pin memory for
* @udata: userspace context to pin memory for
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
* @dmasync: flush in-flight DMA when the memory region is written
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
size_t size, int access, int dmasync)
{
struct ib_ucontext *context;
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
@ -95,6 +96,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct scatterlist *sg, *sg_list_start;
unsigned int gup_flags = FOLL_WRITE;
if (!udata)
return ERR_PTR(-EIO);
context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
->context;
if (!context)
return ERR_PTR(-EIO);
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
@ -160,15 +169,12 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
down_write(&mm->mmap_sem);
if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
(new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
up_write(&mm->mmap_sem);
new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
atomic64_sub(npages, &mm->pinned_vm);
ret = -ENOMEM;
goto out;
}
mm->pinned_vm = new_pinned;
up_write(&mm->mmap_sem);
cur_base = addr & PAGE_MASK;
@ -228,9 +234,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem_release:
__ib_umem_release(context->device, umem, 0);
vma:
down_write(&mm->mmap_sem);
mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&mm->mmap_sem);
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
if (vma_list)
free_page((unsigned long) vma_list);
@ -253,25 +257,12 @@ static void __ib_umem_release_tail(struct ib_umem *umem)
kfree(umem);
}
static void ib_umem_release_defer(struct work_struct *work)
{
struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&umem->owning_mm->mmap_sem);
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&umem->owning_mm->mmap_sem);
__ib_umem_release_tail(umem);
}
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
if (umem->is_odp) {
ib_umem_odp_release(to_ib_umem_odp(umem));
__ib_umem_release_tail(umem);
@ -280,26 +271,7 @@ void ib_umem_release(struct ib_umem *umem)
__ib_umem_release(umem->context->device, umem, 1);
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
* we defer the vm_locked accounting a workqueue.
*/
if (context->closing) {
if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_release_defer);
queue_work(ib_wq, &umem->work);
return;
}
} else {
down_write(&umem->owning_mm->mmap_sem);
}
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&umem->owning_mm->mmap_sem);
atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
__ib_umem_release_tail(umem);
}
EXPORT_SYMBOL(ib_umem_release);

View File

@ -40,6 +40,7 @@
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <linux/interval_tree_generic.h>
#include <linux/pagemap.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@ -299,7 +300,7 @@ static void free_per_mm(struct rcu_head *rcu)
kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
}
void put_per_mm(struct ib_umem_odp *umem_odp)
static void put_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
struct ib_ucontext *ctx = umem_odp->umem.context;
@ -332,9 +333,10 @@ void put_per_mm(struct ib_umem_odp *umem_odp)
mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
}
struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
unsigned long addr, size_t size)
{
struct ib_ucontext_per_mm *per_mm = root->per_mm;
struct ib_ucontext *ctx = per_mm->context;
struct ib_umem_odp *odp_data;
struct ib_umem *umem;
@ -349,7 +351,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
umem->writable = 1;
umem->writable = root->umem.writable;
umem->is_odp = 1;
odp_data->per_mm = per_mm;
umem->owning_mm = per_mm->mm;
@ -617,7 +619,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
* mmget_not_zero will fail in this case.
*/
owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) {
if (!owning_process || !mmget_not_zero(owning_mm)) {
ret = -EINVAL;
goto out_put_task;
}
@ -684,9 +686,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
mutex_unlock(&umem_odp->umem_mutex);
if (ret < 0) {
/* Release left over pages when handling errors. */
for (++j; j < npages; ++j)
put_page(local_page_list[j]);
/*
* Release pages, remembering that the first page
* to hit an error was already released by
* ib_umem_odp_map_dma_single_page().
*/
if (npages - (j + 1) > 0)
release_pages(&local_page_list[j+1],
npages - (j + 1));
break;
}
}

View File

@ -957,19 +957,22 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
{
struct ib_umad_port *port;
struct ib_umad_file *file;
int ret = -ENXIO;
int ret = 0;
port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
mutex_lock(&port->file_mutex);
if (!port->ib_dev)
if (!port->ib_dev) {
ret = -ENXIO;
goto out;
}
ret = -ENOMEM;
file = kzalloc(sizeof *file, GFP_KERNEL);
if (!file)
file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file) {
ret = -ENOMEM;
goto out;
}
mutex_init(&file->mutex);
spin_lock_init(&file->send_lock);
@ -982,14 +985,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
ret = nonseekable_open(inode, filp);
if (ret) {
list_del(&file->port_list);
kfree(file);
goto out;
}
ib_umad_dev_get(port->umad_dev);
nonseekable_open(inode, filp);
out:
mutex_unlock(&port->file_mutex);
return ret;
@ -998,7 +994,6 @@ out:
static int ib_umad_close(struct inode *inode, struct file *filp)
{
struct ib_umad_file *file = filp->private_data;
struct ib_umad_device *dev = file->port->umad_dev;
struct ib_umad_packet *packet, *tmp;
int already_dead;
int i;
@ -1027,7 +1022,6 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
ib_umad_dev_put(dev);
return 0;
}
@ -1073,17 +1067,9 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
filp->private_data = port;
ret = nonseekable_open(inode, filp);
if (ret)
goto err_clr_sm_cap;
ib_umad_dev_get(port->umad_dev);
nonseekable_open(inode, filp);
return 0;
err_clr_sm_cap:
swap(props.set_port_cap_mask, props.clr_port_cap_mask);
ib_modify_port(port->ib_dev, port->port_num, 0, &props);
err_up_sem:
up(&port->sm_sem);
@ -1106,7 +1092,6 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
ib_umad_dev_put(port->umad_dev);
return ret;
}
@ -1283,10 +1268,12 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
mutex_unlock(&port->file_mutex);
cdev_device_del(&port->sm_cdev, &port->sm_dev);
put_device(&port->sm_dev);
cdev_device_del(&port->cdev, &port->dev);
put_device(&port->dev);
ida_free(&umad_ida, port->dev_num);
/* balances device_initialize() */
put_device(&port->sm_dev);
put_device(&port->dev);
}
static void ib_umad_add_one(struct ib_device *device)
@ -1329,21 +1316,24 @@ err:
ib_umad_kill_port(&umad_dev->ports[i - s]);
}
free:
/* balances kref_init */
ib_umad_dev_put(umad_dev);
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
{
struct ib_umad_device *umad_dev = client_data;
int i;
unsigned int i;
if (!umad_dev)
return;
for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) {
if (rdma_cap_ib_mad(device, i + rdma_start_port(device)))
ib_umad_kill_port(&umad_dev->ports[i]);
rdma_for_each_port (device, i) {
if (rdma_cap_ib_mad(device, i))
ib_umad_kill_port(
&umad_dev->ports[i - rdma_start_port(device)]);
}
/* balances kref_init() */
ib_umad_dev_put(umad_dev);
}

View File

@ -224,12 +224,13 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
if (ret)
goto err;
ucontext = ib_dev->ops.alloc_ucontext(ib_dev, &attrs->driver_udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
if (!ucontext) {
ret = -ENOMEM;
goto err_alloc;
}
ucontext->res.type = RDMA_RESTRACK_CTX;
ucontext->device = ib_dev;
ucontext->cg_obj = cg_obj;
/* ufile is required when some objects are released */
@ -238,15 +239,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
ucontext->closing = false;
ucontext->cleanup_retryable = false;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
mutex_init(&ucontext->per_mm_list_lock);
INIT_LIST_HEAD(&ucontext->per_mm_list);
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL;
#endif
resp.num_comp_vectors = file->device->num_comp_vectors;
ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0)
@ -259,15 +253,22 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
goto err_fd;
}
resp.num_comp_vectors = file->device->num_comp_vectors;
ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_file;
fd_install(resp.async_fd, filp);
ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata);
if (ret)
goto err_file;
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL;
ucontext->res.type = RDMA_RESTRACK_CTX;
rdma_restrack_uadd(&ucontext->res);
fd_install(resp.async_fd, filp);
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
* only after all writes to setup the ucontext have completed
@ -286,7 +287,7 @@ err_fd:
put_unused_fd(resp.async_fd);
err_free:
ib_dev->ops.dealloc_ucontext(ucontext);
kfree(ucontext);
err_alloc:
ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
@ -410,9 +411,9 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
if (IS_ERR(uobj))
return PTR_ERR(uobj);
pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
pd = rdma_zalloc_drv_obj(ib_dev, ib_pd);
if (!pd) {
ret = -ENOMEM;
goto err;
}
@ -420,11 +421,15 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
pd->uobject = uobj;
pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
pd->res.type = RDMA_RESTRACK_PD;
ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata);
if (ret)
goto err_alloc;
uobj->object = pd;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
pd->res.type = RDMA_RESTRACK_PD;
rdma_restrack_uadd(&pd->res);
ret = uverbs_response(attrs, &resp, sizeof(resp));
@ -435,7 +440,9 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
err_copy:
ib_dealloc_pd(pd);
pd = NULL;
err_alloc:
kfree(pd);
err:
uobj_alloc_abort(uobj);
return ret;
@ -822,14 +829,13 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
cmd.length, cmd.hca_va,
cmd.access_flags, pd,
&attrs->driver_udata);
if (!ret) {
if (cmd.flags & IB_MR_REREG_PD) {
atomic_inc(&pd->usecnt);
mr->pd = pd;
atomic_dec(&old_pd->usecnt);
}
} else {
if (ret)
goto put_uobj_pd;
if (cmd.flags & IB_MR_REREG_PD) {
atomic_inc(&pd->usecnt);
mr->pd = pd;
atomic_dec(&old_pd->usecnt);
}
memset(&resp, 0, sizeof(resp));
@ -884,6 +890,11 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
goto err_free;
}
if (cmd.mw_type != IB_MW_TYPE_1 && cmd.mw_type != IB_MW_TYPE_2) {
ret = -EINVAL;
goto err_put;
}
mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
@ -1184,12 +1195,11 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
ret = -EFAULT;
goto out_put;
}
ret = 0;
if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT))
ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT);
ret = 0;
out_put:
uobj_put_obj_read(cq);
return ret;
@ -2632,7 +2642,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res,
}
EXPORT_SYMBOL(flow_resources_add);
static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs,
static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@ -3618,7 +3628,6 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
copy_query_dev_fields(ucontext, &resp.base, &attr);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
resp.odp_caps.general_caps = attr.odp_caps.general_caps;
resp.odp_caps.per_transport_caps.rc_odp_caps =
attr.odp_caps.per_transport_caps.rc_odp_caps;
@ -3626,7 +3635,7 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
attr.odp_caps.per_transport_caps.uc_odp_caps;
resp.odp_caps.per_transport_caps.ud_odp_caps =
attr.odp_caps.per_transport_caps.ud_odp_caps;
#endif
resp.xrc_odp_caps = attr.odp_caps.per_transport_caps.xrc_odp_caps;
resp.timestamp_mask = attr.timestamp_mask;
resp.hca_core_clock = attr.hca_core_clock;

View File

@ -213,6 +213,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
ret = PTR_ERR(attr->uobjects[i]);
break;
}
pbundle->bundle.context = attr->uobjects[i]->context;
}
attr->len = i;
@ -330,6 +331,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
uattr->data_s64);
if (IS_ERR(o_attr->uobject))
return PTR_ERR(o_attr->uobject);
pbundle->bundle.context = o_attr->uobject->context;
__set_bit(attr_bkey, pbundle->uobj_finalize);
if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
@ -592,6 +594,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
pbundle->method_elm = method_elm;
pbundle->method_key = attrs_iter.index;
pbundle->bundle.ufile = ufile;
pbundle->bundle.context = NULL; /* only valid if bundle has uobject */
pbundle->radix = &uapi->radix;
pbundle->radix_slots = slot;
pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);

View File

@ -695,6 +695,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
bundle.ufile = file;
bundle.context = NULL; /* only valid if bundle has uobject */
if (!method_elm->is_ex) {
size_t in_len = hdr.in_words * 4 - sizeof(hdr);
size_t out_len = hdr.out_words * 4;
@ -1135,6 +1136,7 @@ static const struct file_operations uverbs_mmap_fops = {
static struct ib_client uverbs_client = {
.name = "uverbs",
.no_kverbs_req = true,
.add = ib_uverbs_add_one,
.remove = ib_uverbs_remove_one
};

View File

@ -188,7 +188,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
if (ret)
return ret;
ib_dealloc_pd((struct ib_pd *)uobject->object);
ib_dealloc_pd(pd);
return 0;
}

View File

@ -188,13 +188,18 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi,
obj_elm->type_attrs = obj->type_attrs;
obj_elm->type_class = obj->type_attrs->type_class;
/*
* Today drivers are only permitted to use idr_class
* types. They cannot use FD types because we currently have
* no way to revoke the fops pointer after device
* disassociation.
* Today drivers are only permitted to use idr_class and
* fd_class types. We can revoke the IDR types during
* disassociation, and the FD types require the driver to use
* struct file_operations.owner to prevent the driver module
* code from unloading while the file is open. This provides
* enough safety that uverbs_close_fd() will continue to work.
* Drivers using FD are responsible to handle disassociation of
* the device on their own.
*/
if (WARN_ON(is_driver &&
obj->type_attrs->type_class != &uverbs_idr_class))
obj->type_attrs->type_class != &uverbs_idr_class &&
obj->type_attrs->type_class != &uverbs_fd_class))
return -EINVAL;
}

View File

@ -254,10 +254,11 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
{
struct ib_pd *pd;
int mr_access_flags = 0;
int ret;
pd = device->ops.alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
return pd;
pd = rdma_zalloc_drv_obj(device, ib_pd);
if (!pd)
return ERR_PTR(-ENOMEM);
pd->device = device;
pd->uobject = NULL;
@ -265,6 +266,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
atomic_set(&pd->usecnt, 0);
pd->flags = flags;
pd->res.type = RDMA_RESTRACK_PD;
rdma_restrack_set_task(&pd->res, caller);
ret = device->ops.alloc_pd(pd, NULL, NULL);
if (ret) {
kfree(pd);
return ERR_PTR(ret);
}
rdma_restrack_kadd(&pd->res);
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
@ -275,10 +286,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
}
pd->res.type = RDMA_RESTRACK_PD;
rdma_restrack_set_task(&pd->res, caller);
rdma_restrack_kadd(&pd->res);
if (mr_access_flags) {
struct ib_mr *mr;
@ -329,10 +336,8 @@ void ib_dealloc_pd(struct ib_pd *pd)
WARN_ON(atomic_read(&pd->usecnt));
rdma_restrack_del(&pd->res);
/* Making delalloc_pd a void return is a WIP, no driver should return
an error here. */
ret = pd->device->ops.dealloc_pd(pd);
WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
pd->device->ops.dealloc_pd(pd);
kfree(pd);
}
EXPORT_SYMBOL(ib_dealloc_pd);
@ -1106,8 +1111,8 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
}
EXPORT_SYMBOL(ib_open_qp);
static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
struct ib_qp_init_attr *qp_init_attr)
static struct ib_qp *create_xrc_qp(struct ib_qp *qp,
struct ib_qp_init_attr *qp_init_attr)
{
struct ib_qp *real_qp = qp;
@ -1122,10 +1127,10 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
qp_init_attr->qp_context);
if (!IS_ERR(qp))
__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
else
real_qp->device->ops.destroy_qp(real_qp);
if (IS_ERR(qp))
return qp;
__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
return qp;
}
@ -1156,10 +1161,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
return qp;
ret = ib_create_qp_security(qp, device);
if (ret) {
ib_destroy_qp(qp);
return ERR_PTR(ret);
}
if (ret)
goto err;
qp->real_qp = qp;
qp->qp_type = qp_init_attr->qp_type;
@ -1172,8 +1175,15 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
INIT_LIST_HEAD(&qp->sig_mrs);
qp->port = 0;
if (qp_init_attr->qp_type == IB_QPT_XRC_TGT)
return ib_create_xrc_qp(qp, qp_init_attr);
if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
struct ib_qp *xrc_qp = create_xrc_qp(qp, qp_init_attr);
if (IS_ERR(xrc_qp)) {
ret = PTR_ERR(xrc_qp);
goto err;
}
return xrc_qp;
}
qp->event_handler = qp_init_attr->event_handler;
qp->qp_context = qp_init_attr->qp_context;
@ -1200,11 +1210,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs) {
ret = rdma_rw_init_mrs(qp, qp_init_attr);
if (ret) {
pr_err("failed to init MR pool ret= %d\n", ret);
ib_destroy_qp(qp);
return ERR_PTR(ret);
}
if (ret)
goto err;
}
/*
@ -1217,6 +1224,11 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
device->attrs.max_sge_rd);
return qp;
err:
ib_destroy_qp(qp);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_qp);
@ -1711,10 +1723,7 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
if (!dev->ops.get_netdev)
return -EOPNOTSUPP;
netdev = dev->ops.get_netdev(dev, port_num);
netdev = ib_device_get_netdev(dev, port_num);
if (!netdev)
return -ENODEV;

View File

@ -1,5 +1,6 @@
config INFINIBAND_BNXT_RE
tristate "Broadcom Netxtreme HCA support"
depends on 64BIT
depends on ETHERNET && NETDEVICES && PCI && INET && DCB
select NET_VENDOR_BROADCOM
select BNXT

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt
ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnxt
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
bnxt_re-y := main.o ib_verbs.o \
qplib_res.o qplib_rcfw.o \

View File

@ -124,6 +124,7 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
struct bnxt_qplib_chip_ctx chip_ctx;
struct bnxt_en_dev *en_dev;
struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
int num_msix;

View File

@ -48,6 +48,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
#include <rdma/uverbs_ioctl.h>
#include "bnxt_ulp.h"
@ -563,41 +564,29 @@ fail:
}
/* Protection Domains */
int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
void bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
int rc;
bnxt_re_destroy_fence_mr(pd);
if (pd->qplib_pd.id) {
rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
&rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
if (rc)
dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
}
kfree(pd);
return 0;
if (pd->qplib_pd.id)
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
}
struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *ucontext,
struct ib_udata *udata)
int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *ucontext,
struct ib_udata *udata)
{
struct ib_device *ibdev = ibpd->device;
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_re_ucontext *ucntx = container_of(ucontext,
struct bnxt_re_ucontext,
ib_uctx);
struct bnxt_re_pd *pd;
struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
int rc;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
pd->rdev = rdev;
if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
@ -637,13 +626,12 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
if (bnxt_re_create_fence_mr(pd))
dev_warn(rdev_to_dev(rdev),
"Failed to create Fence-MR\n");
return &pd->ib_pd;
return 0;
dbfail:
(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
fail:
kfree(pd);
return ERR_PTR(rc);
return rc;
}
/* Address Handles */
@ -663,17 +651,36 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
return 0;
}
static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
{
u8 nw_type;
switch (ntype) {
case RDMA_NETWORK_IPV4:
nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
break;
case RDMA_NETWORK_IPV6:
nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6;
break;
default:
nw_type = CMDQ_CREATE_AH_TYPE_V1;
break;
}
return nw_type;
}
struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_ah *ah;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int rc;
struct bnxt_re_dev *rdev = pd->rdev;
const struct ib_gid_attr *sgid_attr;
struct bnxt_re_ah *ah;
u8 nw_type;
int rc;
if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
@ -700,28 +707,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
ah->qplib_ah.flow_label = grh->flow_label;
ah->qplib_ah.hop_limit = grh->hop_limit;
ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
if (udata &&
!rdma_is_multicast_addr((struct in6_addr *)
grh->dgid.raw) &&
!rdma_link_local_addr((struct in6_addr *)
grh->dgid.raw)) {
const struct ib_gid_attr *sgid_attr;
sgid_attr = grh->sgid_attr;
/* Get network header type for this GID */
nw_type = rdma_gid_attr_network_type(sgid_attr);
switch (nw_type) {
case RDMA_NETWORK_IPV4:
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
break;
case RDMA_NETWORK_IPV6:
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6;
break;
default:
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1;
break;
}
}
sgid_attr = grh->sgid_attr;
/* Get network header type for this GID */
nw_type = rdma_gid_attr_network_type(sgid_attr);
ah->qplib_ah.nw_type = bnxt_re_stack_to_dev_nw_type(nw_type);
memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
@ -733,12 +723,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
/* Write AVID to shared page. */
if (udata) {
struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
struct bnxt_re_ucontext *uctx;
struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
unsigned long flag;
u32 *wrptr;
uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
spin_lock_irqsave(&uctx->sh_lock, flag);
wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
*wrptr = ah->qplib_ah.id;
@ -804,8 +793,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
struct bnxt_re_dev *rdev = qp->rdev;
int rc;
unsigned int flags;
int rc;
bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
@ -814,9 +803,12 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
return rc;
}
flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_clean_qp(&qp->qplib_qp);
bnxt_re_unlock_cqs(qp, flags);
if (rdma_is_kernel_res(&qp->ib_qp.res)) {
flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_clean_qp(&qp->qplib_qp);
bnxt_re_unlock_cqs(qp, flags);
}
bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
@ -882,21 +874,23 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
struct bnxt_re_qp_req ureq;
struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
struct ib_umem *umem;
int bytes = 0;
struct ib_ucontext *context = pd->ib_pd.uobject->context;
struct bnxt_re_ucontext *cntx = container_of(context,
struct bnxt_re_ucontext,
ib_uctx);
int bytes = 0, psn_sz;
struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
/* Consider mapping PSN search memory only for RC QPs. */
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC)
bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search));
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
psn_sz = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
bytes += (qplib_qp->sq.max_wqe * psn_sz);
}
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(context, ureq.qpsva, bytes,
IB_ACCESS_LOCAL_WRITE, 1);
umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem))
return PTR_ERR(umem);
@ -908,7 +902,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
if (!qp->qplib_qp.srq) {
bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(context, ureq.qprva, bytes,
umem = ib_umem_get(udata, ureq.qprva, bytes,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem))
goto rqfail;
@ -1066,12 +1060,17 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
qp->qplib_qp.pd = &pd->qplib_pd;
qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type);
if (qp_init_attr->qp_type == IB_QPT_GSI &&
bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))
qp->qplib_qp.type = CMDQ_CREATE_QP_TYPE_GSI;
if (qp->qplib_qp.type == IB_QPT_MAX) {
dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported",
qp->qplib_qp.type);
rc = -EINVAL;
goto fail;
}
qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data;
qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
IB_SIGNAL_ALL_WR) ? true : false);
@ -1132,7 +1131,8 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
if (qp_init_attr->qp_type == IB_QPT_GSI) {
if (qp_init_attr->qp_type == IB_QPT_GSI &&
!(bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))) {
/* Allocate 1 more than what's provided */
entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
@ -1361,17 +1361,15 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
struct ib_umem *umem;
int bytes = 0;
struct ib_ucontext *context = pd->ib_pd.uobject->context;
struct bnxt_re_ucontext *cntx = container_of(context,
struct bnxt_re_ucontext,
ib_uctx);
struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(context, ureq.srqva, bytes,
IB_ACCESS_LOCAL_WRITE, 1);
umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem))
return PTR_ERR(umem);
@ -1646,6 +1644,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
__from_ib_access_flags(qp_attr->qp_access_flags);
/* LOCAL_WRITE access must be set to allow RC receive */
qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
/* Temp: Set all params on QP as of now */
qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
}
if (qp_attr_mask & IB_QP_PKEY_INDEX) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
@ -2093,7 +2094,8 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
static int is_ud_qp(struct bnxt_re_qp *qp)
{
return qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD;
return (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD ||
qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI);
}
static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
@ -2397,7 +2399,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
if (ib_qp->qp_type == IB_QPT_GSI) {
if (qp->qplib_qp.type == CMDQ_CREATE_QP1_TYPE_GSI) {
rc = bnxt_re_build_qp1_send_v2(qp, wr, &wqe,
payload_sz);
if (rc)
@ -2527,7 +2529,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
wqe.wr_id = wr->wr_id;
wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
if (ib_qp->qp_type == IB_QPT_GSI)
if (ib_qp->qp_type == IB_QPT_GSI &&
qp->qplib_qp.type != CMDQ_CREATE_QP_TYPE_GSI)
rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, &wqe,
payload_sz);
if (!rc)
@ -2622,7 +2625,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
goto fail;
}
cq->umem = ib_umem_get(context, req.cq_va,
cq->umem = ib_umem_get(udata, req.cq_va,
entries * sizeof(struct cq_base),
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(cq->umem)) {
@ -3122,19 +3125,33 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
}
}
static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
struct ib_wc *wc,
struct bnxt_qplib_cqe *cqe)
{
u8 nw_type;
wc->opcode = IB_WC_RECV;
wc->status = __rc_to_ib_wc_status(cqe->status);
if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
if (cqe->flags & CQ_RES_UD_FLAGS_IMM)
wc->wc_flags |= IB_WC_WITH_IMM;
if (cqe->flags & CQ_RES_RC_FLAGS_INV)
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) ==
(CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM))
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
/* report only on GSI QP for Thor */
if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI) {
wc->wc_flags |= IB_WC_GRH;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= IB_WC_WITH_SMAC;
if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) {
wc->vlan_id = (cqe->cfa_meta & 0xFFF);
if (wc->vlan_id < 0x1000)
wc->wc_flags |= IB_WC_WITH_VLAN;
}
nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >>
CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT;
wc->network_hdr_type = bnxt_re_to_ib_nw_type(nw_type);
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
}
static int send_phantom_wqe(struct bnxt_re_qp *qp)
@ -3226,7 +3243,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
switch (cqe->opcode) {
case CQ_BASE_CQE_TYPE_REQ:
if (qp->qplib_qp.id ==
if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
qp->rdev->qp1_sqp->qplib_qp.id) {
/* Handle this completion with
* the stored completion
@ -3261,7 +3278,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
bnxt_re_process_res_rc_wc(wc, cqe);
break;
case CQ_BASE_CQE_TYPE_RES_UD:
if (qp->qplib_qp.id ==
if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
qp->rdev->qp1_sqp->qplib_qp.id) {
/* Handle this completion with
* the stored completion
@ -3274,7 +3291,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
break;
}
}
bnxt_re_process_res_ud_wc(wc, cqe);
bnxt_re_process_res_ud_wc(qp, wc, cqe);
break;
default:
dev_err(rdev_to_dev(cq->rdev),
@ -3301,10 +3318,10 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
spin_lock_irqsave(&cq->cq_lock, flags);
/* Trigger on the very next completion */
if (ib_cqn_flags & IB_CQ_NEXT_COMP)
type = DBR_DBR_TYPE_CQ_ARMALL;
type = DBC_DBC_TYPE_CQ_ARMALL;
/* Trigger on the next solicited completion */
else if (ib_cqn_flags & IB_CQ_SOLICITED)
type = DBR_DBR_TYPE_CQ_ARMSE;
type = DBC_DBC_TYPE_CQ_ARMSE;
/* Poll to see if there are missed events */
if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
@ -3537,19 +3554,14 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
u64 *pbl_tbl = pbl_tbl_orig;
u64 paddr;
u64 page_mask = (1ULL << page_shift) - 1;
int i, pages;
struct scatterlist *sg;
int entry;
struct sg_dma_page_iter sg_iter;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> PAGE_SHIFT;
for (i = 0; i < pages; i++) {
paddr = sg_dma_address(sg) + (i << PAGE_SHIFT);
if (pbl_tbl == pbl_tbl_orig)
*pbl_tbl++ = paddr & ~page_mask;
else if ((paddr & page_mask) == 0)
*pbl_tbl++ = paddr;
}
for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
paddr = sg_page_iter_dma_address(&sg_iter);
if (pbl_tbl == pbl_tbl_orig)
*pbl_tbl++ = paddr & ~page_mask;
else if ((paddr & page_mask) == 0)
*pbl_tbl++ = paddr;
}
return pbl_tbl - pbl_tbl_orig;
}
@ -3589,8 +3601,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
/* The fixed portion of the rkey is the same as the lkey */
mr->ib_mr.rkey = mr->qplib_mr.rkey;
umem = ib_umem_get(ib_pd->uobject->context, start, length,
mr_access_flags, 0);
umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
if (IS_ERR(umem)) {
dev_err(rdev_to_dev(rdev), "Failed to get umem");
rc = -EFAULT;
@ -3613,7 +3624,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
goto free_umem;
}
page_shift = umem->page_shift;
page_shift = PAGE_SHIFT;
if (!bnxt_re_page_size_ok(page_shift)) {
dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
@ -3660,13 +3671,15 @@ free_mr:
return ERR_PTR(rc);
}
struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
{
struct ib_device *ibdev = ctx->device;
struct bnxt_re_ucontext *uctx =
container_of(ctx, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_re_uctx_resp resp;
struct bnxt_re_ucontext *uctx;
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
struct bnxt_re_uctx_resp resp;
u32 chip_met_rev_num = 0;
int rc;
dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
@ -3675,13 +3688,9 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
BNXT_RE_ABI_VERSION);
return ERR_PTR(-EPERM);
return -EPERM;
}
uctx = kzalloc(sizeof(*uctx), GFP_KERNEL);
if (!uctx)
return ERR_PTR(-ENOMEM);
uctx->rdev = rdev;
uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
@ -3691,37 +3700,45 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
}
spin_lock_init(&uctx->sh_lock);
resp.dev_id = rdev->en_dev->pdev->devfn; /*Temp, Use idr_alloc instead*/
resp.comp_mask = BNXT_RE_UCNTX_CMASK_HAVE_CCTX;
chip_met_rev_num = rdev->chip_ctx.chip_num;
chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) <<
BNXT_RE_CHIP_ID0_CHIP_REV_SFT;
chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_metal & 0xFF) <<
BNXT_RE_CHIP_ID0_CHIP_MET_SFT;
resp.chip_id0 = chip_met_rev_num;
/* Future extension of chip info */
resp.chip_id1 = 0;
/*Temp, Use idr_alloc instead */
resp.dev_id = rdev->en_dev->pdev->devfn;
resp.max_qp = rdev->qplib_ctx.qpc_count;
resp.pg_size = PAGE_SIZE;
resp.cqe_sz = sizeof(struct cq_base);
resp.max_cqd = dev_attr->max_cq_wqes;
resp.rsvd = 0;
rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to copy user context");
rc = -EFAULT;
goto cfail;
}
return &uctx->ib_uctx;
return 0;
cfail:
free_page((unsigned long)uctx->shpg);
uctx->shpg = NULL;
fail:
kfree(uctx);
return ERR_PTR(rc);
return rc;
}
int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
{
struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
struct bnxt_re_ucontext,
ib_uctx);
struct bnxt_re_dev *rdev = uctx->rdev;
int rc = 0;
if (uctx->shpg)
free_page((unsigned long)uctx->shpg);
@ -3730,17 +3747,10 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
/* Free DPI only if this is the first PD allocated by the
* application and mark the context dpi as NULL
*/
rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl,
&uctx->dpi);
if (rc)
dev_err(rdev_to_dev(rdev), "Deallocate HW DPI failed!");
/* Don't fail, continue*/
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl, &uctx->dpi);
uctx->dpi.dbr = NULL;
}
kfree(uctx);
return 0;
}
/* Helper function to mmap the virtual memory from user app */

View File

@ -56,8 +56,8 @@ struct bnxt_re_fence_data {
};
struct bnxt_re_pd {
struct ib_pd ib_pd;
struct bnxt_re_dev *rdev;
struct ib_pd ib_pd;
struct bnxt_qplib_pd qplib_pd;
struct bnxt_re_fence_data fence;
};
@ -135,8 +135,8 @@ struct bnxt_re_mw {
};
struct bnxt_re_ucontext {
struct ib_ucontext ib_uctx;
struct bnxt_re_dev *rdev;
struct ib_ucontext ib_uctx;
struct bnxt_qplib_dpi dpi;
void *shpg;
spinlock_t sh_lock; /* protect shpg */
@ -163,10 +163,9 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
int index, union ib_gid *gid);
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
u8 port_num);
struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata);
int bnxt_re_dealloc_pd(struct ib_pd *pd);
int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata);
void bnxt_re_dealloc_pd(struct ib_pd *pd);
struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
@ -216,9 +215,8 @@ int bnxt_re_dealloc_mw(struct ib_mw *mw);
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata);
int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);

View File

@ -80,6 +80,29 @@ static DEFINE_MUTEX(bnxt_re_dev_lock);
static struct workqueue_struct *bnxt_re_wq;
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev);
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
{
rdev->rcfw.res = NULL;
rdev->qplib_res.cctx = NULL;
}
static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev;
struct bnxt *bp;
en_dev = rdev->en_dev;
bp = netdev_priv(en_dev->net);
rdev->chip_ctx.chip_num = bp->chip_num;
/* rest members to follow eventually */
rdev->qplib_res.cctx = &rdev->chip_ctx;
rdev->rcfw.res = &rdev->qplib_res;
return 0;
}
/* SR-IOV helper functions */
static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
@ -278,6 +301,7 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
&bnxt_re_ulp_ops, rdev);
rdev->qplib_res.pdev = rdev->en_dev->pdev;
return rc;
}
@ -345,7 +369,8 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
fw_msg->timeout = timeout;
}
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
u16 fw_ring_id, int type)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_ring_free_input req = {0};
@ -359,7 +384,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
memset(&fw_msg, 0, sizeof(fw_msg));
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
req.ring_type = type;
req.ring_id = cpu_to_le16(fw_ring_id);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
@ -396,7 +421,7 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
/* Association of ring index with doorbell index and MSIX number */
req.logical_id = cpu_to_le16(map_index);
req.length = cpu_to_le32(ring_mask + 1);
req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
req.ring_type = type;
req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
@ -538,7 +563,8 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
@ -547,7 +573,8 @@ static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
}
@ -610,6 +637,8 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.query_srq = bnxt_re_query_srq,
.reg_user_mr = bnxt_re_reg_user_mr,
.req_notify_cq = bnxt_re_req_notify_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
};
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
@ -662,7 +691,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
return ib_register_device(ibdev, "bnxt_re%d", NULL);
return ib_register_device(ibdev, "bnxt_re%d");
}
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
@ -686,7 +715,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
struct bnxt_re_dev *rdev;
/* Allocate bnxt_re_dev instance here */
rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
rdev = ib_alloc_device(bnxt_re_dev, ibdev);
if (!rdev) {
dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
ROCE_DRV_MODULE_NAME);
@ -858,6 +887,12 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
return 0;
}
static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
{
return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
0x10000 : rdev->msix_entries[indx].db_offset;
}
static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
@ -871,18 +906,18 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
int num_vec_enabled = 0;
int rc = 0, i;
u32 db_offt;
bnxt_qplib_init_res(&rdev->qplib_res);
for (i = 1; i < rdev->num_msix ; i++) {
db_offt = bnxt_re_get_nqdb_offset(rdev, i);
rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
i - 1, rdev->msix_entries[i].vector,
rdev->msix_entries[i].db_offset,
&bnxt_re_cqn_handler,
db_offt, &bnxt_re_cqn_handler,
&bnxt_re_srqn_handler);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to enable NQ with rc = 0x%x", rc);
@ -894,16 +929,18 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
fail:
for (i = num_vec_enabled; i >= 0; i--)
bnxt_qplib_disable_nq(&rdev->nq[i]);
return rc;
}
static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
{
u8 type;
int i;
for (i = 0; i < rdev->num_msix - 1; i++) {
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
rdev->nq[i].res = NULL;
bnxt_qplib_free_nq(&rdev->nq[i]);
}
}
@ -925,8 +962,11 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
int num_vec_created = 0;
dma_addr_t *pg_map;
int rc = 0, i;
int pages;
u8 type;
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
@ -947,6 +987,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
goto dealloc_res;
for (i = 0; i < rdev->num_msix - 1; i++) {
rdev->nq[i].res = &rdev->qplib_res;
rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
BNXT_RE_MAX_SRQC_COUNT + 2;
rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]);
@ -955,13 +996,13 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
i, rc);
goto free_nq;
}
rc = bnxt_re_net_ring_alloc
(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count,
HWRM_RING_ALLOC_CMPL,
BNXT_QPLIB_NQE_MAX_CNT - 1,
rdev->msix_entries[i + 1].ring_idx,
&rdev->nq[i].ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
pg_map = rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr;
pages = rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count;
rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
BNXT_QPLIB_NQE_MAX_CNT - 1,
rdev->msix_entries[i + 1].ring_idx,
&rdev->nq[i].ring_id);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to allocate NQ fw id with rc = 0x%x",
@ -974,7 +1015,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
return 0;
free_nq:
for (i = num_vec_created; i >= 0; i--) {
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
bnxt_qplib_free_nq(&rdev->nq[i]);
}
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
@ -1228,6 +1270,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{
u8 type;
int rc;
if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
@ -1251,7 +1294,8 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
}
if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
@ -1260,6 +1304,8 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
dev_warn(rdev_to_dev(rdev),
"Failed to free MSI-X vectors: %#x", rc);
}
bnxt_re_destroy_chip_ctx(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
rc = bnxt_re_unregister_netdev(rdev);
if (rc)
@ -1280,9 +1326,12 @@ static void bnxt_re_worker(struct work_struct *work)
static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
{
int rc;
dma_addr_t *pg_map;
u32 db_offt, ridx;
int pages, vid;
bool locked;
u8 type;
int rc;
/* Acquire rtnl lock through out this function */
rtnl_lock();
@ -1297,6 +1346,12 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
rc = bnxt_re_setup_chip_ctx(rdev);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to get chip context\n");
return -EINVAL;
}
/* Check whether VF or PF */
bnxt_re_get_sriov_func_type(rdev);
@ -1320,21 +1375,22 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
pr_err("Failed to allocate RCFW Channel: %#x\n", rc);
goto fail;
}
rc = bnxt_re_net_ring_alloc
(rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr,
rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count,
HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1,
rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx,
&rdev->rcfw.creq_ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
pg_map = rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr;
pages = rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count;
ridx = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
BNXT_QPLIB_CREQE_MAX_CNT - 1,
ridx, &rdev->rcfw.creq_ring_id);
if (rc) {
pr_err("Failed to allocate CREQ: %#x\n", rc);
goto free_rcfw;
}
rc = bnxt_qplib_enable_rcfw_channel
(rdev->en_dev->pdev, &rdev->rcfw,
rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
rdev->is_virtfn, &bnxt_re_aeq_handler);
db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector;
rc = bnxt_qplib_enable_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
vid, db_offt, rdev->is_virtfn,
&bnxt_re_aeq_handler);
if (rc) {
pr_err("Failed to enable RCFW channel: %#x\n", rc);
goto free_ring;
@ -1347,7 +1403,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
if (!rdev->is_virtfn)
bnxt_re_set_resource_limits(rdev);
rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0,
bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx));
if (rc) {
pr_err("Failed to allocate QPLIB context: %#x\n", rc);
goto disable_rcfw;
@ -1418,7 +1475,8 @@ free_ctx:
disable_rcfw:
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
free_ring:
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
free_rcfw:
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
fail:

View File

@ -44,6 +44,7 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/prefetch.h>
#include <linux/if_ether.h>
#include "roce_hsi.h"
@ -244,6 +245,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
u16 type;
int budget = nq->budget;
uintptr_t q_handle;
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
/* Service the NQ until empty */
raw_cons = hwq->cons;
@ -290,7 +292,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
<< 32;
bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle,
DBR_DBR_TYPE_SRQ_ARMENA);
DBC_DBC_TYPE_SRQ_ARMENA);
if (!nq->srqn_handler(nq,
(struct bnxt_qplib_srq *)q_handle,
nqsrqe->event))
@ -312,7 +314,9 @@ static void bnxt_qplib_service_nq(unsigned long data)
}
if (hwq->cons != raw_cons) {
hwq->cons = raw_cons;
NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements);
bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, hwq->cons,
hwq->max_elements, nq->ring_id,
gen_p5);
}
}
@ -336,9 +340,11 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
tasklet_disable(&nq->worker);
/* Mask h/w interrupt */
NQ_DB(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
bnxt_qplib_ring_nq_db(nq->bar_reg_iomem, nq->hwq.cons,
nq->hwq.max_elements, nq->ring_id, gen_p5);
/* Sync with last running IRQ handler */
synchronize_irq(nq->vector);
if (kill)
@ -373,6 +379,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
int msix_vector, bool need_init)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
int rc;
if (nq->requested)
@ -399,7 +406,8 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
nq->vector, nq_indx);
}
nq->requested = true;
NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, nq->hwq.cons,
nq->hwq.max_elements, nq->ring_id, gen_p5);
return rc;
}
@ -433,7 +441,8 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
rc = -ENOMEM;
goto fail;
}
nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4);
/* Unconditionally map 8 bytes to support 57500 series */
nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 8);
if (!nq->bar_reg_iomem) {
rc = -ENOMEM;
goto fail;
@ -462,15 +471,17 @@ void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
{
u8 hwq_type;
nq->pdev = pdev;
if (!nq->hwq.max_elements ||
nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
hwq_type = bnxt_qplib_get_hwq_type(nq->res);
if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0,
&nq->hwq.max_elements,
BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0,
PAGE_SIZE, HWQ_TYPE_L2_CMPL))
PAGE_SIZE, hwq_type))
return -ENOMEM;
nq->budget = 8;
@ -481,21 +492,19 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type)
{
struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
struct dbr_dbr db_msg = { 0 };
void __iomem *db;
u32 sw_prod = 0;
u32 sw_prod;
u64 val = 0;
/* Ring DB */
sw_prod = (arm_type == DBR_DBR_TYPE_SRQ_ARM) ? srq->threshold :
HWQ_CMP(srq_hwq->prod, srq_hwq);
db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid = cpu_to_le32(((srq->id << DBR_DBR_XID_SFT) &
DBR_DBR_XID_MASK) | arm_type);
db = (arm_type == DBR_DBR_TYPE_SRQ_ARMENA) ?
srq->dbr_base : srq->dpi->dbr;
wmb(); /* barrier before db ring */
__iowrite64_copy(db, &db_msg, sizeof(db_msg) / sizeof(u64));
sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ?
srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq);
db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base :
srq->dpi->dbr;
val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
val <<= 32;
val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
writeq(val, db);
}
int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
@ -590,7 +599,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
srq->id = le32_to_cpu(resp.xid);
srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
if (srq->threshold)
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARMENA);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARMENA);
srq->arm_req = false;
return 0;
@ -614,7 +623,7 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
srq_hwq->max_elements - sw_cons + sw_prod;
if (count > srq->threshold) {
srq->arm_req = false;
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
} else {
/* Deferred arming */
srq->arm_req = true;
@ -702,10 +711,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
srq_hwq->max_elements - sw_cons + sw_prod;
spin_unlock(&srq_hwq->lock);
/* Ring DB */
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ);
if (srq->arm_req == true && count > srq->threshold) {
srq->arm_req = false;
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
}
done:
return rc;
@ -853,18 +862,19 @@ exit:
int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
struct cmdq_create_qp req;
struct creq_create_qp_resp resp;
struct bnxt_qplib_pbl *pbl;
struct sq_psn_search **psn_search_ptr;
unsigned long int psn_search, poff = 0;
struct sq_psn_search **psn_search_ptr;
struct bnxt_qplib_q *sq = &qp->sq;
struct bnxt_qplib_q *rq = &qp->rq;
int i, rc, req_size, psn_sz = 0;
struct sq_send **hw_sq_send_ptr;
struct creq_create_qp_resp resp;
struct bnxt_qplib_hwq *xrrq;
int i, rc, req_size, psn_sz;
u16 cmd_flags = 0, max_ssge;
u32 sw_prod, qp_flags = 0;
struct cmdq_create_qp req;
struct bnxt_qplib_pbl *pbl;
u32 qp_flags = 0;
u16 max_rsge;
RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
@ -874,8 +884,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
req.qp_handle = cpu_to_le64(qp->qp_handle);
/* SQ */
psn_sz = (qp->type == CMDQ_CREATE_QP_TYPE_RC) ?
sizeof(struct sq_psn_search) : 0;
if (qp->type == CMDQ_CREATE_QP_TYPE_RC) {
psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ?
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
}
sq->hwq.max_elements = sq->max_wqe;
rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist,
sq->nmap, &sq->hwq.max_elements,
@ -905,10 +918,16 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
poff = (psn_search & ~PAGE_MASK) /
BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE;
}
for (i = 0; i < sq->hwq.max_elements; i++)
for (i = 0; i < sq->hwq.max_elements; i++) {
sq->swq[i].psn_search =
&psn_search_ptr[get_psne_pg(i + poff)]
[get_psne_idx(i + poff)];
/*psns_ext will be used only for P5 chips. */
sq->swq[i].psn_ext =
(struct sq_psn_search_ext *)
&psn_search_ptr[get_psne_pg(i + poff)]
[get_psne_idx(i + poff)];
}
}
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
@ -929,14 +948,6 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G :
CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K);
/* initialize all SQ WQEs to LOCAL_INVALID (sq prep for hw fetch) */
hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
for (sw_prod = 0; sw_prod < sq->hwq.max_elements; sw_prod++) {
hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
[get_sqe_idx(sw_prod)];
hw_sq_send_hdr->wqe_type = SQ_BASE_WQE_TYPE_LOCAL_INVALID;
}
if (qp->scq)
req.scq_cid = cpu_to_le32(qp->scq->id);
@ -1007,8 +1018,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
req.sq_fwo_sq_sge = cpu_to_le16(
((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
<< CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge;
req.rq_fwo_rq_sge = cpu_to_le16(
((rq->max_sge & CMDQ_CREATE_QP_RQ_SGE_MASK)
((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK)
<< CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
/* ORRQ and IRRQ */
if (psn_sz) {
@ -1053,6 +1065,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
qp->cctx = res->cctx;
INIT_LIST_HEAD(&qp->sq_flush);
INIT_LIST_HEAD(&qp->rq_flush);
rcfw->qp_tbl[qp->id].qp_id = qp->id;
@ -1494,19 +1507,16 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *sq = &qp->sq;
struct dbr_dbr db_msg = { 0 };
u32 sw_prod;
u64 val = 0;
val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
DBC_DBC_TYPE_SQ);
val <<= 32;
sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid =
cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
DBR_DBR_TYPE_SQ);
val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
/* Flush all the WQE writes to HW */
wmb();
__iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, qp->dpi->dbr);
}
int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
@ -1617,7 +1627,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
((offsetof(typeof(*sqe), data) + 15) >> 4);
sqe->inv_key_or_imm_data = cpu_to_le32(
wqe->send.inv_key);
if (qp->type == CMDQ_CREATE_QP_TYPE_UD) {
if (qp->type == CMDQ_CREATE_QP_TYPE_UD ||
qp->type == CMDQ_CREATE_QP_TYPE_GSI) {
sqe->q_key = cpu_to_le32(wqe->send.q_key);
sqe->dst_qp = cpu_to_le32(
wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
@ -1741,14 +1752,26 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
swq->next_psn = sq->psn & BTH_PSN_MASK;
if (swq->psn_search) {
swq->psn_search->opcode_start_psn = cpu_to_le32(
((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
SQ_PSN_SEARCH_START_PSN_MASK) |
((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
SQ_PSN_SEARCH_OPCODE_MASK));
swq->psn_search->flags_next_psn = cpu_to_le32(
((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
SQ_PSN_SEARCH_NEXT_PSN_MASK));
u32 opcd_spsn;
u32 flg_npsn;
opcd_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
SQ_PSN_SEARCH_START_PSN_MASK);
opcd_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
SQ_PSN_SEARCH_OPCODE_MASK);
flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
SQ_PSN_SEARCH_NEXT_PSN_MASK);
if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
swq->psn_ext->opcode_start_psn =
cpu_to_le32(opcd_spsn);
swq->psn_ext->flags_next_psn =
cpu_to_le32(flg_npsn);
} else {
swq->psn_search->opcode_start_psn =
cpu_to_le32(opcd_spsn);
swq->psn_search->flags_next_psn =
cpu_to_le32(flg_npsn);
}
}
queue_err:
if (sch_handler) {
@ -1785,19 +1808,16 @@ done:
void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *rq = &qp->rq;
struct dbr_dbr db_msg = { 0 };
u32 sw_prod;
u64 val = 0;
val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
DBC_DBC_TYPE_RQ);
val <<= 32;
sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid =
cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
DBR_DBR_TYPE_RQ);
val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
/* Flush the writes to HW Rx WQE before the ringing Rx DB */
wmb();
__iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, qp->dpi->dbr);
}
int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
@ -1881,32 +1901,28 @@ done:
/* Spinlock must be held */
static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq)
{
struct dbr_dbr db_msg = { 0 };
u64 val = 0;
db_msg.type_xid =
cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
DBR_DBR_TYPE_CQ_ARMENA);
val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
DBC_DBC_TYPE_CQ_ARMENA;
val <<= 32;
/* Flush memory writes before enabling the CQ */
wmb();
__iowrite64_copy(cq->dbr_base, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, cq->dbr_base);
}
static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
{
struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
struct dbr_dbr db_msg = { 0 };
u32 sw_cons;
u64 val = 0;
/* Ring DB */
val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
val <<= 32;
sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq);
db_msg.index = cpu_to_le32((sw_cons << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid =
cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
arm_type);
val |= (sw_cons << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
/* flush memory writes before arming the CQ */
wmb();
__iowrite64_copy(cq->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, cq->dpi->dbr);
}
int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
@ -2053,6 +2069,7 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
opcode = CQ_BASE_CQE_TYPE_RES_RC;
break;
case CMDQ_CREATE_QP_TYPE_UD:
case CMDQ_CREATE_QP_TYPE_GSI:
opcode = CQ_BASE_CQE_TYPE_RES_UD;
break;
}
@ -2125,7 +2142,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
sq->send_phantom = true;
/* TODO: Only ARM if the previous SQE is ARMALL */
bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ_ARMALL);
rc = -EAGAIN;
goto out;
@ -2410,12 +2427,14 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
cqe->length = le32_to_cpu(hwcqe->length);
cqe->length = (u32)le16_to_cpu(hwcqe->length);
cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata);
cqe->invrkey = le32_to_cpu(hwcqe->imm_data);
cqe->flags = le16_to_cpu(hwcqe->flags);
cqe->status = hwcqe->status;
cqe->qp_handle = (u64)(unsigned long)qp;
memcpy(cqe->smac, hwcqe->src_mac, 6);
/*FIXME: Endianness fix needed for smace */
memcpy(cqe->smac, hwcqe->src_mac, ETH_ALEN);
wr_id_idx = le32_to_cpu(hwcqe->src_qp_high_srq_or_rq_wr_id)
& CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK;
cqe->src_qp = le16_to_cpu(hwcqe->src_qp_low) |
@ -2794,7 +2813,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
}
if (cq->hwq.cons != raw_cons) {
cq->hwq.cons = raw_cons;
bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ);
}
exit:
return num_cqes - budget;

View File

@ -106,6 +106,7 @@ struct bnxt_qplib_swq {
u32 start_psn;
u32 next_psn;
struct sq_psn_search *psn_search;
struct sq_psn_search_ext *psn_ext;
};
struct bnxt_qplib_swqe {
@ -254,6 +255,7 @@ struct bnxt_qplib_q {
struct bnxt_qplib_qp {
struct bnxt_qplib_pd *pd;
struct bnxt_qplib_dpi *dpi;
struct bnxt_qplib_chip_ctx *cctx;
u64 qp_handle;
#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
u32 id;
@ -347,6 +349,7 @@ struct bnxt_qplib_cqe {
u8 type;
u8 opcode;
u32 length;
u16 cfa_meta;
u64 wr_id;
union {
__be32 immdata;
@ -432,13 +435,47 @@ struct bnxt_qplib_cq {
#define NQ_DB_CP_FLAGS (NQ_DB_KEY_CP | \
NQ_DB_IDX_VALID | \
NQ_DB_IRQ_DIS)
#define NQ_DB_REARM(db, raw_cons, cp_bit) \
writel(NQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
#define NQ_DB(db, raw_cons, cp_bit) \
writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
static inline void bnxt_qplib_ring_nq_db64(void __iomem *db, u32 index,
u32 xid, bool arm)
{
u64 val;
val = xid & DBC_DBC_XID_MASK;
val |= DBC_DBC_PATH_ROCE;
val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
val <<= 32;
val |= index & DBC_DBC_INDEX_MASK;
writeq(val, db);
}
static inline void bnxt_qplib_ring_nq_db_rearm(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_nq_db64(db, index, xid, true);
else
writel(NQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), db);
}
static inline void bnxt_qplib_ring_nq_db(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_nq_db64(db, index, xid, false);
else
writel(NQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), db);
}
struct bnxt_qplib_nq {
struct pci_dev *pdev;
struct bnxt_qplib_res *res;
int vector;
cpumask_t mask;
@ -448,7 +485,7 @@ struct bnxt_qplib_nq {
struct bnxt_qplib_hwq hwq;
u16 bar_reg;
u16 bar_reg_off;
u32 bar_reg_off;
u16 ring_id;
void __iomem *bar_reg_iomem;

View File

@ -359,11 +359,12 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
static void bnxt_qplib_service_creq(unsigned long data)
{
struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
struct bnxt_qplib_hwq *creq = &rcfw->creq;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
struct creq_base *creqe, **creq_ptr;
u32 sw_cons, raw_cons;
unsigned long flags;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
/* Service the CREQ until budget is over */
spin_lock_irqsave(&creq->lock, flags);
@ -407,8 +408,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
if (creq->cons != raw_cons) {
creq->cons = raw_cons;
CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
creq->max_elements);
bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
raw_cons, creq->max_elements,
rcfw->creq_ring_id, gen_p5);
}
spin_unlock_irqrestore(&creq->lock, flags);
}
@ -480,11 +482,13 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
RCFW_DBR_BASE_PAGE_SHIFT);
/*
* VFs need not setup the HW context area, PF
* Gen P5 devices doesn't require this allocation
* as the L2 driver does the same for RoCE also.
* Also, VFs need not setup the HW context area, PF
* shall setup this area for VF. Skipping the
* HW programming
*/
if (is_virtfn)
if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
goto skip_ctx_setup;
level = ctx->qpc_tbl.level;
@ -560,12 +564,15 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx,
int qp_tbl_sz)
{
u8 hwq_type;
rcfw->pdev = pdev;
rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
hwq_type = bnxt_qplib_get_hwq_type(rcfw->res);
if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0,
&rcfw->creq.max_elements,
BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_L2_CMPL)) {
BNXT_QPLIB_CREQE_UNITS,
0, PAGE_SIZE, hwq_type)) {
dev_err(&rcfw->pdev->dev,
"HW channel CREQ allocation failed\n");
goto fail;
@ -607,10 +614,13 @@ fail:
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
tasklet_disable(&rcfw->worker);
/* Mask h/w interrupts */
CREQ_DB(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
rcfw->creq.max_elements);
bnxt_qplib_ring_creq_db(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
rcfw->creq.max_elements, rcfw->creq_ring_id,
gen_p5);
/* Sync with last running IRQ-handler */
synchronize_irq(rcfw->vector);
if (kill)
@ -647,6 +657,7 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
bool need_init)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
int rc;
if (rcfw->requested)
@ -663,8 +674,9 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
if (rc)
return rc;
rcfw->requested = true;
CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
rcfw->creq.max_elements);
bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
rcfw->creq.cons, rcfw->creq.max_elements,
rcfw->creq_ring_id, gen_p5);
return 0;
}
@ -684,8 +696,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
/* General */
rcfw->seq_num = 0;
set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth *
sizeof(unsigned long));
bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long);
rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
if (!rcfw->cmdq_bitmap)
return -ENOMEM;
@ -718,8 +729,9 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
dev_err(&rcfw->pdev->dev,
"CREQ BAR region %d resc start is 0!\n",
rcfw->creq_bar_reg);
/* Unconditionally map 8 bytes to support 57500 series */
rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
4);
8);
if (!rcfw->creq_bar_reg_iomem) {
dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
rcfw->creq_bar_reg);

View File

@ -157,10 +157,46 @@ static inline u32 get_creq_idx(u32 val)
#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \
CREQ_DB_IDX_VALID | \
CREQ_DB_IRQ_DIS)
#define CREQ_DB_REARM(db, raw_cons, cp_bit) \
writel(CREQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
#define CREQ_DB(db, raw_cons, cp_bit) \
writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index,
u32 xid, bool arm)
{
u64 val = 0;
val = xid & DBC_DBC_XID_MASK;
val |= DBC_DBC_PATH_ROCE;
val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
val <<= 32;
val |= index & DBC_DBC_INDEX_MASK;
writeq(val, db);
}
static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_creq_db64(db, index, xid, true);
else
writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK),
db);
}
static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_creq_db64(db, index, xid, true);
else
writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK),
db);
}
#define CREQ_ENTRY_POLL_BUDGET 0x100
@ -187,6 +223,7 @@ struct bnxt_qplib_qp_node {
/* RCFW Communication Channels */
struct bnxt_qplib_rcfw {
struct pci_dev *pdev;
struct bnxt_qplib_res *res;
int vector;
struct tasklet_struct worker;
bool requested;

View File

@ -85,7 +85,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
struct scatterlist *sghead, u32 pages, u32 pg_size)
{
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
bool is_umem = false;
int i;
@ -116,13 +116,11 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
} else {
i = 0;
is_umem = true;
for_each_sg(sghead, sg, pages, i) {
pbl->pg_map_arr[i] = sg_dma_address(sg);
pbl->pg_arr[i] = sg_virt(sg);
if (!pbl->pg_arr[i])
goto fail;
for_each_sg_dma_page (sghead, &sg_iter, pages, 0) {
pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
pbl->pg_arr[i] = NULL;
pbl->pg_count++;
i++;
}
}
@ -330,13 +328,13 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev,
*/
int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx,
bool virt_fn)
bool virt_fn, bool is_p5)
{
int i, j, k, rc = 0;
int fnz_idx = -1;
__le64 **pbl_ptr;
if (virt_fn)
if (virt_fn || is_p5)
goto stats_alloc;
/* QPC Tables */
@ -762,7 +760,11 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
{
memset(stats, 0, sizeof(*stats));
stats->fw_id = -1;
stats->size = sizeof(struct ctx_hw_stats);
/* 128 byte aligned context memory is required only for 57500.
* However making this unconditional, it does not harm previous
* generation.
*/
stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128);
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma) {

View File

@ -180,12 +180,20 @@ struct bnxt_qplib_ctx {
u64 hwrm_intf_ver;
};
struct bnxt_qplib_chip_ctx {
u16 chip_num;
u8 chip_rev;
u8 chip_metal;
};
#define CHIP_NUM_57500 0x1750
struct bnxt_qplib_res {
struct pci_dev *pdev;
struct bnxt_qplib_chip_ctx *cctx;
struct net_device *netdev;
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
struct bnxt_qplib_sgid_tbl sgid_tbl;
struct bnxt_qplib_pkey_tbl pkey_tbl;
@ -193,6 +201,24 @@ struct bnxt_qplib_res {
bool prio;
};
static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
{
return (cctx->chip_num == CHIP_NUM_57500);
}
static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
{
return bnxt_qplib_is_chip_gen_p5(res->cctx) ?
HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL;
}
static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx)
{
return bnxt_qplib_is_chip_gen_p5(cctx) ?
RING_ALLOC_REQ_RING_TYPE_NQ :
RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL;
}
#define to_bnxt_qplib(ptr, type, member) \
container_of(ptr, type, member)
@ -226,5 +252,5 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx);
int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx,
bool virt_fn);
bool virt_fn, bool is_p5);
#endif /* __BNXT_QPLIB_RES_H__ */

View File

@ -119,7 +119,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
* reporting the max number
*/
attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
attr->max_qp_sges = sb->max_sge;
attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ?
6 : sb->max_sge;
attr->max_cq = le32_to_cpu(sb->max_cq);
attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
attr->max_cq_sges = attr->max_qp_sges;

View File

@ -49,11 +49,11 @@ struct cmpl_doorbell {
#define CMPL_DOORBELL_IDX_SFT 0
#define CMPL_DOORBELL_RESERVED_MASK 0x3000000UL
#define CMPL_DOORBELL_RESERVED_SFT 24
#define CMPL_DOORBELL_IDX_VALID 0x4000000UL
#define CMPL_DOORBELL_IDX_VALID 0x4000000UL
#define CMPL_DOORBELL_MASK 0x8000000UL
#define CMPL_DOORBELL_KEY_MASK 0xf0000000UL
#define CMPL_DOORBELL_KEY_SFT 28
#define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
#define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
};
/* Status Door Bell Format (4 bytes) */
@ -71,46 +71,56 @@ struct status_doorbell {
/* RoCE Host Structures */
/* Doorbell Structures */
/* 64b Doorbell Format (8 bytes) */
struct dbr_dbr {
__le32 index;
#define DBR_DBR_INDEX_MASK 0xfffffUL
#define DBR_DBR_INDEX_SFT 0
#define DBR_DBR_RESERVED12_MASK 0xfff00000UL
#define DBR_DBR_RESERVED12_SFT 20
__le32 type_xid;
#define DBR_DBR_XID_MASK 0xfffffUL
#define DBR_DBR_XID_SFT 0
#define DBR_DBR_RESERVED8_MASK 0xff00000UL
#define DBR_DBR_RESERVED8_SFT 20
#define DBR_DBR_TYPE_MASK 0xf0000000UL
#define DBR_DBR_TYPE_SFT 28
#define DBR_DBR_TYPE_SQ (0x0UL << 28)
#define DBR_DBR_TYPE_RQ (0x1UL << 28)
#define DBR_DBR_TYPE_SRQ (0x2UL << 28)
#define DBR_DBR_TYPE_SRQ_ARM (0x3UL << 28)
#define DBR_DBR_TYPE_CQ (0x4UL << 28)
#define DBR_DBR_TYPE_CQ_ARMSE (0x5UL << 28)
#define DBR_DBR_TYPE_CQ_ARMALL (0x6UL << 28)
#define DBR_DBR_TYPE_CQ_ARMENA (0x7UL << 28)
#define DBR_DBR_TYPE_SRQ_ARMENA (0x8UL << 28)
#define DBR_DBR_TYPE_CQ_CUTOFF_ACK (0x9UL << 28)
#define DBR_DBR_TYPE_NULL (0xfUL << 28)
/* dbc_dbc (size:64b/8B) */
struct dbc_dbc {
__le32 index;
#define DBC_DBC_INDEX_MASK 0xffffffUL
#define DBC_DBC_INDEX_SFT 0
__le32 type_path_xid;
#define DBC_DBC_XID_MASK 0xfffffUL
#define DBC_DBC_XID_SFT 0
#define DBC_DBC_PATH_MASK 0x3000000UL
#define DBC_DBC_PATH_SFT 24
#define DBC_DBC_PATH_ROCE (0x0UL << 24)
#define DBC_DBC_PATH_L2 (0x1UL << 24)
#define DBC_DBC_PATH_ENGINE (0x2UL << 24)
#define DBC_DBC_PATH_LAST DBC_DBC_PATH_ENGINE
#define DBC_DBC_DEBUG_TRACE 0x8000000UL
#define DBC_DBC_TYPE_MASK 0xf0000000UL
#define DBC_DBC_TYPE_SFT 28
#define DBC_DBC_TYPE_SQ (0x0UL << 28)
#define DBC_DBC_TYPE_RQ (0x1UL << 28)
#define DBC_DBC_TYPE_SRQ (0x2UL << 28)
#define DBC_DBC_TYPE_SRQ_ARM (0x3UL << 28)
#define DBC_DBC_TYPE_CQ (0x4UL << 28)
#define DBC_DBC_TYPE_CQ_ARMSE (0x5UL << 28)
#define DBC_DBC_TYPE_CQ_ARMALL (0x6UL << 28)
#define DBC_DBC_TYPE_CQ_ARMENA (0x7UL << 28)
#define DBC_DBC_TYPE_SRQ_ARMENA (0x8UL << 28)
#define DBC_DBC_TYPE_CQ_CUTOFF_ACK (0x9UL << 28)
#define DBC_DBC_TYPE_NQ (0xaUL << 28)
#define DBC_DBC_TYPE_NQ_ARM (0xbUL << 28)
#define DBC_DBC_TYPE_NULL (0xfUL << 28)
#define DBC_DBC_TYPE_LAST DBC_DBC_TYPE_NULL
};
/* 32b Doorbell Format (4 bytes) */
struct dbr_dbr32 {
__le32 type_abs_incr_xid;
#define DBR_DBR32_XID_MASK 0xfffffUL
#define DBR_DBR32_XID_SFT 0
#define DBR_DBR32_RESERVED4_MASK 0xf00000UL
#define DBR_DBR32_RESERVED4_SFT 20
#define DBR_DBR32_INCR_MASK 0xf000000UL
#define DBR_DBR32_INCR_SFT 24
#define DBR_DBR32_ABS 0x10000000UL
#define DBR_DBR32_TYPE_MASK 0xe0000000UL
#define DBR_DBR32_TYPE_SFT 29
#define DBR_DBR32_TYPE_SQ (0x0UL << 29)
/* dbc_dbc32 (size:32b/4B) */
struct dbc_dbc32 {
__le32 type_abs_incr_xid;
#define DBC_DBC32_XID_MASK 0xfffffUL
#define DBC_DBC32_XID_SFT 0
#define DBC_DBC32_PATH_MASK 0xc00000UL
#define DBC_DBC32_PATH_SFT 22
#define DBC_DBC32_PATH_ROCE (0x0UL << 22)
#define DBC_DBC32_PATH_L2 (0x1UL << 22)
#define DBC_DBC32_PATH_LAST DBC_DBC32_PATH_L2
#define DBC_DBC32_INCR_MASK 0xf000000UL
#define DBC_DBC32_INCR_SFT 24
#define DBC_DBC32_ABS 0x10000000UL
#define DBC_DBC32_TYPE_MASK 0xe0000000UL
#define DBC_DBC32_TYPE_SFT 29
#define DBC_DBC32_TYPE_SQ (0x0UL << 29)
#define DBC_DBC32_TYPE_LAST DBC_DBC32_TYPE_SQ
};
/* SQ WQE Structures */
@ -149,7 +159,24 @@ struct sq_psn_search {
#define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL
#define SQ_PSN_SEARCH_NEXT_PSN_SFT 0
#define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL
#define SQ_PSN_SEARCH_FLAGS_SFT 24
#define SQ_PSN_SEARCH_FLAGS_SFT 24
};
/* sq_psn_search_ext (size:128b/16B) */
struct sq_psn_search_ext {
__le32 opcode_start_psn;
#define SQ_PSN_SEARCH_EXT_START_PSN_MASK 0xffffffUL
#define SQ_PSN_SEARCH_EXT_START_PSN_SFT 0
#define SQ_PSN_SEARCH_EXT_OPCODE_MASK 0xff000000UL
#define SQ_PSN_SEARCH_EXT_OPCODE_SFT 24
__le32 flags_next_psn;
#define SQ_PSN_SEARCH_EXT_NEXT_PSN_MASK 0xffffffUL
#define SQ_PSN_SEARCH_EXT_NEXT_PSN_SFT 0
#define SQ_PSN_SEARCH_EXT_FLAGS_MASK 0xff000000UL
#define SQ_PSN_SEARCH_EXT_FLAGS_SFT 24
__le16 start_slot_idx;
__le16 reserved16;
__le32 reserved32;
};
/* Send SQ WQE (40 bytes) */
@ -505,22 +532,24 @@ struct cq_res_rc {
/* Responder UD CQE (32 bytes) */
struct cq_res_ud {
__le32 length;
__le16 length;
#define CQ_RES_UD_LENGTH_MASK 0x3fffUL
#define CQ_RES_UD_LENGTH_SFT 0
#define CQ_RES_UD_RESERVED18_MASK 0xffffc000UL
#define CQ_RES_UD_RESERVED18_SFT 14
__le16 cfa_metadata;
#define CQ_RES_UD_CFA_METADATA_VID_MASK 0xfffUL
#define CQ_RES_UD_CFA_METADATA_VID_SFT 0
#define CQ_RES_UD_CFA_METADATA_DE 0x1000UL
#define CQ_RES_UD_CFA_METADATA_PRI_MASK 0xe000UL
#define CQ_RES_UD_CFA_METADATA_PRI_SFT 13
__le32 imm_data;
__le64 qp_handle;
__le16 src_mac[3];
__le16 src_qp_low;
u8 cqe_type_toggle;
#define CQ_RES_UD_TOGGLE 0x1UL
#define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
#define CQ_RES_UD_CQE_TYPE_SFT 1
#define CQ_RES_UD_TOGGLE 0x1UL
#define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
#define CQ_RES_UD_CQE_TYPE_SFT 1
#define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1)
#define CQ_RES_UD_RESERVED3_MASK 0xe0UL
#define CQ_RES_UD_RESERVED3_SFT 5
u8 status;
#define CQ_RES_UD_STATUS_OK 0x0UL
#define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL
@ -536,18 +565,30 @@ struct cq_res_ud {
#define CQ_RES_UD_FLAGS_SRQ_SRQ (0x1UL << 0)
#define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ
#define CQ_RES_UD_FLAGS_IMM 0x2UL
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0xcUL
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 2
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 2)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 2)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 2)
#define CQ_RES_UD_FLAGS_UNUSED_MASK 0xcUL
#define CQ_RES_UD_FLAGS_UNUSED_SFT 2
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0x30UL
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 4
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST \
CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6
#define CQ_RES_UD_FLAGS_META_FORMAT_MASK 0x3c0UL
#define CQ_RES_UD_FLAGS_META_FORMAT_SFT 6
#define CQ_RES_UD_FLAGS_META_FORMAT_NONE (0x0UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_VLAN (0x1UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_LAST \
CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET
#define CQ_RES_UD_FLAGS_EXT_META_FORMAT_MASK 0xc00UL
#define CQ_RES_UD_FLAGS_EXT_META_FORMAT_SFT 10
__le32 src_qp_high_srq_or_rq_wr_id;
#define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
#define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0
#define CQ_RES_UD_RESERVED4_MASK 0xf00000UL
#define CQ_RES_UD_RESERVED4_SFT 20
#define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL
#define CQ_RES_UD_SRC_QP_HIGH_SFT 24
};
@ -983,6 +1024,7 @@ struct cmdq_create_qp {
#define CMDQ_CREATE_QP_TYPE_RC 0x2UL
#define CMDQ_CREATE_QP_TYPE_UD 0x4UL
#define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL
#define CMDQ_CREATE_QP_TYPE_GSI 0x7UL
u8 sq_pg_size_sq_lvl;
#define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL
#define CMDQ_CREATE_QP_SQ_LVL_SFT 0
@ -2719,6 +2761,8 @@ struct creq_query_func_resp_sb {
__le16 max_srq;
__le32 max_gid;
__le32 tqm_alloc_reqs[12];
__le32 max_dpi;
__le32 reserved_32;
};
/* Set resources command response (16 bytes) */

View File

@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3
ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3
obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o

View File

@ -146,7 +146,7 @@ static void open_rnic_dev(struct t3cdev *tdev)
pr_debug("%s t3cdev %p\n", __func__, tdev);
pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
rnicp = ib_alloc_device(iwch_dev, ibdev);
if (!rnicp) {
pr_err("Cannot allocate ib device\n");
return;

View File

@ -53,6 +53,7 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>
#include "cxio_hal.h"
#include "iwch.h"
@ -61,7 +62,7 @@
#include <rdma/cxgb3-abi.h>
#include "common.h"
static int iwch_dealloc_ucontext(struct ib_ucontext *context)
static void iwch_dealloc_ucontext(struct ib_ucontext *context)
{
struct iwch_dev *rhp = to_iwch_dev(context->device);
struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
@ -71,24 +72,20 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context)
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
kfree(ucontext);
return 0;
}
static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
struct ib_udata *udata)
{
struct iwch_ucontext *context;
struct ib_device *ibdev = ucontext->device;
struct iwch_ucontext *context = to_iwch_ucontext(ucontext);
struct iwch_dev *rhp = to_iwch_dev(ibdev);
pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
cxio_init_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
return &context->ibucontext;
return 0;
}
static int iwch_destroy_cq(struct ib_cq *ib_cq)
@ -370,7 +367,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return ret;
}
static int iwch_deallocate_pd(struct ib_pd *pd)
static void iwch_deallocate_pd(struct ib_pd *pd)
{
struct iwch_dev *rhp;
struct iwch_pd *php;
@ -379,15 +376,13 @@ static int iwch_deallocate_pd(struct ib_pd *pd)
rhp = php->rhp;
pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
kfree(php);
return 0;
}
static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct iwch_pd *php;
struct iwch_pd *php = to_iwch_pd(pd);
struct ib_device *ibdev = pd->device;
u32 pdid;
struct iwch_dev *rhp;
@ -395,12 +390,8 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
rhp = (struct iwch_dev *) ibdev;
pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
if (!pdid)
return ERR_PTR(-EINVAL);
php = kzalloc(sizeof(*php), GFP_KERNEL);
if (!php) {
cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
return ERR_PTR(-ENOMEM);
}
return -EINVAL;
php->pdid = pdid;
php->rhp = rhp;
if (context) {
@ -408,11 +399,11 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
iwch_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
return -EFAULT;
}
}
pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
return 0;
}
static int iwch_dereg_mr(struct ib_mr *ib_mr)
@ -522,14 +513,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
__be64 *pages;
int shift, n, len;
int i, k, entry;
int shift, n, i;
int err = 0;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
@ -540,14 +530,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
mhp->umem = ib_umem_get(udata, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
return ERR_PTR(err);
}
shift = mhp->umem->page_shift;
shift = PAGE_SHIFT;
n = mhp->umem->nmap;
@ -563,19 +553,15 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
(k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
if (i)
@ -836,7 +822,8 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
* Kernel users need more wq space for fastreg WRs which can take
* 2 WR fragments.
*/
ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL;
ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
ibucontext);
if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
wqsize = roundup_pow_of_two(rqsize +
roundup_pow_of_two(attrs->cap.max_send_wr * 2));
@ -1130,8 +1117,9 @@ static int iwch_query_port(struct ib_device *ibdev,
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
struct iwch_dev *iwch_dev =
rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
@ -1140,8 +1128,8 @@ static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
struct iwch_dev *iwch_dev =
rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
@ -1154,8 +1142,9 @@ static DEVICE_ATTR_RO(hca_type);
static ssize_t board_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
struct iwch_dev *iwch_dev =
rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
@ -1348,6 +1337,8 @@ static const struct ib_device_ops iwch_dev_ops = {
.reg_user_mr = iwch_reg_user_mr,
.req_notify_cq = iwch_arm_cq,
.resize_cq = iwch_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
};
int iwch_register_device(struct iwch_dev *dev)
@ -1391,7 +1382,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
return -ENOMEM;
@ -1409,7 +1400,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d");
if (ret)
kfree(dev->ibdev.iwcm);
return ret;

View File

@ -1,5 +1,5 @@
ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4
ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb
obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o

View File

@ -655,7 +655,33 @@ static int send_halfclose(struct c4iw_ep *ep)
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep)
static void read_tcb(struct c4iw_ep *ep)
{
struct sk_buff *skb;
struct cpl_get_tcb *req;
int wrlen = roundup(sizeof(*req), 16);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (WARN_ON(!skb))
return;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
/*
* keep a ref on the ep so the tcb is not unlocked before this
* cpl completes. The ref is released in read_tcb_rpl().
*/
c4iw_get_ep(&ep->com);
if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
c4iw_put_ep(&ep->com);
}
static int send_abort_req(struct c4iw_ep *ep)
{
u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
@ -670,6 +696,17 @@ static int send_abort(struct c4iw_ep *ep)
return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep)
{
if (!ep->com.qp || !ep->com.qp->srq) {
send_abort_req(ep);
return 0;
}
set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
read_tcb(ep);
return 0;
}
static int send_connect(struct c4iw_ep *ep)
{
struct cpl_act_open_req *req = NULL;
@ -1851,14 +1888,11 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
static void complete_cached_srq_buffers(struct c4iw_ep *ep,
__be32 srqidx_status)
static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
{
enum chip_type adapter_type;
u32 srqidx;
adapter_type = ep->com.dev->rdev.lldi.adapter_type;
srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
/*
* If this TCB had a srq buffer cached, then we must complete
@ -1876,6 +1910,7 @@ static void complete_cached_srq_buffers(struct c4iw_ep *ep,
static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
u32 srqidx;
struct c4iw_ep *ep;
struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
int release = 0;
@ -1887,7 +1922,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
complete_cached_srq_buffers(ep, rpl->srqidx_status);
if (ep->com.qp && ep->com.qp->srq) {
srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
}
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
@ -1903,8 +1941,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
}
mutex_unlock(&ep->com.mutex);
if (release)
if (release) {
close_complete_upcall(ep, -ECONNRESET);
release_ep_resources(ep);
}
c4iw_put_ep(&ep->com);
return 0;
}
@ -2072,7 +2112,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
} else {
pdev = get_real_dev(n->dev);
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, 0);
n, pdev, rt_tos2priority(tos));
if (!ep->l2t)
goto out;
ep->mtu = dst_mtu(dst);
@ -2161,7 +2201,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
laddr6->sin6_port,
raddr6->sin6_port, 0,
raddr6->sin6_port,
ep->com.cm_id->tos,
raddr6->sin6_scope_id);
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
@ -2476,7 +2517,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
u16 peer_mss = ntohs(req->tcpopt.mss);
int iptype;
unsigned short hdrs;
u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
u8 tos;
parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
@ -2490,6 +2531,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
if (parent_ep->com.cm_id->tos_set)
tos = parent_ep->com.cm_id->tos;
else
tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
&iptype, local_ip, peer_ip, &local_port, &peer_port);
@ -2509,7 +2555,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
ntohs(peer_port), peer_mss);
dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
local_ip, peer_ip, local_port, peer_port,
PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
tos,
((struct sockaddr_in6 *)
&parent_ep->com.local_addr)->sin6_scope_id);
}
@ -2740,6 +2786,21 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
{
complete_cached_srq_buffers(ep, ep->srqe_idx);
if (ep->com.cm_id && ep->com.qp) {
struct c4iw_qp_attributes attrs;
attrs.next_state = C4IW_QP_STATE_ERROR;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
peer_abort_upcall(ep);
release_ep_resources(ep);
c4iw_put_ep(&ep->com);
}
static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_abort_req_rss6 *req = cplhdr(skb);
@ -2750,6 +2811,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
int release = 0;
unsigned int tid = GET_TID(req);
u8 status;
u32 srqidx;
u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
@ -2769,8 +2831,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
goto deref_ep;
}
complete_cached_srq_buffers(ep, req->srqidx_status);
pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
ep->com.state);
set_bit(PEER_ABORT, &ep->com.history);
@ -2819,6 +2879,23 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
stop_ep_timer(ep);
/*FALLTHROUGH*/
case FPDU_MODE:
if (ep->com.qp && ep->com.qp->srq) {
srqidx = ABORT_RSS_SRQIDX_G(
be32_to_cpu(req->srqidx_status));
if (srqidx) {
complete_cached_srq_buffers(ep,
req->srqidx_status);
} else {
/* Hold ep ref until finish_peer_abort() */
c4iw_get_ep(&ep->com);
__state_set(&ep->com, ABORTING);
set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
read_tcb(ep);
break;
}
}
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_ERROR;
ret = c4iw_modify_qp(ep->com.qp->rhp,
@ -2942,15 +3019,18 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (ep && ep->com.qp) {
pr_warn("TERM received tid %u qpid %u\n",
tid, ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
if (ep) {
if (ep->com.qp) {
pr_warn("TERM received tid %u qpid %u\n", tid,
ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
c4iw_put_ep(&ep->com);
} else
pr_warn("TERM received tid %u no ep/qp\n", tid);
c4iw_put_ep(&ep->com);
return 0;
}
@ -3318,7 +3398,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
laddr6->sin6_port,
raddr6->sin6_port, 0,
raddr6->sin6_port, cm_id->tos,
raddr6->sin6_scope_id);
}
if (!ep->dst) {
@ -3606,7 +3686,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (close) {
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
close_complete_upcall(ep, -ECONNRESET);
ret = send_abort(ep);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
@ -3717,6 +3796,80 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
return;
}
static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
{
u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
u64 t;
u32 shift = 32;
t = (thi << shift) | (tlo >> shift);
return t;
}
static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
{
u32 v;
u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
if (word & 0x1)
shift += 32;
v = (t >> shift) & mask;
return v;
}
static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
__be64 *tcb = (__be64 *)(rpl + 1);
unsigned int tid = GET_TID(rpl);
struct c4iw_ep *ep;
u64 t_flags_64;
u32 rx_pdu_out;
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
/* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
* determine if there's a rx PDU feedback event pending.
*
* If that bit is set, it means we'll need to re-read the TCB's
* rq_start value. The final value is the one present in a TCB
* with the TF_RX_PDU_OUT bit cleared.
*/
t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
c4iw_put_ep(&ep->com); /* from read_tcb() */
/* If TF_RX_PDU_OUT bit is set, re-read the TCB */
if (rx_pdu_out) {
if (++ep->rx_pdu_out_cnt >= 2) {
WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
goto cleanup;
}
read_tcb(ep);
return 0;
}
ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
TCB_RQ_START_S);
cleanup:
pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
finish_peer_abort(dev, ep);
else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
send_abort_req(ep);
else
WARN_ONCE(1, "unexpected state!");
return 0;
}
static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_fw6_msg *rpl = cplhdr(skb);
@ -4037,6 +4190,7 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
[CPL_CLOSE_CON_RPL] = close_con_rpl,
[CPL_RDMA_TERMINATE] = terminate,
[CPL_FW4_ACK] = fw4_ack,
[CPL_GET_TCB_RPL] = read_tcb_rpl,
[CPL_FW6_MSG] = deferred_fw6_msg,
[CPL_RX_PKT] = rx_pkt,
[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
@ -4268,6 +4422,7 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
[CPL_RDMA_TERMINATE] = sched,
[CPL_FW4_ACK] = sched,
[CPL_SET_TCB_RPL] = set_tcb_rpl,
[CPL_GET_TCB_RPL] = sched,
[CPL_FW6_MSG] = fw6_msg,
[CPL_RX_PKT] = sched
};

View File

@ -720,11 +720,8 @@ static const struct file_operations ep_debugfs_fops = {
.read = debugfs_read,
};
static int setup_debugfs(struct c4iw_dev *devp)
static void setup_debugfs(struct c4iw_dev *devp)
{
if (!devp->debugfs_root)
return -1;
debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
(void *)devp, &qp_debugfs_fops, 4096);
@ -740,7 +737,6 @@ static int setup_debugfs(struct c4iw_dev *devp)
if (c4iw_wr_log)
debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
(void *)devp, &wr_log_debugfs_fops, 4096);
return 0;
}
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
@ -981,7 +977,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
pr_info("%s: On-Chip Queues not supported on this device\n",
pci_name(infop->pdev));
devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
devp = ib_alloc_device(c4iw_dev, ibdev);
if (!devp) {
pr_err("Cannot allocate ib device\n");
return ERR_PTR(-ENOMEM);
@ -1564,8 +1560,6 @@ static int __init c4iw_init_module(void)
return err;
c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
if (!c4iw_debugfs_root)
pr_warn("could not create debugfs entry, continuing\n");
reg_workq = create_singlethread_workqueue("Register_iWARP_device");
if (!reg_workq) {

View File

@ -589,7 +589,6 @@ struct c4iw_ucontext {
u32 key;
spinlock_t mmap_lock;
struct list_head mmaps;
struct kref kref;
bool is_32b_cqe;
};
@ -598,18 +597,6 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
return container_of(c, struct c4iw_ucontext, ibucontext);
}
void _c4iw_free_ucontext(struct kref *kref);
static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext)
{
kref_put(&ucontext->kref, _c4iw_free_ucontext);
}
static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext)
{
kref_get(&ucontext->kref);
}
struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
@ -982,6 +969,9 @@ struct c4iw_ep {
int rcv_win;
u32 snd_wscale;
struct c4iw_ep_stats stats;
u32 srqe_idx;
u32 rx_pdu_out_cnt;
struct sk_buff *peer_abort_skb;
};
static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)

View File

@ -502,10 +502,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
__be64 *pages;
int shift, n, len;
int i, k, entry;
int shift, n, i;
int err = -ENOMEM;
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@ -537,11 +536,11 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
mhp->umem = ib_umem_get(udata, start, length, acc, 0);
if (IS_ERR(mhp->umem))
goto err_free_skb;
shift = mhp->umem->page_shift;
shift = PAGE_SHIFT;
n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
@ -556,21 +555,16 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
(k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
mhp->attr.pbl_addr + (n << 3), i,
mhp->wr_waitp);
if (err)
goto pbl_done;
n += i;
i = 0;
}
for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
if (i == PAGE_SIZE / sizeof(*pages)) {
err = write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (n << 3), i,
mhp->wr_waitp);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
@ -684,8 +678,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
mhp->wr_waitp);
kfree_skb(mhp->dereg_skb);
c4iw_put_wr_wait(mhp->wr_waitp);
kfree(mhp);
pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
kfree(mhp);
return 0;
}

View File

@ -58,51 +58,34 @@ static int fastreg_support = 1;
module_param(fastreg_support, int, 0644);
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
void _c4iw_free_ucontext(struct kref *kref)
static void c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
struct c4iw_ucontext *ucontext;
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
struct c4iw_dev *rhp;
struct c4iw_mm_entry *mm, *tmp;
ucontext = container_of(kref, struct c4iw_ucontext, kref);
pr_debug("context %p\n", context);
rhp = to_c4iw_dev(ucontext->ibucontext.device);
pr_debug("ucontext %p\n", ucontext);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
kfree(ucontext);
}
static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
struct ib_udata *udata)
{
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
pr_debug("context %p\n", context);
c4iw_put_ucontext(ucontext);
return 0;
}
static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct c4iw_ucontext *context;
struct ib_device *ibdev = ucontext->device;
struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
struct c4iw_alloc_ucontext_resp uresp;
int ret = 0;
struct c4iw_mm_entry *mm = NULL;
pr_debug("ibdev %p\n", ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context) {
ret = -ENOMEM;
goto err;
}
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
@ -111,7 +94,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
if (!mm) {
ret = -ENOMEM;
goto err_free;
goto err;
}
uresp.status_page_size = PAGE_SIZE;
@ -131,13 +114,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
mm->len = PAGE_SIZE;
insert_mmap(context, mm);
}
return &context->ibucontext;
return 0;
err_mm:
kfree(mm);
err_free:
kfree(context);
err:
return ERR_PTR(ret);
return ret;
}
static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@ -209,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return ret;
}
static int c4iw_deallocate_pd(struct ib_pd *pd)
static void c4iw_deallocate_pd(struct ib_pd *pd)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@ -221,15 +202,13 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
kfree(php);
return 0;
}
static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct c4iw_pd *php;
struct c4iw_pd *php = to_c4iw_pd(pd);
struct ib_device *ibdev = pd->device;
u32 pdid;
struct c4iw_dev *rhp;
@ -237,12 +216,8 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
return ERR_PTR(-EINVAL);
php = kzalloc(sizeof(*php), GFP_KERNEL);
if (!php) {
c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
return ERR_PTR(-ENOMEM);
}
return -EINVAL;
php->pdid = pdid;
php->rhp = rhp;
if (context) {
@ -250,7 +225,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
c4iw_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
return -EFAULT;
}
}
mutex_lock(&rhp->rdev.stats.lock);
@ -259,7 +234,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php);
return &php->ibpd;
return 0;
}
static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@ -376,8 +351,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
struct c4iw_dev *c4iw_dev =
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
@ -387,8 +363,8 @@ static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
struct c4iw_dev *c4iw_dev =
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
struct ethtool_drvinfo info;
struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
@ -401,8 +377,9 @@ static DEVICE_ATTR_RO(hca_type);
static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
struct c4iw_dev *c4iw_dev =
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
@ -547,6 +524,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
.destroy_cq = c4iw_destroy_cq,
.destroy_qp = c4iw_destroy_qp,
.destroy_srq = c4iw_destroy_srq,
.fill_res_entry = fill_res_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = c4iw_get_dma_mr,
.get_hw_stats = c4iw_get_mib,
@ -567,6 +545,8 @@ static const struct ib_device_ops c4iw_dev_ops = {
.query_qp = c4iw_ib_query_qp,
.reg_user_mr = c4iw_reg_user_mr,
.req_notify_cq = c4iw_arm_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
};
void c4iw_register_device(struct work_struct *work)
@ -613,7 +593,7 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm) {
ret = -ENOMEM;
goto err_dealloc_ctx;
@ -627,14 +607,13 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
dev->ibdev.iwcm->get_qp = c4iw_get_qp;
dev->ibdev.res.fill_res_entry = fill_res_entry;
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
if (ret)
goto err_kfree_iwcm;
return;

View File

@ -31,6 +31,7 @@
*/
#include <linux/module.h>
#include <rdma/uverbs_ioctl.h>
#include "iw_cxgb4.h"
@ -632,7 +633,10 @@ static void build_rdma_write_cmpl(struct t4_sq *sq,
wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
if (wr->next->opcode == IB_WR_SEND)
wcwr->stag_inv = 0;
else
wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
wcwr->r2 = 0;
wcwr->r3 = 0;
@ -726,7 +730,10 @@ static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
/* SEND_WITH_INV swsqe */
swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
swsqe->opcode = FW_RI_SEND_WITH_INV;
if (wr->next->opcode == IB_WR_SEND)
swsqe->opcode = FW_RI_SEND;
else
swsqe->opcode = FW_RI_SEND_WITH_INV;
swsqe->idx = qhp->wq.sq.pidx;
swsqe->complete = 0;
swsqe->signaled = send_signaled;
@ -897,8 +904,6 @@ static void free_qp_work(struct work_struct *work)
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
if (ucontext)
c4iw_put_ucontext(ucontext);
c4iw_put_wr_wait(qhp->wr_waitp);
kfree(qhp);
}
@ -1133,9 +1138,9 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
/*
* Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
* the response for small NVMEe-oF READ requests. If the chain is
* exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
* meet the requirements of the fw_ri_write_cmpl_wr work request,
* then build and post the write_cmpl WR. If any of the tests
* exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths
* and lengths meet the requirements of the fw_ri_write_cmpl_wr work
* request, then build and post the write_cmpl WR. If any of the tests
* below are not true, then we continue on with the tradtional WRITE
* and SEND WRs.
*/
@ -1145,7 +1150,8 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
wr && wr->next && !wr->next->next &&
wr->opcode == IB_WR_RDMA_WRITE &&
wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
wr->next->opcode == IB_WR_SEND_WITH_INV &&
(wr->next->opcode == IB_WR_SEND ||
wr->next->opcode == IB_WR_SEND_WITH_INV) &&
wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
wr->next->num_sge == 1 && num_wrs >= 2) {
post_write_cmpl(qhp, wr);
@ -2129,7 +2135,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_cq *rchp;
struct c4iw_create_qp_resp uresp;
unsigned int sqsize, rqsize = 0;
struct c4iw_ucontext *ucontext;
struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct c4iw_ucontext, ibucontext);
int ret;
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
@ -2163,8 +2170,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
if (sqsize < 8)
sqsize = 8;
ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
return ERR_PTR(-ENOMEM);
@ -2331,7 +2336,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
insert_mmap(ucontext, ma_sync_key_mm);
}
c4iw_get_ucontext(ucontext);
qhp->ucontext = ucontext;
}
if (!attrs->srq) {
@ -2589,7 +2593,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
/* build fw_ri_res_wr */
wr_len = sizeof(*res_wr) + sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb)
goto err_free_queue;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
@ -2711,7 +2715,8 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
rqsize = attrs->attr.max_wr + 1;
rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
srq = kzalloc(sizeof(*srq), GFP_KERNEL);
if (!srq)

View File

@ -35,6 +35,7 @@
#include "t4_regs.h"
#include "t4_values.h"
#include "t4_msg.h"
#include "t4_tcb.h"
#include "t4fw_ri_api.h"
#define T4_MAX_NUM_PD 65536

View File

@ -24,6 +24,7 @@ hfi1-y := \
mad.o \
mmu_rb.o \
msix.o \
opfn.o \
pcie.o \
pio.o \
pio_copy.o \

View File

@ -4253,6 +4253,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
access_sw_kmem_wait),
[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
hfi1_access_sw_tid_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
access_sw_send_schedule),
[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
@ -5222,6 +5224,17 @@ int is_bx(struct hfi1_devdata *dd)
return (chip_rev_minor & 0xF0) == 0x10;
}
/* return true is kernel urg disabled for rcd */
bool is_urg_masked(struct hfi1_ctxtdata *rcd)
{
u64 mask;
u32 is = IS_RCVURGENT_START + rcd->ctxt;
u8 bit = is % 64;
mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
return !(mask & BIT_ULL(bit));
}
/*
* Append string s to buffer buf. Arguments curp and len are the current
* position and remaining length, respectively.

View File

@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -804,6 +804,7 @@ void clear_linkup_counters(struct hfi1_devdata *dd);
u32 hdrqempty(struct hfi1_ctxtdata *rcd);
int is_ax(struct hfi1_devdata *dd);
int is_bx(struct hfi1_devdata *dd);
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
const char *opa_lstate_name(u32 lstate);
@ -926,6 +927,7 @@ enum {
C_SW_PIO_WAIT,
C_SW_PIO_DRAIN,
C_SW_KMEM_WAIT,
C_SW_TID_WAIT,
C_SW_SEND_SCHED,
C_SDMA_DESC_FETCHED_CNT,
C_SDMA_INT_CNT,

View File

@ -340,6 +340,10 @@ struct diag_pkt {
#define HFI1_PSM_IOC_BASE_SEQ 0x0
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
#define HFI1_KDETH_BTH_SEQ_SHIFT 11
#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
return __le64_to_cpu(*((__le64 *)rbuf));

View File

@ -1167,6 +1167,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
char link[10];
struct hfi1_devdata *dd = dd_from_dev(ibd);
struct hfi1_pportdata *ppd;
struct dentry *root;
int unit = dd->unit;
int i, j;
@ -1174,31 +1175,29 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
return;
snprintf(name, sizeof(name), "%s_%d", class_name(), unit);
snprintf(link, sizeof(link), "%d", unit);
ibd->hfi1_ibdev_dbg = debugfs_create_dir(name, hfi1_dbg_root);
if (!ibd->hfi1_ibdev_dbg) {
pr_warn("create of %s failed\n", name);
return;
}
root = debugfs_create_dir(name, hfi1_dbg_root);
ibd->hfi1_ibdev_dbg = root;
ibd->hfi1_ibdev_link =
debugfs_create_symlink(link, hfi1_dbg_root, name);
if (!ibd->hfi1_ibdev_link) {
pr_warn("create of %s symlink failed\n", name);
return;
}
DEBUGFS_SEQ_FILE_CREATE(opcode_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(tx_opcode_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
debugfs_create_file("opcode_stats", 0444, root, ibd,
&_opcode_stats_file_ops);
debugfs_create_file("tx_opcode_stats", 0444, root, ibd,
&_tx_opcode_stats_file_ops);
debugfs_create_file("ctx_stats", 0444, root, ibd, &_ctx_stats_file_ops);
debugfs_create_file("qp_stats", 0444, root, ibd, &_qp_stats_file_ops);
debugfs_create_file("sdes", 0444, root, ibd, &_sdes_file_ops);
debugfs_create_file("rcds", 0444, root, ibd, &_rcds_file_ops);
debugfs_create_file("pios", 0444, root, ibd, &_pios_file_ops);
debugfs_create_file("sdma_cpu_list", 0444, root, ibd,
&_sdma_cpu_list_file_ops);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
DEBUGFS_FILE_CREATE(cntr_ops[i].name,
ibd->hfi1_ibdev_dbg,
dd,
&cntr_ops[i].ops, S_IRUGO);
debugfs_create_file(cntr_ops[i].name, 0444, root, dd,
&cntr_ops[i].ops);
/* per port files */
for (ppd = dd->pport, j = 0; j < dd->num_pports; j++, ppd++)
for (i = 0; i < ARRAY_SIZE(port_cntr_ops); i++) {
@ -1206,12 +1205,11 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
sizeof(name),
port_cntr_ops[i].name,
j + 1);
DEBUGFS_FILE_CREATE(name,
ibd->hfi1_ibdev_dbg,
ppd,
&port_cntr_ops[i].ops,
debugfs_create_file(name,
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
S_IRUGO :
S_IRUGO | S_IWUSR,
root, ppd, &port_cntr_ops[i].ops);
}
hfi1_fault_init_debugfs(ibd);
@ -1341,10 +1339,10 @@ DEBUGFS_FILE_OPS(driver_stats);
void hfi1_dbg_init(void)
{
hfi1_dbg_root = debugfs_create_dir(DRIVER_NAME, NULL);
if (!hfi1_dbg_root)
pr_warn("init of debugfs failed\n");
DEBUGFS_SEQ_FILE_CREATE(driver_stats_names, hfi1_dbg_root, NULL);
DEBUGFS_SEQ_FILE_CREATE(driver_stats, hfi1_dbg_root, NULL);
debugfs_create_file("driver_stats_names", 0444, hfi1_dbg_root, NULL,
&_driver_stats_names_file_ops);
debugfs_create_file("driver_stats", 0444, hfi1_dbg_root, NULL,
&_driver_stats_file_ops);
}
void hfi1_dbg_exit(void)

View File

@ -49,16 +49,6 @@
struct hfi1_ibdev;
#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
do { \
struct dentry *ent; \
const char *__name = name; \
ent = debugfs_create_file(__name, mode, parent, \
data, ops); \
if (!ent) \
pr_warn("create of %s failed\n", __name); \
} while (0)
#define DEBUGFS_SEQ_FILE_OPS(name) \
static const struct seq_operations _##name##_seq_ops = { \
.start = _##name##_seq_start, \
@ -89,8 +79,6 @@ static const struct file_operations _##name##_file_ops = { \
.release = seq_release \
}
#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444)
ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
loff_t *ppos);

View File

@ -1575,25 +1575,32 @@ drop:
return -EINVAL;
}
void handle_eflags(struct hfi1_packet *packet)
static void show_eflags_errs(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
u32 rte = rhf_rcv_type_err(packet->rhf);
dd_dev_err(rcd->dd,
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
rcd->ctxt, packet->rhf,
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
packet->rhf & RHF_DC_ERR ? "dc " : "",
packet->rhf & RHF_TID_ERR ? "tid " : "",
packet->rhf & RHF_LEN_ERR ? "len " : "",
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
rte);
}
void handle_eflags(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
rcv_hdrerr(rcd, rcd->ppd, packet);
if (rhf_err_flags(packet->rhf))
dd_dev_err(rcd->dd,
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
rcd->ctxt, packet->rhf,
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
packet->rhf & RHF_DC_ERR ? "dc " : "",
packet->rhf & RHF_TID_ERR ? "tid " : "",
packet->rhf & RHF_LEN_ERR ? "len " : "",
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
rte);
show_eflags_errs(packet);
}
/*
@ -1699,11 +1706,14 @@ static int kdeth_process_expected(struct hfi1_packet *packet)
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
dd_dev_err(packet->rcd->dd,
"Unhandled expected packet received. Dropping.\n");
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
return RHF_RCV_CONTINUE;
}
hfi1_kdeth_expected_rcv(packet);
return RHF_RCV_CONTINUE;
}
@ -1712,11 +1722,17 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
show_eflags_errs(packet);
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
return RHF_RCV_CONTINUE;
}
hfi1_kdeth_eager_rcv(packet);
return RHF_RCV_CONTINUE;
}

View File

@ -250,6 +250,7 @@ void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
{
struct dentry *parent = ibd->hfi1_ibdev_dbg;
struct dentry *fault_dir;
ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
if (!ibd->fault)
@ -269,45 +270,31 @@ int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
bitmap_zero(ibd->fault->opcodes,
sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
ibd->fault->dir =
fault_create_debugfs_attr("fault", parent,
&ibd->fault->attr);
if (IS_ERR(ibd->fault->dir)) {
fault_dir =
fault_create_debugfs_attr("fault", parent, &ibd->fault->attr);
if (IS_ERR(fault_dir)) {
kfree(ibd->fault);
ibd->fault = NULL;
return -ENOENT;
}
ibd->fault->dir = fault_dir;
DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd);
if (!debugfs_create_bool("enable", 0600, ibd->fault->dir,
&ibd->fault->enable))
goto fail;
if (!debugfs_create_bool("suppress_err", 0600,
ibd->fault->dir,
&ibd->fault->suppress_err))
goto fail;
if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir,
&ibd->fault->opcode))
goto fail;
if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir,
ibd->fault, &__fault_opcodes_fops))
goto fail;
if (!debugfs_create_u64("skip_pkts", 0600,
ibd->fault->dir,
&ibd->fault->fault_skip))
goto fail;
if (!debugfs_create_u64("skip_usec", 0600,
ibd->fault->dir,
&ibd->fault->fault_skip_usec))
goto fail;
if (!debugfs_create_u8("direction", 0600, ibd->fault->dir,
&ibd->fault->direction))
goto fail;
debugfs_create_file("fault_stats", 0444, fault_dir, ibd,
&_fault_stats_file_ops);
debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable);
debugfs_create_bool("suppress_err", 0600, fault_dir,
&ibd->fault->suppress_err);
debugfs_create_bool("opcode_mode", 0600, fault_dir,
&ibd->fault->opcode);
debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault,
&__fault_opcodes_fops);
debugfs_create_u64("skip_pkts", 0600, fault_dir,
&ibd->fault->fault_skip);
debugfs_create_u64("skip_usec", 0600, fault_dir,
&ibd->fault->fault_skip_usec);
debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction);
return 0;
fail:
hfi1_fault_exit_debugfs(ibd);
return -ENOMEM;
}
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)

View File

@ -73,6 +73,7 @@
#include "chip_registers.h"
#include "common.h"
#include "opfn.h"
#include "verbs.h"
#include "pio.h"
#include "chip.h"
@ -98,6 +99,8 @@
#define NEIGHBOR_TYPE_HFI 0
#define NEIGHBOR_TYPE_SWITCH 1
#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
extern unsigned long hfi1_cap_mask;
#define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
#define HFI1_CAP_UGET_MASK(mask, cap) \
@ -195,6 +198,14 @@ struct exp_tid_set {
};
typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct tid_queue {
struct list_head queue_head;
/* queue head for QP TID resource waiters */
u32 enqueue; /* count of tid enqueues */
u32 dequeue; /* count of tid dequeues */
};
struct hfi1_ctxtdata {
/* rcvhdrq base, needs mmap before useful */
void *rcvhdrq;
@ -288,6 +299,12 @@ struct hfi1_ctxtdata {
/* PSM Specific fields */
/* lock protecting all Expected TID data */
struct mutex exp_mutex;
/* lock protecting all Expected TID data of kernel contexts */
spinlock_t exp_lock;
/* Queue for QP's waiting for HW TID flows */
struct tid_queue flow_queue;
/* Queue for QP's waiting for HW receive array entries */
struct tid_queue rarr_queue;
/* when waiting for rcv or pioavail */
wait_queue_head_t wait;
/* uuid from PSM */
@ -320,6 +337,9 @@ struct hfi1_ctxtdata {
*/
u8 subctxt_cnt;
/* Bit mask to track free TID RDMA HW flows */
unsigned long flow_mask;
struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
};
/**
@ -1435,7 +1455,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
void hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
int hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
u16 ctxt);
struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
@ -2100,7 +2120,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
#endif
HFI1_PKT_USER_SC_INTEGRITY;
else
else if (ctxt_type != SC_KERNEL)
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
/* turn on send-side job key checks if !A0 */

View File

@ -73,7 +73,6 @@
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
@ -216,12 +215,12 @@ static void hfi1_rcd_free(struct kref *kref)
struct hfi1_ctxtdata *rcd =
container_of(kref, struct hfi1_ctxtdata, kref);
hfi1_free_ctxtdata(rcd->dd, rcd);
spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
rcd->dd->rcd[rcd->ctxt] = NULL;
spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
hfi1_free_ctxtdata(rcd->dd, rcd);
kfree(rcd);
}
@ -244,10 +243,13 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
* @rcd: pointer to an initialized rcd data structure
*
* Use this to get a reference after the init.
*
* Return : reflect kref_get_unless_zero(), which returns non-zero on
* increment, otherwise 0.
*/
void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
int hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
{
kref_get(&rcd->kref);
return kref_get_unless_zero(&rcd->kref);
}
/**
@ -327,7 +329,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd[ctxt]) {
rcd = dd->rcd[ctxt];
hfi1_rcd_get(rcd);
if (!hfi1_rcd_get(rcd))
rcd = NULL;
}
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
@ -372,6 +375,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
mutex_init(&rcd->exp_mutex);
spin_lock_init(&rcd->exp_lock);
INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
@ -474,6 +480,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
/* Initialize TID flow generations for the context */
hfi1_kern_init_ctxt_generations(rcd);
}
*context = rcd;
@ -773,6 +782,8 @@ static void enable_chip(struct hfi1_devdata *dd)
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
if (HFI1_CAP_IS_KSET(TID_RDMA))
rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
hfi1_rcvctrl(dd, rcvmask, rcd);
sc_enable(rcd->sc);
hfi1_rcd_put(rcd);
@ -928,6 +939,8 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
if (!lastfail)
lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
if (lastfail) {
dd_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
@ -1498,6 +1511,13 @@ static int __init hfi1_mod_init(void)
/* sanitize link CRC options */
link_crc_mask &= SUPPORTED_CRCS;
ret = opfn_init();
if (ret < 0) {
pr_err("Failed to allocate opfn_wq");
goto bail_dev;
}
hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
@ -1528,6 +1548,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
opfn_exit();
node_affinity_destroy_all();
hfi1_dbg_exit();
@ -1582,7 +1603,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
if (rcd) {
hfi1_clear_tids(rcd);
hfi1_free_ctxt_rcv_groups(rcd);
hfi1_free_ctxt(rcd);
}
}

View File

@ -6,6 +6,9 @@
#include "iowait.h"
#include "trace_iowait.h"
/* 1 priority == 16 starve_cnt */
#define IOWAIT_PRIORITY_STARVE_SHIFT 4
void iowait_set_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_set(wait, flag);
@ -44,7 +47,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
void (*sdma_drained)(struct iowait *wait),
void (*init_priority)(struct iowait *wait))
{
int i;
@ -58,6 +62,7 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
wait->init_priority = init_priority;
wait->flags = 0;
for (i = 0; i < IOWAIT_SES; i++) {
wait->wait[i].iow = wait;
@ -92,3 +97,30 @@ int iowait_set_work_flag(struct iowait_work *w)
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
return IOWAIT_TID_SE;
}
/**
* iowait_priority_update_top - update the top priority entry
* @w: the iowait struct
* @top: a pointer to the top priority entry
* @idx: the index of the current iowait in an array
* @top_idx: the array index for the iowait entry that has the top priority
*
* This function is called to compare the priority of a given
* iowait with the given top priority entry. The top index will
* be returned.
*/
uint iowait_priority_update_top(struct iowait *w,
struct iowait *top,
uint idx, uint top_idx)
{
u8 cnt, tcnt;
/* Convert priority into starve_cnt and compare the total.*/
cnt = (w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) + w->starved_cnt;
tcnt = (top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
top->starved_cnt;
if (cnt > tcnt)
return idx;
else
return top_idx;
}

View File

@ -100,6 +100,7 @@ struct iowait_work {
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
* @init_priority: callback to manipulate priority
* @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
@ -109,7 +110,7 @@ struct iowait_work {
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
* @flags: wait flags (one per QP)
* @wait: SE array
* @wait: SE array for multiple legs
*
* This is to be embedded in user's state structure
* (QP or PQ).
@ -120,10 +121,13 @@ struct iowait_work {
* are callbacks for the ULP to implement
* what ever queuing/dequeuing of
* the embedded iowait and its containing struct
* when a resource shortage like SDMA ring space is seen.
* when a resource shortage like SDMA ring space
* or PIO credit space is seen.
*
* Both potentially have locks help
* so sleeping is not allowed.
* so sleeping is not allowed and it is not
* supported to submit txreqs from the wakeup
* call directly because of lock conflicts.
*
* The wait_dma member along with the iow
*
@ -143,6 +147,7 @@ struct iowait {
);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
void (*init_priority)(struct iowait *wait);
seqlock_t *lock;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@ -152,6 +157,7 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
u8 priority;
unsigned long flags;
struct iowait_work wait[IOWAIT_SES];
};
@ -171,7 +177,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait));
void (*sdma_drained)(struct iowait *wait),
void (*init_priority)(struct iowait *wait));
/**
* iowait_schedule() - schedule the default send engine work
@ -185,6 +192,18 @@ static inline bool iowait_schedule(struct iowait *wait,
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}
/**
* iowait_tid_schedule - schedule the tid SE
* @wait: the iowait structure
* @wq: the work queue
* @cpu: the cpu
*/
static inline bool iowait_tid_schedule(struct iowait *wait,
struct workqueue_struct *wq, int cpu)
{
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_TID_SE].iowork);
}
/**
* iowait_sdma_drain() - wait for DMAs to drain
*
@ -327,6 +346,8 @@ static inline u16 iowait_get_desc(struct iowait_work *w)
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
num_desc = tx->num_desc;
if (tx->flags & SDMA_TXREQ_F_VIP)
w->iow->priority++;
}
return num_desc;
}
@ -340,6 +361,37 @@ static inline u32 iowait_get_all_desc(struct iowait *w)
return num_desc;
}
static inline void iowait_update_priority(struct iowait_work *w)
{
struct sdma_txreq *tx = NULL;
if (!list_empty(&w->tx_head)) {
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
if (tx->flags & SDMA_TXREQ_F_VIP)
w->iow->priority++;
}
}
static inline void iowait_update_all_priority(struct iowait *w)
{
iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
}
static inline void iowait_init_priority(struct iowait *w)
{
w->priority = 0;
if (w->init_priority)
w->init_priority(w);
}
static inline void iowait_get_priority(struct iowait *w)
{
iowait_init_priority(w);
iowait_update_all_priority(w);
}
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@ -356,14 +408,18 @@ static inline void iowait_queue(bool pkts_sent, struct iowait *w,
/*
* To play fair, insert the iowait at the tail of the wait queue if it
* has already sent some packets; Otherwise, put it at the head.
* However, if it has priority packets to send, also put it at the
* head.
*/
if (pkts_sent) {
list_add_tail(&w->list, wait_head);
if (pkts_sent)
w->starved_cnt = 0;
} else {
list_add(&w->list, wait_head);
else
w->starved_cnt++;
}
if (w->priority > 0 || !pkts_sent)
list_add(&w->list, wait_head);
else
list_add_tail(&w->list, wait_head);
}
/**
@ -380,27 +436,10 @@ static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
w->starved_cnt = 0;
}
/**
* iowait_starve_find_max - Find the maximum of the starve count
* @w: the iowait struct
* @max: a variable containing the max starve count
* @idx: the index of the current iowait in an array
* @max_idx: a variable containing the array index for the
* iowait entry that has the max starve count
*
* This function is called to compare the starve count of a
* given iowait with the given max starve count. The max starve
* count and the index will be updated if the iowait's start
* count is larger.
*/
static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
uint idx, uint *max_idx)
{
if (w->starved_cnt > *max) {
*max = w->starved_cnt;
*max_idx = idx;
}
}
/* Update the top priority index */
uint iowait_priority_update_top(struct iowait *w,
struct iowait *top,
uint idx, uint top_idx);
/**
* iowait_packet_queued() - determine if a packet is queued

View File

@ -0,0 +1,323 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"
#define IB_BTHE_E BIT(IB_BTHE_E_SHIFT)
#define OPFN_CODE(code) BIT((code) - 1)
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
struct hfi1_opfn_type {
bool (*request)(struct rvt_qp *qp, u64 *data);
bool (*response)(struct rvt_qp *qp, u64 *data);
bool (*reply)(struct rvt_qp *qp, u64 data);
void (*error)(struct rvt_qp *qp);
};
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
[STL_VERBS_EXTD_TID_RDMA] = {
.request = tid_rdma_conn_req,
.response = tid_rdma_conn_resp,
.reply = tid_rdma_conn_reply,
.error = tid_rdma_conn_error,
},
};
static struct workqueue_struct *opfn_wq;
static void opfn_schedule_conn_request(struct rvt_qp *qp);
static bool hfi1_opfn_extended(u32 bth1)
{
return !!(bth1 & IB_BTHE_E);
}
static void opfn_conn_request(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_atomic_wr wr;
u16 mask, capcode;
struct hfi1_opfn_type *extd;
u64 data;
unsigned long flags;
int ret = 0;
trace_hfi1_opfn_state_conn_request(qp);
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* Exit if the extended bit is not set, or if nothing is requested, or
* if we have completed all requests, or if a previous request is in
* progress
*/
if (!priv->opfn.extended || !priv->opfn.requested ||
priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
goto done;
mask = priv->opfn.requested & ~priv->opfn.completed;
capcode = ilog2(mask & ~(mask - 1)) + 1;
if (capcode >= STL_VERBS_EXTD_MAX) {
priv->opfn.completed |= OPFN_CODE(capcode);
goto done;
}
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->request || !extd->request(qp, &data)) {
/*
* Either there is no handler for this capability or the request
* packet could not be generated. Either way, mark it as done so
* we don't keep attempting to complete it.
*/
priv->opfn.completed |= OPFN_CODE(capcode);
goto done;
}
trace_hfi1_opfn_data_conn_request(qp, capcode, data);
data = (data & ~0xf) | capcode;
memset(&wr, 0, sizeof(wr));
wr.wr.opcode = IB_WR_OPFN;
wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
wr.compare_add = data;
priv->opfn.curr = capcode; /* A new request is now in progress */
/* Drop opfn.lock before calling ib_post_send() */
spin_unlock_irqrestore(&priv->opfn.lock, flags);
ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
if (ret)
goto err;
trace_hfi1_opfn_state_conn_request(qp);
return;
err:
trace_hfi1_msg_opfn_conn_request(qp, "ib_ost_send failed: ret = ",
(u64)ret);
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* In case of an unexpected error return from ib_post_send
* clear opfn.curr and reschedule to try again
*/
priv->opfn.curr = STL_VERBS_EXTD_NONE;
opfn_schedule_conn_request(qp);
done:
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_send_conn_request(struct work_struct *work)
{
struct hfi1_opfn_data *od;
struct hfi1_qp_priv *qpriv;
od = container_of(work, struct hfi1_opfn_data, opfn_work);
qpriv = container_of(od, struct hfi1_qp_priv, opfn);
opfn_conn_request(qpriv->owner);
}
/*
* When QP s_lock is held in the caller, the OPFN request must be scheduled
* to a different workqueue to avoid double locking QP s_lock in call to
* ib_post_send in opfn_conn_request
*/
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
trace_hfi1_opfn_state_sched_conn_request(qp);
queue_work(opfn_wq, &priv->opfn.opfn_work);
}
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_atomic_eth *ateth)
{
struct hfi1_qp_priv *priv = qp->priv;
u64 data = be64_to_cpu(ateth->compare_data);
struct hfi1_opfn_type *extd;
u8 capcode;
unsigned long flags;
trace_hfi1_opfn_state_conn_response(qp);
capcode = data & 0xf;
trace_hfi1_opfn_data_conn_response(qp, capcode, data);
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
return;
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->response) {
e->atomic_data = capcode;
return;
}
spin_lock_irqsave(&priv->opfn.lock, flags);
if (priv->opfn.completed & OPFN_CODE(capcode)) {
/*
* We are receiving a request for a feature that has already
* been negotiated. This may mean that the other side has reset
*/
priv->opfn.completed &= ~OPFN_CODE(capcode);
if (extd->error)
extd->error(qp);
}
if (extd->response(qp, &data))
priv->opfn.completed |= OPFN_CODE(capcode);
e->atomic_data = (data & ~0xf) | capcode;
trace_hfi1_opfn_state_conn_response(qp);
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_opfn_type *extd;
u8 capcode;
unsigned long flags;
trace_hfi1_opfn_state_conn_reply(qp);
capcode = data & 0xf;
trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
return;
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* Either there is no previous request or the reply is not for the
* current request
*/
if (!priv->opfn.curr || capcode != priv->opfn.curr)
goto done;
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->reply)
goto clear;
if (extd->reply(qp, data))
priv->opfn.completed |= OPFN_CODE(capcode);
clear:
/*
* Clear opfn.curr to indicate that the previous request is no longer in
* progress
*/
priv->opfn.curr = STL_VERBS_EXTD_NONE;
trace_hfi1_opfn_state_conn_reply(qp);
done:
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_error(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_opfn_type *extd = NULL;
unsigned long flags;
u16 capcode;
trace_hfi1_opfn_state_conn_error(qp);
trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
/*
* The QP has gone into the Error state. We have to invalidate all
* negotiated feature, including the one in progress (if any). The RC
* QP handling will clean the WQE for the connection request.
*/
spin_lock_irqsave(&priv->opfn.lock, flags);
while (priv->opfn.completed) {
capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
if (extd->error)
extd->error(qp);
priv->opfn.completed &= ~OPFN_CODE(capcode);
}
priv->opfn.extended = 0;
priv->opfn.requested = 0;
priv->opfn.curr = STL_VERBS_EXTD_NONE;
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
struct ib_qp *ibqp = &qp->ibqp;
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
if (attr_mask & IB_QP_RETRY_CNT)
priv->s_retry = attr->retry_cnt;
spin_lock_irqsave(&priv->opfn.lock, flags);
if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
struct tid_rdma_params *local = &priv->tid_rdma.local;
if (attr_mask & IB_QP_TIMEOUT)
priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
tid_rdma_opfn_init(qp, local);
/*
* We only want to set the OPFN requested bit when the
* QP transitions to RTS.
*/
if (attr_mask & IB_QP_STATE &&
attr->qp_state == IB_QPS_RTS) {
priv->opfn.requested |= OPFN_MASK(TID_RDMA);
/*
* If the QP is transitioning to RTS and the
* opfn.completed for TID RDMA has already been
* set, the QP is being moved *back* into RTS.
* We can now renegotiate the TID RDMA
* parameters.
*/
if (priv->opfn.completed &
OPFN_MASK(TID_RDMA)) {
priv->opfn.completed &=
~OPFN_MASK(TID_RDMA);
/*
* Since the opfn.completed bit was
* already set, it is safe to assume
* that the opfn.extended is also set.
*/
opfn_schedule_conn_request(qp);
}
}
} else {
memset(local, 0, sizeof(*local));
}
}
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
struct hfi1_qp_priv *priv = qp->priv;
if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
HFI1_CAP_IS_KSET(OPFN)) {
priv->opfn.extended = 1;
if (qp->state == IB_QPS_RTS)
opfn_conn_request(qp);
}
}
int opfn_init(void)
{
opfn_wq = alloc_workqueue("hfi_opfn",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
WQ_MEM_RECLAIM,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
if (!opfn_wq)
return -ENOMEM;
return 0;
}
void opfn_exit(void)
{
if (opfn_wq) {
destroy_workqueue(opfn_wq);
opfn_wq = NULL;
}
}

View File

@ -0,0 +1,85 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef _HFI1_OPFN_H
#define _HFI1_OPFN_H
/**
* DOC: Omni Path Feature Negotion (OPFN)
*
* OPFN is a discovery protocol for Intel Omni-Path fabric that
* allows two RC QPs to negotiate a common feature that both QPs
* can support. Currently, the only OPA feature that OPFN
* supports is TID RDMA.
*
* Architecture
*
* OPFN involves the communication between two QPs on the HFI
* level on an Omni-Path fabric, and ULPs have no knowledge of
* OPFN at all.
*
* Implementation
*
* OPFN extends the existing IB RC protocol with the following
* changes:
* -- Uses Bit 24 (reserved) of DWORD 1 of Base Transport
* Header (BTH1) to indicate that the RC QP supports OPFN;
* -- Uses a combination of RC COMPARE_SWAP opcode (0x13) and
* the address U64_MAX (0xFFFFFFFFFFFFFFFF) as an OPFN
* request; The 64-bit data carried with the request/response
* contains the parameters for negotiation and will be
* defined in tid_rdma.c file;
* -- Defines IB_WR_RESERVED3 as IB_WR_OPFN.
*
* The OPFN communication will be triggered when an RC QP
* receives a request with Bit 24 of BTH1 set. The responder QP
* will then post send an OPFN request with its local
* parameters, which will be sent to the requester QP once all
* existing requests on the responder QP side have been sent.
* Once the requester QP receives the OPFN request, it will
* keep a copy of the responder QP's parameters, and return a
* response packet with its own local parameters. The responder
* QP receives the response packet and keeps a copy of the requester
* QP's parameters. After this exchange, each side has the parameters
* for both sides and therefore can select the right parameters
* for future transactions
*/
/* STL Verbs Extended */
#define IB_BTHE_E_SHIFT 24
#define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX
struct ib_atomic_eth;
enum hfi1_opfn_codes {
STL_VERBS_EXTD_NONE = 0,
STL_VERBS_EXTD_TID_RDMA,
STL_VERBS_EXTD_MAX
};
struct hfi1_opfn_data {
u8 extended;
u16 requested;
u16 completed;
enum hfi1_opfn_codes curr;
/* serialize opfn function calls */
spinlock_t lock;
struct work_struct opfn_work;
};
/* WR opcode for OPFN */
#define IB_WR_OPFN IB_WR_RESERVED3
void opfn_send_conn_request(struct work_struct *work);
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_atomic_eth *ateth);
void opfn_conn_reply(struct rvt_qp *qp, u64 data);
void opfn_conn_error(struct rvt_qp *qp);
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask);
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1);
int opfn_init(void);
void opfn_exit(void);
#endif /* _HFI1_OPFN_H */

View File

@ -1599,8 +1599,7 @@ static void sc_piobufavail(struct send_context *sc)
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
uint i, n = 0, max_idx = 0;
u8 max_starved_cnt = 0;
uint i, n = 0, top_idx = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
@ -1619,11 +1618,18 @@ static void sc_piobufavail(struct send_context *sc)
if (n == ARRAY_SIZE(qps))
break;
wait = list_first_entry(list, struct iowait, list);
iowait_get_priority(wait);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
if (n) {
priv = qps[top_idx]->priv;
top_idx = iowait_priority_update_top(wait,
&priv->s_iowait,
n, top_idx);
}
/* refcount held until actual wake up */
qps[n++] = qp;
}
@ -1638,12 +1644,12 @@ static void sc_piobufavail(struct send_context *sc)
}
write_sequnlock_irqrestore(&sc->waitlock, flags);
/* Wake up the most starved one first */
/* Wake up the top-priority one first */
if (n)
hfi1_qp_wakeup(qps[max_idx],
hfi1_qp_wakeup(qps[top_idx],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
for (i = 0; i < n; i++)
if (i != max_idx)
if (i != top_idx)
hfi1_qp_wakeup(qps[i],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}

View File

@ -132,6 +132,18 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
.qpt_support = BIT(IB_QPT_RC),
},
[IB_WR_OPFN] = {
.length = sizeof(struct ib_atomic_wr),
.qpt_support = BIT(IB_QPT_RC),
.flags = RVT_OPERATION_USE_RESERVE,
},
[IB_WR_TID_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
.qpt_support = BIT(IB_QPT_RC),
.flags = RVT_OPERATION_IGN_RNR_CNT,
},
};
static void flush_list_head(struct list_head *l)
@ -285,6 +297,8 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
qp_set_16b(qp);
}
opfn_qp_init(qp, attr, attr_mask);
}
/**
@ -311,6 +325,8 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
/* fall through */
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
@ -422,6 +438,11 @@ static void hfi1_qp_schedule(struct rvt_qp *qp)
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
}
if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
ret = hfi1_schedule_tid_send(qp);
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
}
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@ -441,8 +462,27 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
struct hfi1_qp_priv *priv = qp->priv;
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
qp->s_flags &= ~RVT_S_BUSY;
/*
* If we are sending a first-leg packet from the second leg,
* we need to clear the busy flag from priv->s_flags to
* avoid a race condition when the qp wakes up before
* the call to hfi1_verbs_send() returns to the second
* leg. In that case, the second leg will terminate without
* being re-scheduled, resulting in failure to send TID RDMA
* WRITE DATA and TID RDMA ACK packets.
*/
if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
RVT_S_BUSY);
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
}
} else {
priv->s_flags &= ~RVT_S_BUSY;
}
}
static int iowait_sleep(
@ -479,6 +519,7 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
iowait_get_priority(&priv->s_iowait);
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
priv->s_iowait.lock = &sde->waitlock;
@ -528,6 +569,17 @@ static void iowait_sdma_drained(struct iowait *wait)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
static void hfi1_init_priority(struct iowait *w)
{
struct rvt_qp *qp = iowait_to_qp(w);
struct hfi1_qp_priv *priv = qp->priv;
if (qp->s_flags & RVT_S_ACK_PENDING)
w->priority++;
if (priv->s_flags & RVT_S_ACK_PENDING)
w->priority++;
}
/**
* qp_to_sdma_engine - map a qp to a send engine
* @qp: the QP
@ -685,10 +737,11 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait,
1,
_hfi1_do_send,
NULL,
_hfi1_do_tid_send,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
iowait_sdma_drained,
hfi1_init_priority);
return priv;
}
@ -696,6 +749,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
hfi1_qp_priv_tid_free(rdi, qp);
kfree(priv->s_ahg);
kfree(priv);
}
@ -729,6 +783,7 @@ void flush_qp_waiters(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
hfi1_tid_rdma_flush_wait(qp);
}
void stop_send_queue(struct rvt_qp *qp)
@ -736,12 +791,16 @@ void stop_send_queue(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv;
iowait_cancel_work(&priv->s_iowait);
if (cancel_work_sync(&priv->tid_rdma.trigger_work))
rvt_put_qp(qp);
}
void quiesce_qp(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
hfi1_del_tid_reap_timer(qp);
hfi1_del_tid_retry_timer(qp);
iowait_sdma_drain(&priv->s_iowait);
qp_pio_drain(qp);
flush_tx_list(qp);
@ -749,8 +808,13 @@ void quiesce_qp(struct rvt_qp *qp)
void notify_qp_reset(struct rvt_qp *qp)
{
hfi1_qp_kern_exp_rcv_clear_all(qp);
qp->r_adefered = 0;
clear_ahg(qp);
/* Clear any OPFN state */
if (qp->ibqp.qp_type == IB_QPT_RC)
opfn_conn_error(qp);
}
/*
@ -832,7 +896,8 @@ void notify_error_qp(struct rvt_qp *qp)
if (lock) {
write_seqlock(lock);
if (!list_empty(&priv->s_iowait.list) &&
!(qp->s_flags & RVT_S_BUSY)) {
!(qp->s_flags & RVT_S_BUSY) &&
!(priv->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
@ -841,7 +906,8 @@ void notify_error_qp(struct rvt_qp *qp)
write_sequnlock(lock);
}
if (!(qp->s_flags & RVT_S_BUSY)) {
if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
rvt_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;

View File

@ -63,11 +63,17 @@ extern const struct rvt_operation_params hfi1_post_parms[];
* HFI1_S_AHG_VALID - ahg header valid on chip
* HFI1_S_AHG_CLEAR - have send engine clear ahg state
* HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
* HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
* HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
* HFI1_S_WAIT_HALT - halt the first leg send engine
* HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
*/
#define HFI1_S_AHG_VALID 0x80000000
#define HFI1_S_AHG_CLEAR 0x40000000
#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
#define HFI1_S_WAIT_TID_SPACE 0x10000000
#define HFI1_S_WAIT_TID_RESP 0x08000000
#define HFI1_S_WAIT_HALT 0x04000000
#define HFI1_S_MIN_BIT_MASK 0x01000000
/*
@ -76,6 +82,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
#define HFI1_S_ANY_TID_WAIT_SEND (RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA)
/*
* Send if not busy or waiting for I/O and either

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,51 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef HFI1_RC_H
#define HFI1_RC_H
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
{
unsigned int next;
next = n + 1;
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
qp->s_tail_ack_queue = next;
qp->s_acked_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
struct rvt_qp *qp)
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
u32 len;
len = delta_psn(psn, wqe->psn) * pmtu;
return rvt_restart_sge(ss, wqe, len);
}
struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
u8 *prev_ack, bool *scheduled);
int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
struct hfi1_ctxtdata *rcd);
struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct hfi1_ibport *ibp);
#endif /* HFI1_RC_H */

View File

@ -250,7 +250,6 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth1, u32 bth2)
{
bth1 |= qp->remote_qpn;
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
@ -272,13 +271,13 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
*/
static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2,
int middle,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp = ps->ibp;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 bth1 = 0;
u32 slid;
u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
u8 l4 = OPA_16B_L4_IB_LOCAL;
@ -360,12 +359,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
*/
static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2,
int middle,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp = ps->ibp;
u32 bth1 = 0;
u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
u16 lrh0 = HFI1_LRH_BTH;
u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
@ -415,7 +414,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
/* We support only two types - 9B and 16B for now */
@ -425,7 +424,7 @@ static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = {
};
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
@ -446,18 +445,21 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
priv->s_ahg->ahgidx = 0;
/* Make the appropriate header */
hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps);
hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth1, bth2, middle,
ps);
}
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
/**
* schedule_send_yield - test for a yield required for QP send engine
* hfi1_schedule_send_yield - test for a yield required for QP
* send engine
* @timeout: Final time for timeout slice for jiffies
* @qp: a pointer to QP
* @ps: a pointer to a structure with commonly lookup values for
* the the send engine progress
* @tid - true if it is the tid leg
*
* This routine checks if the time slice for the QP has expired
* for RC QPs, if so an additional work entry is queued. At this
@ -465,8 +467,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
* returns true if a yield is required, otherwise, false
* is returned.
*/
static bool schedule_send_yield(struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
bool tid)
{
ps->pkts_sent = true;
@ -474,8 +476,24 @@ static bool schedule_send_yield(struct rvt_qp *qp,
if (!ps->in_thread ||
workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
spin_lock_irqsave(&qp->s_lock, ps->flags);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
if (!tid) {
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
} else {
struct hfi1_qp_priv *priv = qp->priv;
if (priv->s_flags &
HFI1_S_TID_BUSY_SET) {
qp->s_flags &= ~RVT_S_BUSY;
priv->s_flags &=
~(HFI1_S_TID_BUSY_SET |
RVT_S_BUSY);
} else {
priv->s_flags &= ~RVT_S_BUSY;
}
hfi1_schedule_tid_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, ps->flags);
this_cpu_inc(*ps->ppd->dd->send_schedule);
trace_hfi1_rc_expired_time_slice(qp, true);
@ -576,6 +594,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
do {
/* Check for a constructed packet to be sent. */
if (ps.s_txreq) {
if (priv->s_flags & HFI1_S_TID_BUSY_SET)
qp->s_flags |= RVT_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
@ -585,7 +605,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
return;
/* allow other tasks to run */
if (schedule_send_yield(qp, &ps))
if (hfi1_schedule_send_yield(qp, &ps, false))
return;
spin_lock_irqsave(&qp->s_lock, ps.flags);

View File

@ -1747,10 +1747,9 @@ retry:
*/
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
struct iowait *wait, *nw;
struct iowait *wait, *nw, *twait;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
u8 max_starved_cnt = 0;
uint i, n = 0, seq, tidx = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@ -1775,13 +1774,20 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
continue;
if (n == ARRAY_SIZE(waits))
break;
iowait_init_priority(wait);
num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
/* Find the most starved wait memeber */
iowait_starve_find_max(wait, &max_starved_cnt,
n, &max_idx);
/* Find the top-priority wait memeber */
if (n) {
twait = waits[tidx];
tidx =
iowait_priority_update_top(wait,
twait,
n,
tidx);
}
list_del_init(&wait->list);
waits[n++] = wait;
}
@ -1790,12 +1796,12 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
}
} while (read_seqretry(&sde->waitlock, seq));
/* Schedule the most starved one first */
/* Schedule the top-priority entry first */
if (n)
waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
for (i = 0; i < n; i++)
if (i != max_idx)
if (i != tidx)
waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}

View File

@ -91,6 +91,7 @@ struct sdma_desc {
#define SDMA_TXREQ_F_URGENT 0x0001
#define SDMA_TXREQ_F_AHG_COPY 0x0002
#define SDMA_TXREQ_F_USE_AHG 0x0004
#define SDMA_TXREQ_F_VIP 0x0010
struct sdma_txreq;
typedef void (*callback_t)(struct sdma_txreq *, int);

View File

@ -498,7 +498,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
@ -508,7 +508,7 @@ static ssize_t board_id_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
@ -524,7 +524,7 @@ static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
@ -536,7 +536,7 @@ static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/*
@ -555,7 +555,7 @@ static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
@ -567,7 +567,7 @@ static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
@ -579,7 +579,7 @@ static ssize_t chip_reset_store(struct device *device,
size_t count)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
@ -609,7 +609,7 @@ static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
struct hfi1_temp temp;
int ret;

File diff suppressed because it is too large Load Diff

View File

@ -6,8 +6,317 @@
#ifndef HFI1_TID_RDMA_H
#define HFI1_TID_RDMA_H
#include <linux/circ_buf.h>
#include "common.h"
/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
/*
* Bit definitions for priv->s_flags.
* These bit flags overload the bit flags defined for the QP's s_flags.
* Due to the fact that these bit fields are used only for the QP priv
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
* HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
*/
#define HFI1_S_TID_BUSY_SET BIT(0)
/* BIT(1) reserved for RVT_S_BUSY. */
#define HFI1_R_TID_RSC_TIMER BIT(2)
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
/* BIT(16) reserved for RVT_S_SEND_ONE */
#define HFI1_S_TID_RETRY_TIMER BIT(17)
/* BIT(18) reserved for RVT_S_ECN. */
#define HFI1_R_TID_SW_PSN BIT(19)
/* BIT(26) reserved for HFI1_S_WAIT_HALT */
/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
/*
* Unlike regular IB RDMA VERBS, which do not require an entry
* in the s_ack_queue, TID RDMA WRITE requests do because they
* generate responses.
* Therefore, the s_ack_queue needs to be extended by a certain
* amount. The key point is that the queue needs to be extended
* without letting the "user" know so they user doesn't end up
* using these extra entries.
*/
#define HFI1_TID_RDMA_WRITE_CNT 8
struct tid_rdma_params {
struct rcu_head rcu_head;
u32 qp;
u32 max_len;
u16 jkey;
u8 max_read;
u8 max_write;
u8 timeout;
u8 urg;
u8 version;
};
struct tid_rdma_qp_params {
struct work_struct trigger_work;
struct tid_rdma_params local;
struct tid_rdma_params __rcu *remote;
};
/* Track state for each hardware flow */
struct tid_flow_state {
u32 generation;
u32 psn;
u32 r_next_psn; /* next PSN to be received (in TID space) */
u8 index;
u8 last_index;
u8 flags;
};
enum tid_rdma_req_state {
TID_REQUEST_INACTIVE = 0,
TID_REQUEST_INIT,
TID_REQUEST_INIT_RESEND,
TID_REQUEST_ACTIVE,
TID_REQUEST_RESEND,
TID_REQUEST_RESEND_ACTIVE,
TID_REQUEST_QUEUED,
TID_REQUEST_SYNC,
TID_REQUEST_RNR_NAK,
TID_REQUEST_COMPLETE,
};
struct tid_rdma_request {
struct rvt_qp *qp;
struct hfi1_ctxtdata *rcd;
union {
struct rvt_swqe *swqe;
struct rvt_ack_entry *ack;
} e;
struct tid_rdma_flow *flows; /* array of tid flows */
struct rvt_sge_state ss; /* SGE state for TID RDMA requests */
u16 n_flows; /* size of the flow buffer window */
u16 setup_head; /* flow index we are setting up */
u16 clear_tail; /* flow index we are clearing */
u16 flow_idx; /* flow index most recently set up */
u16 acked_tail;
u32 seg_len;
u32 total_len;
u32 r_ack_psn; /* next expected ack PSN */
u32 r_flow_psn; /* IB PSN of next segment start */
u32 r_last_acked; /* IB PSN of last ACK'ed packet */
u32 s_next_psn; /* IB PSN of next segment start for read */
u32 total_segs; /* segments required to complete a request */
u32 cur_seg; /* index of current segment */
u32 comp_seg; /* index of last completed segment */
u32 ack_seg; /* index of last ack'ed segment */
u32 alloc_seg; /* index of next segment to be allocated */
u32 isge; /* index of "current" sge */
u32 ack_pending; /* num acks pending for this request */
enum tid_rdma_req_state state;
};
/*
* When header suppression is used, PSNs associated with a "flow" are
* relevant (and not the PSNs maintained by verbs). Track per-flow
* PSNs here for a TID RDMA segment.
*
*/
struct flow_state {
u32 flags;
u32 resp_ib_psn; /* The IB PSN of the response for this flow */
u32 generation; /* generation of flow */
u32 spsn; /* starting PSN in TID space */
u32 lpsn; /* last PSN in TID space */
u32 r_next_psn; /* next PSN to be received (in TID space) */
/* For tid rdma read */
u32 ib_spsn; /* starting PSN in Verbs space */
u32 ib_lpsn; /* last PSn in Verbs space */
};
struct tid_rdma_pageset {
dma_addr_t addr : 48; /* Only needed for the first page */
u8 idx: 8;
u8 count : 7;
u8 mapped: 1;
};
/**
* kern_tid_node - used for managing TID's in TID groups
*
* @grp_idx: rcd relative index to tid_group
* @map: grp->map captured prior to programming this TID group in HW
* @cnt: Only @cnt of available group entries are actually programmed
*/
struct kern_tid_node {
struct tid_group *grp;
u8 map;
u8 cnt;
};
/* Overall info for a TID RDMA segment */
struct tid_rdma_flow {
/*
* While a TID RDMA segment is being transferred, it uses a QP number
* from the "KDETH section of QP numbers" (which is different from the
* QP number that originated the request). Bits 11-15 of these QP
* numbers identify the "TID flow" for the segment.
*/
struct flow_state flow_state;
struct tid_rdma_request *req;
u32 tid_qpn;
u32 tid_offset;
u32 length;
u32 sent;
u8 tnode_cnt;
u8 tidcnt;
u8 tid_idx;
u8 idx;
u8 npagesets;
u8 npkts;
u8 pkt;
u8 resync_npkts;
struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
u32 tid_entry[TID_RDMA_MAX_PAGES];
};
enum tid_rnr_nak_state {
TID_RNR_NAK_INIT = 0,
TID_RNR_NAK_SEND,
TID_RNR_NAK_SENT,
};
bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
void tid_rdma_conn_error(struct rvt_qp *qp);
void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
struct rvt_sge_state *ss, bool *last);
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
/**
* trdma_clean_swqe - clean flows for swqe if large send queue
* @qp: the qp
* @wqe: the send wqe
*/
static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
if (!wqe->priv)
return;
__trdma_clean_swqe(qp, wqe);
}
void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp);
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
struct ib_qp_init_attr *init_attr);
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);
int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);
struct cntr_entry;
u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data);
u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, u32 *bth1,
u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u32 *bth0,
u32 *bth1, u32 *bth2, u32 *len, bool *last);
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
struct hfi1_pportdata *ppd,
struct hfi1_packet *packet);
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
u32 *bth2);
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
struct rvt_swqe *wqe)
{
if (wqe->priv &&
(wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_RDMA_WRITE) &&
wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
setup_tid_rdma_wqe(qp, wqe);
}
u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
void hfi1_compute_tid_rdma_flow_wt(void);
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u32 *bth1,
u32 bth2, u32 *len,
struct rvt_sge_state **ss);
void hfi1_del_tid_reap_timer(struct rvt_qp *qp);
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet);
bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u16 iflow,
u32 *bth1, u32 *bth2);
void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);
void hfi1_add_tid_retry_timer(struct rvt_qp *qp);
void hfi1_del_tid_retry_timer(struct rvt_qp *qp);
u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, u32 *bth1,
u32 *bth2, u16 fidx);
void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet);
struct hfi1_pkt_state;
int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void _hfi1_do_tid_send(struct work_struct *work);
bool hfi1_schedule_tid_send(struct rvt_qp *qp);
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
#endif /* HFI1_TID_RDMA_H */

View File

@ -46,6 +46,7 @@
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "exp_rcv.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
@ -128,6 +129,15 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define TID_RDMA_KDETH "kdeth0 0x%x kdeth1 0x%x"
#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_READ_RSP_PRN "verbs_qp 0x%x"
#define TID_WRITE_REQ_PRN "original_qp 0x%x"
#define TID_WRITE_RSP_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_WRITE_DATA_PRN "verbs_qp 0x%x"
#define TID_ACK_PRN "tid_flow_psn 0x%x verbs_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_RESYNC_PRN "verbs_qp 0x%x"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@ -322,6 +332,99 @@ const char *parse_everbs_hdrs(
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
be32_to_cpu(eh->aeth) & IB_MSN_MASK);
break;
case OP(TID_RDMA, WRITE_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_WRITE_REQ_PRN,
le32_to_cpu(eh->tid_rdma.w_req.kdeth0),
le32_to_cpu(eh->tid_rdma.w_req.kdeth1),
ib_u64_get(&eh->tid_rdma.w_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.w_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.w_req.reth.length),
be32_to_cpu(eh->tid_rdma.w_req.verbs_qp));
break;
case OP(TID_RDMA, WRITE_RESP):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_WRITE_RSP_PRN,
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth0),
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth1),
be32_to_cpu(eh->tid_rdma.w_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.w_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.w_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.w_rsp.verbs_qp));
break;
case OP(TID_RDMA, WRITE_DATA_LAST):
case OP(TID_RDMA, WRITE_DATA):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " TID_WRITE_DATA_PRN,
le32_to_cpu(eh->tid_rdma.w_data.kdeth0),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, SH),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TID),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.w_data.kdeth1),
KDETH_GET(eh->tid_rdma.w_data.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.w_data.verbs_qp));
break;
case OP(TID_RDMA, READ_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_READ_REQ_PRN,
le32_to_cpu(eh->tid_rdma.r_req.kdeth0),
le32_to_cpu(eh->tid_rdma.r_req.kdeth1),
ib_u64_get(&eh->tid_rdma.r_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.r_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.r_req.reth.length),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.r_req.verbs_qp));
break;
case OP(TID_RDMA, READ_RESP):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " AETH_PRN " "
TID_READ_RSP_PRN,
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth0),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, SH),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TID),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth1),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.r_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.r_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.r_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
break;
case OP(TID_RDMA, ACK):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_ACK_PRN,
le32_to_cpu(eh->tid_rdma.ack.kdeth0),
le32_to_cpu(eh->tid_rdma.ack.kdeth1),
be32_to_cpu(eh->tid_rdma.ack.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.ack.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.ack.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.ack.verbs_psn),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.ack.verbs_qp));
break;
case OP(TID_RDMA, RESYNC):
trace_seq_printf(p, TID_RDMA_KDETH " " TID_RESYNC_PRN,
le32_to_cpu(eh->tid_rdma.resync.kdeth0),
le32_to_cpu(eh->tid_rdma.resync.kdeth1),
be32_to_cpu(eh->tid_rdma.resync.verbs_qp));
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
@ -394,6 +497,21 @@ const char *print_u32_array(
return ret;
}
u8 hfi1_trace_get_tid_ctrl(u32 ent)
{
return EXP_TID_GET(ent, CTRL);
}
u16 hfi1_trace_get_tid_len(u32 ent)
{
return EXP_TID_GET(ent, LEN);
}
u16 hfi1_trace_get_tid_idx(u32 ent)
{
return EXP_TID_GET(ent, IDX);
}
__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);

View File

@ -63,3 +63,4 @@ __print_symbolic(etype, \
#include "trace_tx.h"
#include "trace_mmu.h"
#include "trace_iowait.h"
#include "trace_tid.h"

View File

@ -79,6 +79,14 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
ib_opcode_name(TID_RDMA_WRITE_REQ), \
ib_opcode_name(TID_RDMA_WRITE_RESP), \
ib_opcode_name(TID_RDMA_WRITE_DATA), \
ib_opcode_name(TID_RDMA_WRITE_DATA_LAST), \
ib_opcode_name(TID_RDMA_READ_REQ), \
ib_opcode_name(TID_RDMA_READ_RESP), \
ib_opcode_name(TID_RDMA_RESYNC), \
ib_opcode_name(TID_RDMA_ACK), \
ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \

View File

@ -109,6 +109,54 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_rc_template, hfi1_rc_completion,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DECLARE_EVENT_CLASS(/* rc_ack */
hfi1_rc_ack_template,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u32, aeth)
__field(u32, psn)
__field(u8, opcode)
__field(u32, spsn)
__field(u32, lpsn)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->aeth = aeth;
__entry->psn = psn;
__entry->opcode = wqe->wr.opcode;
__entry->spsn = wqe->psn;
__entry->lpsn = wqe->lpsn;
),
TP_printk(/* print */
"[%s] qpn 0x%x aeth 0x%x psn 0x%x opcode 0x%x spsn 0x%x lpsn 0x%x",
__get_str(dev),
__entry->qpn,
__entry->aeth,
__entry->psn,
__entry->opcode,
__entry->spsn,
__entry->lpsn
)
);
DEFINE_EVENT(/* do_rc_ack */
hfi1_rc_ack_template, hfi1_rc_ack_do,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe)
);
#endif /* __HFI1_TRACE_RC_H */
#undef TRACE_INCLUDE_PATH

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@ -128,111 +128,6 @@ TRACE_EVENT(hfi1_receive_interrupt,
)
);
DECLARE_EVENT_CLASS(
hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
u32 npages, unsigned long va, unsigned long pa,
dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
TP_STRUCT__entry(
__field(unsigned int, ctxt)
__field(u16, subctxt)
__field(u32, rarr)
__field(u32, npages)
__field(unsigned long, va)
__field(unsigned long, pa)
__field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->rarr = rarr;
__entry->npages = npages;
__entry->va = va;
__entry->pa = pa;
__entry->dma = dma;
),
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
__entry->ctxt,
__entry->subctxt,
__entry->rarr,
__entry->npages,
__entry->pa,
__entry->va,
__entry->dma
)
);
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
TRACE_EVENT(
hfi1_put_tid,
TP_PROTO(struct hfi1_devdata *dd,
u32 index, u32 type, unsigned long pa, u16 order),
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
__field(unsigned long, pa);
__field(u32, index);
__field(u32, type);
__field(u16, order);
),
TP_fast_assign(
DD_DEV_ASSIGN(dd);
__entry->pa = pa;
__entry->index = index;
__entry->type = type;
__entry->order = order;
),
TP_printk("[%s] type %s pa %lx index %u order %u",
__get_str(dev),
show_tidtype(__entry->type),
__entry->pa,
__entry->index,
__entry->order
)
);
TRACE_EVENT(hfi1_exp_tid_inval,
TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
u32 npages, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
TP_STRUCT__entry(
__field(unsigned int, ctxt)
__field(u16, subctxt)
__field(unsigned long, va)
__field(u32, rarr)
__field(u32, npages)
__field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->va = va;
__entry->rarr = rarr;
__entry->npages = npages;
__entry->dma = dma;
),
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
__entry->ctxt,
__entry->subctxt,
__entry->rarr,
__entry->npages,
__entry->va,
__entry->dma
)
);
TRACE_EVENT(hfi1_mmu_invalidate,
TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
unsigned long start, unsigned long end),

File diff suppressed because it is too large Load Diff

View File

@ -114,19 +114,27 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
__field(u32, qpn)
__field(u32, flags)
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
),
TP_fast_assign(
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->flags = flags;
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->ps_flags =
((struct hfi1_qp_priv *)qp->priv)->s_flags;
__entry->iow_flags =
((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
),
TP_printk(
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx",
__get_str(dev),
__entry->qpn,
__entry->flags,
__entry->s_flags
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags
)
);
@ -838,6 +846,12 @@ DEFINE_EVENT(
TP_ARGS(qp, flag)
);
DEFINE_EVENT(/* event */
hfi1_do_send_template, hfi1_rc_do_tid_send,
TP_PROTO(struct rvt_qp *qp, bool flag),
TP_ARGS(qp, flag)
);
DEFINE_EVENT(
hfi1_do_send_template, hfi1_rc_expired_time_slice,
TP_PROTO(struct rvt_qp *qp, bool flag),

View File

@ -271,7 +271,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ps->s_txreq->ss = &qp->s_sge;
ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_psn++), middle, ps);
qp->remote_qpn, mask_psn(qp->s_psn++),
middle, ps);
return 1;
done_free_tx:

View File

@ -222,31 +222,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
ssge.num_sge = swqe->wr.num_sge;
sge = &ssge.sge;
while (length) {
u32 len = sge->length;
u32 len = rvt_get_sge_length(sge, length);
if (len > length)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--ssge.num_sge)
*sge = *ssge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
rvt_update_sge(&ssge, len, false);
length -= len;
}
rvt_put_ss(&qp->r_sge);

View File

@ -48,7 +48,6 @@
*/
#include "hfi.h"
#include "exp_rcv.h"
struct tid_pageset {

View File

@ -91,9 +91,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
/* Convert to number of pages */
size = DIV_ROUND_UP(size, PAGE_SIZE);
down_read(&mm->mmap_sem);
pinned = mm->pinned_vm;
up_read(&mm->mmap_sem);
pinned = atomic64_read(&mm->pinned_vm);
/* First, check the absolute limit against all pinned pages. */
if (pinned + npages >= ulimit && !can_lock)
@ -111,9 +109,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
if (ret < 0)
return ret;
down_write(&mm->mmap_sem);
mm->pinned_vm += ret;
up_write(&mm->mmap_sem);
atomic64_add(ret, &mm->pinned_vm);
return ret;
}
@ -130,8 +126,6 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
}
if (mm) { /* during close after signal, mm can be NULL */
down_write(&mm->mmap_sem);
mm->pinned_vm -= npages;
up_write(&mm->mmap_sem);
atomic64_sub(npages, &mm->pinned_vm);
}
}

View File

@ -144,8 +144,10 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list))
if (list_empty(&pq->busy.list)) {
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL);
activate_packet_queue, NULL, NULL);
pq->reqidx = 0;
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
@ -1126,7 +1128,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
0xffffffull),
psn = val & mask;
if (expct)
psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK);
psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
else
psn = psn + frags;
return psn & mask;

View File

@ -161,10 +161,12 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the
*/
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
@ -200,6 +202,14 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@ -243,6 +253,17 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
/* TID RDMA has separate handlers for different opcodes.*/
[IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
[IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
[IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
[IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
@ -308,7 +329,7 @@ static inline opcode_handler qp_ok(struct hfi1_packet *packet)
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) {
/*
* In order to drop non-IB traffic we
* set PbcInsertHrc to NONE (0x2).
@ -319,8 +340,9 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* packet will not be delivered to the
* correct context.
*/
pbc &= ~PBC_INSERT_HCRC_SMASK;
pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
else
} else {
/*
* In order to drop regular verbs
* traffic we set the PbcTestEbp
@ -330,10 +352,129 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* triggered and will be dropped.
*/
pbc |= PBC_TEST_EBP;
}
#endif
return pbc;
}
static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet)
{
if (packet->qp->ibqp.qp_type != IB_QPT_RC ||
!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
return NULL;
if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA)
return opcode_handler_tbl[opcode];
return NULL;
}
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler opcode_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
if (unlikely(tlen < 15 * sizeof(u32)))
goto drop;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh != HFI1_LRH_BTH)
goto drop;
packet->ohdr = &hdr->u.oth;
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* verbs_qp can be picked up from any tid_rdma header struct */
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) &
RVT_QPN_MASK;
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
goto drop_rcu;
spin_lock_irqsave(&packet->qp->r_lock, flags);
opcode_handler = tid_qp_ok(opcode, packet);
if (likely(opcode_handler))
opcode_handler(packet);
else
goto drop_unlock;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
return;
drop_unlock:
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
rcu_read_unlock();
drop:
ibp->rvp.n_pkt_drops++;
}
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler opcode_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
if (unlikely(tlen < 15 * sizeof(u32)))
goto drop;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh != HFI1_LRH_BTH)
goto drop;
packet->ohdr = &hdr->u.oth;
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* verbs_qp can be picked up from any tid_rdma header struct */
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) &
RVT_QPN_MASK;
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
goto drop_rcu;
spin_lock_irqsave(&packet->qp->r_lock, flags);
opcode_handler = tid_qp_ok(opcode, packet);
if (likely(opcode_handler))
opcode_handler(packet);
else
goto drop_unlock;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
return;
drop_unlock:
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
rcu_read_unlock();
drop:
ibp->rvp.n_pkt_drops++;
}
static int hfi1_do_pkey_check(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
@ -504,11 +645,28 @@ static void verbs_sdma_complete(
hfi1_put_txreq(tx);
}
void hfi1_wait_kmem(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_qp *ibqp = &qp->ibqp;
struct ib_device *ibdev = ibqp->device;
struct hfi1_ibdev *dev = to_idev(ibdev);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
}
static int wait_kmem(struct hfi1_ibdev *dev,
struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
int ret = 0;
@ -517,15 +675,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
hfi1_wait_kmem(qp);
write_sequnlock(&dev->iowait_lock);
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
@ -553,11 +703,7 @@ static noinline int build_verbs_ulp_payload(
int ret = 0;
while (length) {
len = ss->sge.length;
if (len > length)
len = length;
if (len > ss->sge.sge_length)
len = ss->sge.sge_length;
len = rvt_get_sge_length(&ss->sge, length);
WARN_ON_ONCE(len == 0);
ret = sdma_txadd_kvaddr(
sde->dd,
@ -678,6 +824,15 @@ bail_txadd:
return ret;
}
static u64 update_hcrc(u8 opcode, u64 pbc)
{
if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) {
pbc &= ~PBC_INSERT_HCRC_SMASK;
pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT;
}
return pbc;
}
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
@ -723,6 +878,9 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
qp->srate_mbps,
vl,
plen);
/* Update HCRC based on packet opcode */
pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
@ -787,6 +945,7 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
iowait_get_priority(&priv->s_iowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
priv->s_iowait.lock = &sc->waitlock;
@ -871,6 +1030,9 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
/* Update HCRC based on packet opcode */
pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@ -914,12 +1076,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (ss) {
while (len) {
void *addr = ss->sge.vaddr;
u32 slen = ss->sge.length;
u32 slen = rvt_get_sge_length(&ss->sge, len);
if (slen > len)
slen = len;
if (slen > ss->sge.sge_length)
slen = ss->sge.sge_length;
rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
@ -1188,7 +1346,9 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_mr_size = U64_MAX;
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
rdi->dparms.props.max_qp_wr =
(hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
rdi->dparms.props.max_send_sge = hfi1_max_sges;
rdi->dparms.props.max_recv_sge = hfi1_max_sges;
rdi->dparms.props.max_sge_rd = hfi1_max_sges;
@ -1622,6 +1782,7 @@ static const struct ib_device_ops hfi1_dev_ops = {
.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
.get_dev_fw_str = hfi1_get_dev_fw_str,
.get_hw_stats = get_hw_stats,
.init_port = hfi1_create_port_files,
.modify_device = modify_device,
/* keep process mad in the driver */
.process_mad = hfi1_process_mad,
@ -1679,7 +1840,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
/*
* Fill in rvt info object.
*/
dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
@ -1743,6 +1903,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
dd->verbs_dev.rdi.dparms.reserved_operations = 1;
dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;

View File

@ -72,6 +72,7 @@ struct hfi1_packet;
#include "iowait.h"
#include "tid_rdma.h"
#include "opfn.h"
#define HFI1_MAX_RDMA_ATOMIC 16
@ -158,10 +159,68 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
struct hfi1_ctxtdata *rcd; /* QP's receive context */
struct page **pages; /* for TID page scan */
u32 tid_enqueue; /* saved when tid waited */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
struct timer_list s_tid_timer; /* for timing tid wait */
struct timer_list s_tid_retry_timer; /* for timing tid ack */
struct list_head tid_wait; /* for queueing tid space */
struct hfi1_opfn_data opfn;
struct tid_flow_state flow_state;
struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner;
u8 hdr_type; /* 9B or 16B */
struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */
atomic_t n_requests; /* # of TID RDMA requests in the */
/* queue */
atomic_t n_tid_requests; /* # of sent TID RDMA requests */
unsigned long tid_timer_timeout_jiffies;
unsigned long tid_retry_timeout_jiffies;
/* variables for the TID RDMA SE state machine */
u8 s_state;
u8 s_retry;
u8 rnr_nak_state; /* RNR NAK state */
u8 s_nak_state;
u32 s_nak_psn;
u32 s_flags;
u32 s_tid_cur;
u32 s_tid_head;
u32 s_tid_tail;
u32 r_tid_head; /* Most recently added TID RDMA request */
u32 r_tid_tail; /* the last completed TID RDMA request */
u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */
u32 r_tid_alloc; /* Request for which we are allocating resources */
u32 pending_tid_w_segs; /* Num of pending tid write segments */
u32 pending_tid_w_resp; /* Num of pending tid write responses */
u32 alloc_w_segs; /* Number of segments for which write */
/* resources have been allocated for this QP */
/* For TID RDMA READ */
u32 tid_r_reqs; /* Num of tid reads requested */
u32 tid_r_comp; /* Num of tid reads completed */
u32 pending_tid_r_segs; /* Num of pending tid read segments */
u16 pkts_ps; /* packets per segment */
u8 timeout_shift; /* account for number of packets per segment */
u32 r_next_psn_kdeth;
u32 r_next_psn_kdeth_save;
u32 s_resync_psn;
u8 sync_pt; /* Set when QP reaches sync point */
u8 resync;
};
#define HFI1_QP_WQE_INVALID ((u32)-1)
struct hfi1_swqe_priv {
struct tid_rdma_request tid_req;
struct rvt_sge_state ss; /* Used for TID RDMA READ Request */
};
struct hfi1_ack_priv {
struct rvt_sge_state ss; /* used for TID WRITE RESP */
struct tid_rdma_request tid_req;
};
/*
@ -225,6 +284,7 @@ struct hfi1_ibdev {
struct kmem_cache *verbs_txreq_cache;
u64 n_txwait;
u64 n_kmem_wait;
u64 n_tidwait;
/* protect iowait lists */
seqlock_t iowait_lock ____cacheline_aligned_in_smp;
@ -312,6 +372,31 @@ static inline u32 delta_psn(u32 a, u32 b)
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
static inline struct tid_rdma_request *wqe_to_tid_req(struct rvt_swqe *wqe)
{
return &((struct hfi1_swqe_priv *)wqe->priv)->tid_req;
}
static inline struct tid_rdma_request *ack_to_tid_req(struct rvt_ack_entry *e)
{
return &((struct hfi1_ack_priv *)e->priv)->tid_req;
}
/*
* Look through all the active flows for a TID RDMA request and find
* the one (if it exists) that contains the specified PSN.
*/
static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
{
return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
(psn & HFI1_KDETH_BTH_SEQ_MASK));
}
static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
{
return __full_flow_psn(&flow->flow_state, psn);
}
struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);
@ -356,9 +441,12 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
bool tid);
void _hfi1_do_send(struct work_struct *work);
void hfi1_do_send_from_rvt(struct rvt_qp *qp);
@ -377,6 +465,10 @@ int hfi1_register_ib_device(struct hfi1_devdata *);
void hfi1_unregister_ib_device(struct hfi1_devdata *);
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet);
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet);
void hfi1_ib_rcv(struct hfi1_packet *packet);
void hfi1_16B_rcv(struct hfi1_packet *packet);
@ -394,6 +486,16 @@ static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
}
void hfi1_wait_kmem(struct rvt_qp *qp);
static inline void hfi1_trdma_send_complete(struct rvt_qp *qp,
struct rvt_swqe *wqe,
enum ib_wc_status status)
{
trdma_clean_swqe(qp, wqe);
rvt_send_complete(qp, wqe, status);
}
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
extern const u8 hdr_len_by_opcode[];

Some files were not shown because too many files have changed in this diff Show More