linux/drivers/infiniband/hw/mlx5/counters.c

724 lines
19 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#include "mlx5_ib.h"
#include <linux/mlx5/eswitch.h>
#include "counters.h"
#include "ib_rep.h"
#include "qp.h"
struct mlx5_ib_counter {
const char *name;
size_t offset;
};
#define INIT_Q_COUNTER(_name) \
{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
static const struct mlx5_ib_counter basic_q_cnts[] = {
INIT_Q_COUNTER(rx_write_requests),
INIT_Q_COUNTER(rx_read_requests),
INIT_Q_COUNTER(rx_atomic_requests),
INIT_Q_COUNTER(out_of_buffer),
};
static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
INIT_Q_COUNTER(out_of_sequence),
};
static const struct mlx5_ib_counter retrans_q_cnts[] = {
INIT_Q_COUNTER(duplicate_request),
INIT_Q_COUNTER(rnr_nak_retry_err),
INIT_Q_COUNTER(packet_seq_err),
INIT_Q_COUNTER(implied_nak_seq_err),
INIT_Q_COUNTER(local_ack_timeout_err),
};
#define INIT_CONG_COUNTER(_name) \
{ .name = #_name, .offset = \
MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
static const struct mlx5_ib_counter cong_cnts[] = {
INIT_CONG_COUNTER(rp_cnp_ignored),
INIT_CONG_COUNTER(rp_cnp_handled),
INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
INIT_CONG_COUNTER(np_cnp_sent),
};
static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(resp_local_length_error),
INIT_Q_COUNTER(resp_cqe_error),
INIT_Q_COUNTER(req_cqe_error),
INIT_Q_COUNTER(req_remote_invalid_request),
INIT_Q_COUNTER(req_remote_access_errors),
INIT_Q_COUNTER(resp_remote_access_errors),
INIT_Q_COUNTER(resp_cqe_flush_error),
INIT_Q_COUNTER(req_cqe_flush_error),
};
static const struct mlx5_ib_counter roce_accl_cnts[] = {
INIT_Q_COUNTER(roce_adp_retrans),
INIT_Q_COUNTER(roce_adp_retrans_to),
INIT_Q_COUNTER(roce_slow_restart),
INIT_Q_COUNTER(roce_slow_restart_cnps),
INIT_Q_COUNTER(roce_slow_restart_trans),
};
#define INIT_EXT_PPCNT_COUNTER(_name) \
{ .name = #_name, .offset = \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
struct mlx5_read_counters_attr mread_attr = {};
struct mlx5_ib_flow_counters_desc *desc;
int ret, i;
mutex_lock(&mcounters->mcntrs_mutex);
if (mcounters->cntrs_max_index > read_attr->ncounters) {
ret = -EINVAL;
goto err_bound;
}
mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
GFP_KERNEL);
if (!mread_attr.out) {
ret = -ENOMEM;
goto err_bound;
}
mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
mread_attr.flags = read_attr->flags;
ret = mcounters->read_counters(counters->device, &mread_attr);
if (ret)
goto err_read;
/* do the pass over the counters data array to assign according to the
* descriptions and indexing pairs
*/
desc = mcounters->counters_data;
for (i = 0; i < mcounters->ncounters; i++)
read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
err_read:
kfree(mread_attr.out);
err_bound:
mutex_unlock(&mcounters->mcntrs_mutex);
return ret;
}
static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
mlx5_ib_counters_clear_description(counters);
if (mcounters->hw_cntrs_hndl)
mlx5_fc_destroy(to_mdev(counters->device)->mdev,
mcounters->hw_cntrs_hndl);
return 0;
}
static int mlx5_ib_create_counters(struct ib_counters *counters,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
mutex_init(&mcounters->mcntrs_mutex);
return 0;
}
static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
RDMA: Support more than 255 rdma ports Current code uses many different types when dealing with a port of a RDMA device: u8, unsigned int and u32. Switch to u32 to clean up the logic. This allows us to make (at least) the core view consistent and use the same type. Unfortunately not all places can be converted. Many uverbs functions expect port to be u8 so keep those places in order not to break UAPIs. HW/Spec defined values must also not be changed. With the switch to u32 we now can support devices with more than 255 ports. U32_MAX is reserved to make control logic a bit easier to deal with. As a device with U32_MAX ports probably isn't going to happen any time soon this seems like a non issue. When a device with more than 255 ports is created uverbs will report the RDMA device as having 255 ports as this is the max currently supported. The verbs interface is not changed yet because the IBTA spec limits the port size in too many places to be u8 and all applications that relies in verbs won't be able to cope with this change. At this stage, we are extending the interfaces that are using vendor channel solely Once the limitation is lifted mlx5 in switchdev mode will be able to have thousands of SFs created by the device. As the only instance of an RDMA device that reports more than 255 ports will be a representor device and it exposes itself as a RAW Ethernet only device CM/MAD/IPoIB and other ULPs aren't effected by this change and their sysfs/interfaces that are exposes to userspace can remain unchanged. While here cleanup some alignment issues and remove unneeded sanity checks (mainly in rdmavt), Link: https://lore.kernel.org/r/20210301070420.439400-1-leon@kernel.org Signed-off-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2021-03-01 15:04:20 +08:00
u32 port_num)
{
return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
&dev->port[port_num].cnts;
}
/**
* mlx5_ib_get_counters_id - Returns counters id to use for device+port
* @dev: Pointer to mlx5 IB device
* @port_num: Zero based port number
*
* mlx5_ib_get_counters_id() Returns counters set id to use for given
* device port combination in switchdev and non switchdev mode of the
* parent device.
*/
RDMA: Support more than 255 rdma ports Current code uses many different types when dealing with a port of a RDMA device: u8, unsigned int and u32. Switch to u32 to clean up the logic. This allows us to make (at least) the core view consistent and use the same type. Unfortunately not all places can be converted. Many uverbs functions expect port to be u8 so keep those places in order not to break UAPIs. HW/Spec defined values must also not be changed. With the switch to u32 we now can support devices with more than 255 ports. U32_MAX is reserved to make control logic a bit easier to deal with. As a device with U32_MAX ports probably isn't going to happen any time soon this seems like a non issue. When a device with more than 255 ports is created uverbs will report the RDMA device as having 255 ports as this is the max currently supported. The verbs interface is not changed yet because the IBTA spec limits the port size in too many places to be u8 and all applications that relies in verbs won't be able to cope with this change. At this stage, we are extending the interfaces that are using vendor channel solely Once the limitation is lifted mlx5 in switchdev mode will be able to have thousands of SFs created by the device. As the only instance of an RDMA device that reports more than 255 ports will be a representor device and it exposes itself as a RAW Ethernet only device CM/MAD/IPoIB and other ULPs aren't effected by this change and their sysfs/interfaces that are exposes to userspace can remain unchanged. While here cleanup some alignment issues and remove unneeded sanity checks (mainly in rdmavt), Link: https://lore.kernel.org/r/20210301070420.439400-1-leon@kernel.org Signed-off-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2021-03-01 15:04:20 +08:00
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
{
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
return cnts->set_id;
}
static struct rdma_hw_stats *
mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
return rdma_alloc_hw_stats_struct(cnts->names,
cnts->num_q_counters +
cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
return rdma_alloc_hw_stats_struct(cnts->names,
cnts->num_q_counters +
cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
const struct mlx5_ib_counters *cnts,
struct rdma_hw_stats *stats,
u16 set_id)
{
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
__be32 val;
int ret, i;
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
if (ret)
return ret;
for (i = 0; i < cnts->num_q_counters; i++) {
val = *(__be32 *)((void *)out + cnts->offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
return 0;
}
static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
const struct mlx5_ib_counters *cnts,
struct rdma_hw_stats *stats)
{
int offset = cnts->num_q_counters + cnts->num_cong_counters;
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
int ret, i;
void *out;
out = kvzalloc(sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
MLX5_SET(ppcnt_reg, in, local_port, 1);
MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
0, 0);
if (ret)
goto free;
for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
stats->value[i + offset] =
be64_to_cpup((__be64 *)(out +
cnts->offsets[i + offset]));
free:
kvfree(out);
return ret;
}
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
RDMA: Support more than 255 rdma ports Current code uses many different types when dealing with a port of a RDMA device: u8, unsigned int and u32. Switch to u32 to clean up the logic. This allows us to make (at least) the core view consistent and use the same type. Unfortunately not all places can be converted. Many uverbs functions expect port to be u8 so keep those places in order not to break UAPIs. HW/Spec defined values must also not be changed. With the switch to u32 we now can support devices with more than 255 ports. U32_MAX is reserved to make control logic a bit easier to deal with. As a device with U32_MAX ports probably isn't going to happen any time soon this seems like a non issue. When a device with more than 255 ports is created uverbs will report the RDMA device as having 255 ports as this is the max currently supported. The verbs interface is not changed yet because the IBTA spec limits the port size in too many places to be u8 and all applications that relies in verbs won't be able to cope with this change. At this stage, we are extending the interfaces that are using vendor channel solely Once the limitation is lifted mlx5 in switchdev mode will be able to have thousands of SFs created by the device. As the only instance of an RDMA device that reports more than 255 ports will be a representor device and it exposes itself as a RAW Ethernet only device CM/MAD/IPoIB and other ULPs aren't effected by this change and their sysfs/interfaces that are exposes to userspace can remain unchanged. While here cleanup some alignment issues and remove unneeded sanity checks (mainly in rdmavt), Link: https://lore.kernel.org/r/20210301070420.439400-1-leon@kernel.org Signed-off-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2021-03-01 15:04:20 +08:00
u32 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
struct mlx5_core_dev *mdev;
int ret, num_counters;
RDMA: Support more than 255 rdma ports Current code uses many different types when dealing with a port of a RDMA device: u8, unsigned int and u32. Switch to u32 to clean up the logic. This allows us to make (at least) the core view consistent and use the same type. Unfortunately not all places can be converted. Many uverbs functions expect port to be u8 so keep those places in order not to break UAPIs. HW/Spec defined values must also not be changed. With the switch to u32 we now can support devices with more than 255 ports. U32_MAX is reserved to make control logic a bit easier to deal with. As a device with U32_MAX ports probably isn't going to happen any time soon this seems like a non issue. When a device with more than 255 ports is created uverbs will report the RDMA device as having 255 ports as this is the max currently supported. The verbs interface is not changed yet because the IBTA spec limits the port size in too many places to be u8 and all applications that relies in verbs won't be able to cope with this change. At this stage, we are extending the interfaces that are using vendor channel solely Once the limitation is lifted mlx5 in switchdev mode will be able to have thousands of SFs created by the device. As the only instance of an RDMA device that reports more than 255 ports will be a representor device and it exposes itself as a RAW Ethernet only device CM/MAD/IPoIB and other ULPs aren't effected by this change and their sysfs/interfaces that are exposes to userspace can remain unchanged. While here cleanup some alignment issues and remove unneeded sanity checks (mainly in rdmavt), Link: https://lore.kernel.org/r/20210301070420.439400-1-leon@kernel.org Signed-off-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2021-03-01 15:04:20 +08:00
u32 mdev_port_num;
if (!stats)
return -EINVAL;
num_counters = cnts->num_q_counters +
cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters;
/* q_counters are per IB device, query the master mdev */
ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
if (ret)
return ret;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
if (ret)
return ret;
}
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
&mdev_port_num);
if (!mdev) {
/* If port is not affiliated yet, its in down state
* which doesn't have any counters yet, so it would be
* zero. So no need to read from the HCA.
*/
goto done;
}
ret = mlx5_lag_query_cong_counters(dev->mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
cnts->offsets +
cnts->num_q_counters);
mlx5_ib_put_native_port_mdev(dev, port_num);
if (ret)
return ret;
}
done:
return num_counters;
}
static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
const struct mlx5_ib_counters *cnts =
get_counters(dev, counter->port - 1);
return rdma_alloc_hw_stats_struct(cnts->names,
cnts->num_q_counters +
cnts->num_cong_counters +
cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
const struct mlx5_ib_counters *cnts =
get_counters(dev, counter->port - 1);
return mlx5_ib_query_q_counters(dev->mdev, cnts,
counter->stats, counter->id);
}
static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
if (!counter->id)
return 0;
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
struct ib_qp *qp)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
int err;
if (!counter->id) {
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
MLX5_SET(alloc_q_counter_in, in, opcode,
MLX5_CMD_OP_ALLOC_Q_COUNTER);
MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
if (err)
return err;
counter->id =
MLX5_GET(alloc_q_counter_out, out, counter_set_id);
}
err = mlx5_ib_qp_set_counter(qp, counter);
if (err)
goto fail_set_counter;
return 0;
fail_set_counter:
mlx5_ib_counter_dealloc(counter);
counter->id = 0;
return err;
}
static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
{
return mlx5_ib_qp_set_counter(qp, NULL);
}
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
const char **names,
size_t *offsets)
{
int i;
int j = 0;
for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
names[j] = basic_q_cnts[i].name;
offsets[j] = basic_q_cnts[i].offset;
}
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
names[j] = out_of_seq_q_cnts[i].name;
offsets[j] = out_of_seq_q_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
names[j] = retrans_q_cnts[i].name;
offsets[j] = retrans_q_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
names[j] = extended_err_cnts[i].name;
offsets[j] = extended_err_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
names[j] = roce_accl_cnts[i].name;
offsets[j] = roce_accl_cnts[i].offset;
}
}
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
names[j] = cong_cnts[i].name;
offsets[j] = cong_cnts[i].offset;
}
}
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
names[j] = ext_ppcnt_cnts[i].name;
offsets[j] = ext_ppcnt_cnts[i].offset;
}
}
}
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
struct mlx5_ib_counters *cnts)
{
u32 num_counters;
num_counters = ARRAY_SIZE(basic_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
num_counters += ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl))
num_counters += ARRAY_SIZE(roce_accl_cnts);
cnts->num_q_counters = num_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
num_counters += ARRAY_SIZE(cong_cnts);
}
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
}
cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
if (!cnts->names)
return -ENOMEM;
cnts->offsets = kcalloc(num_counters,
sizeof(*cnts->offsets), GFP_KERNEL);
if (!cnts->offsets)
goto err_names;
return 0;
err_names:
kfree(cnts->names);
cnts->names = NULL;
return -ENOMEM;
}
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
int num_cnt_ports;
int i;
num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
for (i = 0; i < num_cnt_ports; i++) {
if (dev->port[i].cnts.set_id) {
MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
dev->port[i].cnts.set_id);
mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}
kfree(dev->port[i].cnts.names);
kfree(dev->port[i].cnts.offsets);
}
}
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
int num_cnt_ports;
int err = 0;
int i;
bool is_shared;
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
if (err)
goto err_alloc;
mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
dev->port[i].cnts.offsets);
MLX5_SET(alloc_q_counter_in, in, uid,
is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
if (err) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
i + 1, err);
goto err_alloc;
}
dev->port[i].cnts.set_id =
MLX5_GET(alloc_q_counter_out, out, counter_set_id);
}
return 0;
err_alloc:
mlx5_ib_dealloc_counters(dev);
return err;
}
static int read_flow_counters(struct ib_device *ibdev,
struct mlx5_read_counters_attr *read_attr)
{
struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
return mlx5_fc_query(dev->mdev, fc,
&read_attr->out[IB_COUNTER_PACKETS],
&read_attr->out[IB_COUNTER_BYTES]);
}
/* flow counters currently expose two counters packets and bytes */
#define FLOW_COUNTERS_NUM 2
static int counters_set_description(
struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
u32 cntrs_max_index = 0;
int i;
if (counters_type != MLX5_IB_COUNTERS_FLOW)
return -EINVAL;
/* init the fields for the object */
mcounters->type = counters_type;
mcounters->read_counters = read_flow_counters;
mcounters->counters_num = FLOW_COUNTERS_NUM;
mcounters->ncounters = ncounters;
/* each counter entry have both description and index pair */
for (i = 0; i < ncounters; i++) {
if (desc_data[i].description > IB_COUNTER_BYTES)
return -EINVAL;
if (cntrs_max_index <= desc_data[i].index)
cntrs_max_index = desc_data[i].index + 1;
}
mutex_lock(&mcounters->mcntrs_mutex);
mcounters->counters_data = desc_data;
mcounters->cntrs_max_index = cntrs_max_index;
mutex_unlock(&mcounters->mcntrs_mutex);
return 0;
}
#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
struct mlx5_ib_create_flow *ucmd)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
struct mlx5_ib_flow_counters_desc *desc_data = NULL;
bool hw_hndl = false;
int ret = 0;
if (ucmd && ucmd->ncounters_data != 0) {
cntrs_data = ucmd->data;
if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
return -EINVAL;
desc_data = kcalloc(cntrs_data->ncounters,
sizeof(*desc_data),
GFP_KERNEL);
if (!desc_data)
return -ENOMEM;
if (copy_from_user(desc_data,
u64_to_user_ptr(cntrs_data->counters_data),
sizeof(*desc_data) * cntrs_data->ncounters)) {
ret = -EFAULT;
goto free;
}
}
if (!mcounters->hw_cntrs_hndl) {
mcounters->hw_cntrs_hndl = mlx5_fc_create(
to_mdev(ibcounters->device)->mdev, false);
if (IS_ERR(mcounters->hw_cntrs_hndl)) {
ret = PTR_ERR(mcounters->hw_cntrs_hndl);
goto free;
}
hw_hndl = true;
}
if (desc_data) {
/* counters already bound to at least one flow */
if (mcounters->cntrs_max_index) {
ret = -EINVAL;
goto free_hndl;
}
ret = counters_set_description(ibcounters,
MLX5_IB_COUNTERS_FLOW,
desc_data,
cntrs_data->ncounters);
if (ret)
goto free_hndl;
} else if (!mcounters->cntrs_max_index) {
/* counters not bound yet, must have udata passed */
ret = -EINVAL;
goto free_hndl;
}
return 0;
free_hndl:
if (hw_hndl) {
mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
mcounters->hw_cntrs_hndl);
mcounters->hw_cntrs_hndl = NULL;
}
free:
kfree(desc_data);
return ret;
}
void mlx5_ib_counters_clear_description(struct ib_counters *counters)
{
struct mlx5_ib_mcounters *mcounters;
if (!counters || atomic_read(&counters->usecnt) != 1)
return;
mcounters = to_mcounters(counters);
mutex_lock(&mcounters->mcntrs_mutex);
kfree(mcounters->counters_data);
mcounters->counters_data = NULL;
mcounters->cntrs_max_index = 0;
mutex_unlock(&mcounters->mcntrs_mutex);
}
static const struct ib_device_ops hw_stats_ops = {
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
.counter_bind_qp = mlx5_ib_counter_bind_qp,
.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
};
static const struct ib_device_ops hw_switchdev_stats_ops = {
.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
.counter_bind_qp = mlx5_ib_counter_bind_qp,
.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
};
static const struct ib_device_ops counters_ops = {
.create_counters = mlx5_ib_create_counters,
.destroy_counters = mlx5_ib_destroy_counters,
.read_counters = mlx5_ib_read_counters,
INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};
int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
{
ib_set_device_ops(&dev->ib_dev, &counters_ops);
if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
return 0;
if (is_mdev_switchdev_mode(dev->mdev))
ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
else
ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
return mlx5_ib_alloc_counters(dev);
}
void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
return;
mlx5_ib_dealloc_counters(dev);
}