Merge branch 'net-sched-actions-improve-dump-performance'

Jamal Hadi Salim says:

====================
net sched actions: improve dump performance

Changes since v11:
------------------
1) Jiri - renames: nla_value to value and nla_selector to selector
2) Jiri - rename: validate_nla_bitfield_32 to validate_nla_bitfield_32
3) Jiri - rename: NLA_BITFIELD_32 to NLA_BITFIELD32
4) Jiri - remove unnecessary break when we return in case statement
5) Jiri - rename and move nla_get_bitfield_32 to an earlier patch
6) Jiri - xmas tree alignment of var declaration
7) Jiri - rename all declarations of bitfield 32 vars to be consistent ("bf")
8) Jiri - improve validate_nla_bitfield32() validation to disallow valid
          bit values that are not selected by the selector

Changes since v10:
-----------------
1) Jiri: move type->validate_content() to its own patch
Jamal: decided to remove it altogether so we can get this patch set in.

2) Change name of NLA_FLAG_BITS to NLA_BITFIELD_32 based on discussions
with D. Ahern and Jiri. D. Ahern suggests to make this a variable bitmap size.
My analysis at this point is it too complex and i only need a few bit
flags. If we run out of bits someone else can create a new NLA_BITFIELD_XXX
and start using that. So please let this go.

3) Jamal - Add Suggested-by: Jiri for type NLA_BITFIELD_32

4) Jiri: Change name allowed_flags to tcaa_root_flags_allowed

5) Jiri: Introduce nla_get_flag_bits_values() helper instead of using
memcpy for retrieving nla_bitfield_32 fields.

Changes since v9:
-----------------

1) General consensus:
- remove again the use of BIT() to maintain uapi consistency ;->

1) Jiri:
- Add a new netlink type NLA_FLAG_BITS to check for valid bits
  and use it instead of inline vetting (patch 4/4 now)

Changes since v8:
-----------------

1) Jiri:
- Add back the use of BIT(). Eventually fix iproute2 instead
- Rename VALID_TCA_FLAGS to VALID_TCA_ROOT_FLAGS

Changes since v7:
-----------------

Jamal:
No changes.
Patch 1 went out twice. Resend without two copies of patch 1

changes since v6:
-----------------

1) DaveM:
New rules for netlink messages. From now on we are going to start
checking for bits that are not used and rejecting anything we dont
understand. In the future this is going to require major changes
to user space code (tc etc). This is just a start.

To quote, David:
"
 Again, bits you aren't using now, make sure userspace doesn't
   set them.  And if it does, reject.
"
Added checks for ensuring things work as above.

2) Jiri:
a)Fix the commit message to properly use "Fixes" description
b)Align assignments for nla_policy

Changes since v5:
----------------

0)
Remove use of BIT() because it is kernel specific. Requires a separate
patch (Jiri can submit that in his cleanups)

1)To paraphrase Eric D.

"memcpy(nla_data(count_attr), &cb->args[1], sizeof(u32));
wont work on 64bit BE machines because cb->args[1]
(which is 64 bit is larger in size than sizeof(u32))"

Fixed

2) Jiri Pirko

i) Spotted a bug fix mixed in the patch for wrong TLV
fix. Add patch 1/3 to address this. Make part of this
series because of dependencies.

ii) Rename ACT_LARGE_DUMP_ON -> TCA_FLAG_LARGE_DUMP_ON

iii) Satisfy Jiri's obsession against the noun "tcaa"
a)Rename struct nlattr *tcaa --> struct nlattr *tb
b)Rename TCAA_ACT_XXX -> TCA_ROOT_XXX

Changes since v4:
-----------------

1) Eric D.

pointed out that when all skb space is used up by the dump
there will be no space to insert the TCAA_ACT_COUNT attribute.

2) Jiri:

i) Change:

enum {
        TCAA_UNSPEC,
        TCAA_ACT_TAB,
        TCAA_ACT_FLAGS,
        TCAA_ACT_COUNT,
        TCAA_ACT_TIME_FILTER,
        __TCAA_MAX
};

to:
enum {
       TCAA_UNSPEC,
       TCAA_ACT_TAB,
       TCAA_ACT_FLAGS,
       TCAA_ACT_COUNT,
       __TCAA_MAX,
};

Jiri plans to followup with the rest of the code to make the
style consistent.

ii) Rename attribute TCAA_ACT_TIME_FILTER --> TCAA_ACT_TIME_DELTA

iii) Rename variable jiffy_filter --> jiffy_since
iv) Rename msecs_filter --> msecs_since
v) get rid of unused cb->args[0] and rename cb->args[4] to cb->args[0]

Earlier Changes
----------------
- Jiri mostly on names of things.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-07-30 19:28:08 -07:00
commit 764646b08d
5 changed files with 144 additions and 13 deletions

View File

@ -178,6 +178,7 @@ enum {
NLA_S16,
NLA_S32,
NLA_S64,
NLA_BITFIELD32,
__NLA_TYPE_MAX,
};
@ -206,6 +207,7 @@ enum {
* NLA_MSECS Leaving the length field zero will verify the
* given type fits, using it verifies minimum length
* just like "All other"
* NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute
* All other Minimum length of attribute payload
*
* Example:
@ -213,11 +215,13 @@ enum {
* [ATTR_FOO] = { .type = NLA_U16 },
* [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
* [ATTR_BAZ] = { .len = sizeof(struct mystruct) },
* [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },
* };
*/
struct nla_policy {
u16 type;
u16 len;
void *validation_data;
};
/**
@ -1202,6 +1206,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
return tmp;
}
/**
* nla_get_bitfield32 - return payload of 32 bitfield attribute
* @nla: nla_bitfield32 attribute
*/
static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla)
{
struct nla_bitfield32 tmp;
nla_memcpy(&tmp, nla, sizeof(tmp));
return tmp;
}
/**
* nla_memdup - duplicate attribute memory (kmemdup)
* @src: netlink attribute to duplicate from

View File

@ -226,5 +226,22 @@ struct nlattr {
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
/* Generic 32 bitflags attribute content sent to the kernel.
*
* The value is a bitmap that defines the values being set
* The selector is a bitmask that defines which value is legit
*
* Examples:
* value = 0x0, and selector = 0x1
* implies we are selecting bit 1 and we want to set its value to 0.
*
* value = 0x2, and selector = 0x2
* implies we are selecting bit 2 and we want to set its value to 1.
*
*/
struct nla_bitfield32 {
__u32 value;
__u32 selector;
};
#endif /* _UAPI__LINUX_NETLINK_H */

View File

@ -683,10 +683,29 @@ struct tcamsg {
unsigned char tca__pad1;
unsigned short tca__pad2;
};
enum {
TCA_ROOT_UNSPEC,
TCA_ROOT_TAB,
#define TCA_ACT_TAB TCA_ROOT_TAB
#define TCAA_MAX TCA_ROOT_TAB
TCA_ROOT_FLAGS,
TCA_ROOT_COUNT,
TCA_ROOT_TIME_DELTA, /* in msecs */
__TCA_ROOT_MAX,
#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
};
#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
#define TCA_ACT_TAB 1 /* attr type must be >=1 */
#define TCAA_MAX 1
/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
*
* TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
* actions in a dump. All dump responses will contain the number of actions
* being dumped stored in for user app's consumption in TCA_ROOT_COUNT
*
*/
#define TCA_FLAG_LARGE_DUMP_ON (1 << 0)
/* New extended info filters for IFLA_EXT_MASK */
#define RTEXT_FILTER_VF (1 << 0)

View File

@ -27,6 +27,30 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
[NLA_S64] = sizeof(s64),
};
static int validate_nla_bitfield32(const struct nlattr *nla,
u32 *valid_flags_allowed)
{
const struct nla_bitfield32 *bf = nla_data(nla);
u32 *valid_flags_mask = valid_flags_allowed;
if (!valid_flags_allowed)
return -EINVAL;
/*disallow invalid bit selector */
if (bf->selector & ~*valid_flags_mask)
return -EINVAL;
/*disallow invalid bit values */
if (bf->value & ~*valid_flags_mask)
return -EINVAL;
/*disallow valid bit values that are not selected*/
if (bf->value & ~bf->selector)
return -EINVAL;
return 0;
}
static int validate_nla(const struct nlattr *nla, int maxtype,
const struct nla_policy *policy)
{
@ -46,6 +70,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
return -ERANGE;
break;
case NLA_BITFIELD32:
if (attrlen != sizeof(struct nla_bitfield32))
return -ERANGE;
return validate_nla_bitfield32(nla, pt->validation_data);
case NLA_NUL_STRING:
if (pt->len)
minlen = min_t(int, attrlen, pt->len + 1);

View File

@ -110,6 +110,8 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
u32 act_flags = cb->args[2];
unsigned long jiffy_since = cb->args[3];
struct nlattr *nest;
spin_lock_bh(&hinfo->lock);
@ -127,6 +129,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
if (index < s_i)
continue;
if (jiffy_since &&
time_after(jiffy_since,
(unsigned long)p->tcfa_tm.lastuse))
continue;
nest = nla_nest_start(skb, n_i);
if (nest == NULL)
goto nla_put_failure;
@ -138,14 +145,20 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
}
nla_nest_end(skb, nest);
n_i++;
if (n_i >= TCA_ACT_MAX_PRIO)
if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
n_i >= TCA_ACT_MAX_PRIO)
goto done;
}
}
done:
if (index >= 0)
cb->args[0] = index + 1;
spin_unlock_bh(&hinfo->lock);
if (n_i)
cb->args[0] += n_i;
if (n_i) {
if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
cb->args[1] = n_i;
}
return n_i;
nla_put_failure:
@ -1068,11 +1081,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return tcf_add_notify(net, n, &actions, portid);
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
[TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
.validation_data = &tcaa_root_flags_allowed },
[TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
};
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ACT_MAX + 1];
struct nlattr *tca[TCA_ROOT_MAX + 1];
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
@ -1080,7 +1100,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
extack);
if (ret < 0)
return ret;
@ -1121,16 +1141,12 @@ replay:
return ret;
}
static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
static struct nlattr *find_dump_kind(struct nlattr **nla)
{
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
NULL, NULL) < 0)
return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
@ -1157,8 +1173,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
struct tc_action_ops *a_o;
int ret = 0;
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
struct nlattr *kind = find_dump_kind(cb->nlh);
struct nlattr *tb[TCA_ROOT_MAX + 1];
struct nlattr *count_attr = NULL;
unsigned long jiffy_since = 0;
struct nlattr *kind = NULL;
struct nla_bitfield32 bf;
u32 msecs_since = 0;
u32 act_count = 0;
ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
tcaa_policy, NULL);
if (ret < 0)
return ret;
kind = find_dump_kind(tb);
if (kind == NULL) {
pr_info("tc_dump_action: action bad kind\n");
return 0;
@ -1168,14 +1196,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (a_o == NULL)
return 0;
cb->args[2] = 0;
if (tb[TCA_ROOT_FLAGS]) {
bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
cb->args[2] = bf.value;
}
if (tb[TCA_ROOT_TIME_DELTA]) {
msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
if (msecs_since)
jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
cb->args[3] = jiffy_since;
count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
if (!count_attr)
goto out_module_put;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
@ -1188,6 +1234,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (ret > 0) {
nla_nest_end(skb, nest);
ret = skb->len;
act_count = cb->args[1];
memcpy(nla_data(count_attr), &act_count, sizeof(u32));
cb->args[1] = 0;
} else
nlmsg_trim(skb, b);