netfilter pull request 23-04-22

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEN9lkrMBJgcdVAPub1V2XiooUIOQFAmRDIGwACgkQ1V2XiooU
 IOTR2xAAgV2g59s0pPO0VnAs9WTKiMgm+9JZz2uLwOT1/nvVzdz8P7wXTJK47m57
 qziX//P0S37D6e5UkdSHpGBSfeIAMwciDd5WVJgsZYhpZ/QMS3y4GEUXVybMhV+p
 oqX86Kc1u91+kLbgFF1CdbMDvw+oNW43QrIYR1Ok2MqlnZ3dlDajkw5KLYWFvb6t
 9BCmkqZ1xheuN1l8CPZ0La3a9fwqTuC0P7TgSlO1BZF5nhADajcGt8lF8Td98zEZ
 Mrb4VyOrhT6UZgGd+UW1ZlNyduO2Cb6Tj7jx9Es5gX8fDjmnZyAJf1qW9Iinoo46
 4Yr+61oSNNGdE63m1lLm2ZQQJGDlUsuGCjUMW4hyVe8YIrqQg+cOXaTkCa40Xvos
 qvObjrsnAd9xHtBchk4IaQ84J56ifalXULAJs8IZGM5K2XUNwnF7kIOaVtV8Rq+K
 BSSprrM9Z7Lz5W5ucRLlBmDYSDd+ESUnhgJgIuf1CZ04mRBupF4IkqCU5INmVhJR
 472cSc9DKPHmsnFFdszidxBwM6mg00qQ9M4Qvl+dQIOs0a28Pr8nHPOSStgMaecd
 NAPGGEMdRLNaH5KYN1VbseUbbFhXQpXcuNCF1q8fdzpQYyo/+I/lu618sFEhy08r
 KN6+JIDdrcAdMyYgL3rQy57u58xYlwU2NLIE0SLHLqI4grtTxnw=
 =T5eS
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-23-04-22' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

1) Reduce jumpstack footprint: Stash chain in last rule marker in blob for
   tracing. Remove last rule and chain from jumpstack. From Florian Westphal.

2) nf_tables validates all tables before committing the new rules.
   Unfortunately, this has two drawbacks:

   - Since the addition of the transaction mutex, pernet state gets
     written outside of the locked section from the cleanup callback.
     This is wrong, so do this cleanup directly after the table has
     passed all checks.

   - Tables that saw no changes are revalidated. This can be avoided by
     keeping the validation state per table, not per netns.

   From Florian Westphal.

3) Get rid of a few redundant pointers in the traceinfo structure.
   The three removed pointers are used in the expression evaluation loop,
   so gcc keeps them in registers. Passing them to the (inlined) helpers
   thus doesn't increase nft_do_chain text size, while stack is reduced
   by another 24 bytes on 64bit arches. From Florian Westphal.

4) Several IPVS cleanups that make no functional changes, aside from
   removing some debugging output:

   - Update width of source for ip_vs_sync_conn_options
     The operation is safe, use an annotation to describe it properly.

   - Consistently use array_size() in ip_vs_conn_init()
     It seems better to use helpers consistently.

   - Remove {Enter,Leave}Function. These seem to be well past their
     use-by date.

   - Correct spelling in comments.

   From Simon Horman.

5) Extended netlink error reporting for netdevices in flowtables and
   netdev chains. Allow incrementally adding/deleting devices to/from a
   netdev basechain. Allow creating a netdev chain without a device.

* tag 'nf-next-23-04-22' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_tables: allow to create netdev chain without device
  netfilter: nf_tables: support for deleting devices in an existing netdev chain
  netfilter: nf_tables: support for adding new devices to an existing netdev chain
  netfilter: nf_tables: rename function to destroy hook list
  netfilter: nf_tables: do not send complete notification of deletions
  netfilter: nf_tables: extended netlink error reporting for netdevice
  ipvs: Correct spelling in comments
  ipvs: Remove {Enter,Leave}Function
  ipvs: Consistently use array_size() in ip_vs_conn_init()
  ipvs: Update width of source for ip_vs_sync_conn_options
  netfilter: nf_tables: do not store rule in traceinfo structure
  netfilter: nf_tables: do not store verdict in traceinfo structure
  netfilter: nf_tables: do not store pktinfo in traceinfo structure
  netfilter: nf_tables: remove unneeded conditional
  netfilter: nf_tables: make validation state per table
  netfilter: nf_tables: don't write table validation state without mutex
  netfilter: nf_tables: don't store chain address on jump
  netfilter: nf_tables: don't store address of last rule on jump
  netfilter: nf_tables: merge nft_rules_old structure and end of ruleblob marker
====================

Link: https://lore.kernel.org/r/20230421235021.216950-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2023-04-24 15:37:35 -07:00
commit ffcddcaed6
12 changed files with 463 additions and 382 deletions

View File

@ -45,7 +45,6 @@ struct nfnetlink_subsystem {
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action);
void (*cleanup)(struct net *net);
bool (*valid_genid)(struct net *net, u32 genid);
};

View File

@ -265,26 +265,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
pr_err(msg, ##__VA_ARGS__); \
} while (0)
#ifdef CONFIG_IP_VS_DEBUG
#define EnterFunction(level) \
do { \
if (level <= ip_vs_get_debug_level()) \
printk(KERN_DEBUG \
pr_fmt("Enter: %s, %s line %i\n"), \
__func__, __FILE__, __LINE__); \
} while (0)
#define LeaveFunction(level) \
do { \
if (level <= ip_vs_get_debug_level()) \
printk(KERN_DEBUG \
pr_fmt("Leave: %s, %s line %i\n"), \
__func__, __FILE__, __LINE__); \
} while (0)
#else
#define EnterFunction(level) do {} while (0)
#define LeaveFunction(level) do {} while (0)
#endif
/* The port number of FTP service (in network order). */
#define FTPPORT cpu_to_be16(21)
#define FTPDATA cpu_to_be16(20)
@ -604,7 +584,7 @@ struct ip_vs_conn {
spinlock_t lock; /* lock for state transition */
volatile __u16 state; /* state info */
volatile __u16 old_state; /* old state, to be used for
* state transition triggerd
* state transition triggered
* synchronization
*/
__u32 fwmark; /* Fire wall mark from skb */
@ -630,8 +610,10 @@ struct ip_vs_conn {
*/
struct ip_vs_app *app; /* bound ip_vs_app object */
void *app_data; /* Application private data */
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
struct_group(sync_conn_opt,
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
);
const struct ip_vs_pe *pe;
char *pe_data;
@ -653,7 +635,7 @@ struct ip_vs_service_user_kern {
u16 protocol;
union nf_inet_addr addr; /* virtual ip address */
__be16 port;
u32 fwmark; /* firwall mark of service */
u32 fwmark; /* firewall mark of service */
/* virtual service options */
char *sched_name;
@ -1054,7 +1036,7 @@ struct netns_ipvs {
struct ipvs_sync_daemon_cfg bcfg; /* Backup Configuration */
/* net name space ptr */
struct net *net; /* Needed by timer routines */
/* Number of heterogeneous destinations, needed becaus heterogeneous
/* Number of heterogeneous destinations, needed because heterogeneous
* are not supported when synchronization is enabled.
*/
unsigned int mixed_address_family_dests;

View File

@ -1046,6 +1046,18 @@ struct nft_rule_dp {
__attribute__((aligned(__alignof__(struct nft_expr))));
};
struct nft_rule_dp_last {
struct nft_rule_dp end; /* end of nft_rule_blob marker */
struct rcu_head h; /* call_rcu head */
struct nft_rule_blob *blob; /* ptr to free via call_rcu */
const struct nft_chain *chain; /* for nftables tracing */
};
static inline const struct nft_rule_dp *nft_rule_next(const struct nft_rule_dp *rule)
{
return (void *)rule + sizeof(*rule) + rule->dlen;
}
struct nft_rule_blob {
unsigned long size;
unsigned char data[]
@ -1197,6 +1209,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* @genmask: generation mask
* @afinfo: address family info
* @name: name of the table
* @validate_state: internal, set when transaction adds jumps
*/
struct nft_table {
struct list_head list;
@ -1215,6 +1228,7 @@ struct nft_table {
char *name;
u16 udlen;
u8 *udata;
u8 validate_state;
};
static inline bool nft_table_has_owner(const struct nft_table *table)
@ -1394,11 +1408,7 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
* @type: event type (enum nft_trace_types)
* @skbid: hash of skb to be used as trace id
* @packet_dumped: packet headers sent in a previous traceinfo message
* @pkt: pktinfo currently processed
* @basechain: base chain currently processed
* @chain: chain currently processed
* @rule: rule that was evaluated
* @verdict: verdict given by rule
*/
struct nft_traceinfo {
bool trace;
@ -1406,18 +1416,16 @@ struct nft_traceinfo {
bool packet_dumped;
enum nft_trace_types type:8;
u32 skbid;
const struct nft_pktinfo *pkt;
const struct nft_base_chain *basechain;
const struct nft_chain *chain;
const struct nft_rule_dp *rule;
const struct nft_verdict *verdict;
};
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_chain *basechain);
void nft_trace_notify(struct nft_traceinfo *info);
void nft_trace_notify(const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_rule_dp *rule,
struct nft_traceinfo *info);
#define MODULE_ALIAS_NFT_CHAIN(family, name) \
MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
@ -1601,6 +1609,8 @@ struct nft_trans_chain {
struct nft_stats __percpu *stats;
u8 policy;
u32 chain_id;
struct nft_base_chain *basechain;
struct list_head hook_list;
};
#define nft_trans_chain_update(trans) \
@ -1613,6 +1623,10 @@ struct nft_trans_chain {
(((struct nft_trans_chain *)trans->data)->policy)
#define nft_trans_chain_id(trans) \
(((struct nft_trans_chain *)trans->data)->chain_id)
#define nft_trans_basechain(trans) \
(((struct nft_trans_chain *)trans->data)->basechain)
#define nft_trans_chain_hooks(trans) \
(((struct nft_trans_chain *)trans->data)->hook_list)
struct nft_trans_table {
bool update;
@ -1688,7 +1702,6 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
u8 validate_state;
};
extern unsigned int nf_tables_net_id;

View File

@ -1481,6 +1481,7 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
int __init ip_vs_conn_init(void)
{
size_t tab_array_size;
int idx;
/* Compute size and mask */
@ -1494,8 +1495,9 @@ int __init ip_vs_conn_init(void)
/*
* Allocate the connection hash table and initialize its list heads
*/
ip_vs_conn_tab = vmalloc(array_size(ip_vs_conn_tab_size,
sizeof(*ip_vs_conn_tab)));
tab_array_size = array_size(ip_vs_conn_tab_size,
sizeof(*ip_vs_conn_tab));
ip_vs_conn_tab = vmalloc(tab_array_size);
if (!ip_vs_conn_tab)
return -ENOMEM;
@ -1508,10 +1510,8 @@ int __init ip_vs_conn_init(void)
return -ENOMEM;
}
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
(long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
pr_info("Connection hash table configured (size=%d, memory=%zdKbytes)\n",
ip_vs_conn_tab_size, tab_array_size / 1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));

View File

@ -1140,7 +1140,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
__be16 vport;
unsigned int flags;
EnterFunction(12);
vaddr = &svc->addr;
vport = svc->port;
daddr = &iph->saddr;
@ -1208,7 +1207,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
cp->flags, refcount_read(&cp->refcnt));
LeaveFunction(12);
return cp;
}
@ -1316,13 +1314,11 @@ after_nat:
ip_vs_update_conntrack(skb, cp, 0);
ip_vs_conn_put(cp);
LeaveFunction(11);
return NF_ACCEPT;
drop:
ip_vs_conn_put(cp);
kfree_skb(skb);
LeaveFunction(11);
return NF_STOLEN;
}
@ -1341,8 +1337,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
int af = state->pf;
struct sock *sk;
EnterFunction(11);
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
return NF_ACCEPT;
@ -2365,7 +2359,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
struct netns_ipvs *ipvs;
struct net *net;
EnterFunction(2);
list_for_each_entry(net, net_list, exit_list) {
ipvs = net_ipvs(net);
ip_vs_unregister_hooks(ipvs, AF_INET);
@ -2374,7 +2367,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
LeaveFunction(2);
}
static struct pernet_operations ipvs_core_ops = {

View File

@ -1061,8 +1061,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
unsigned int atype;
int ret;
EnterFunction(2);
#ifdef CONFIG_IP_VS_IPV6
if (udest->af == AF_INET6) {
atype = ipv6_addr_type(&udest->addr.in6);
@ -1111,7 +1109,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
spin_lock_init(&dest->dst_lock);
__ip_vs_update_dest(svc, dest, udest, 1);
LeaveFunction(2);
return 0;
err_stats:
@ -1134,8 +1131,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
__be16 dport = udest->port;
int ret;
EnterFunction(2);
if (udest->weight < 0) {
pr_err("%s(): server weight less than zero\n", __func__);
return -ERANGE;
@ -1183,7 +1178,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ret = ip_vs_start_estimator(svc->ipvs, &dest->stats);
if (ret < 0)
goto err;
return ret;
__ip_vs_update_dest(svc, dest, udest, 1);
} else {
/*
@ -1192,9 +1187,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ret = ip_vs_new_dest(svc, udest);
}
err:
LeaveFunction(2);
return ret;
}
@ -1209,8 +1201,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
union nf_inet_addr daddr;
__be16 dport = udest->port;
EnterFunction(2);
if (udest->weight < 0) {
pr_err("%s(): server weight less than zero\n", __func__);
return -ERANGE;
@ -1242,7 +1232,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
}
__ip_vs_update_dest(svc, dest, udest, 0);
LeaveFunction(2);
return 0;
}
@ -1317,8 +1306,6 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
struct ip_vs_dest *dest;
__be16 dport = udest->port;
EnterFunction(2);
/* We use function that requires RCU lock */
rcu_read_lock();
dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
@ -1339,8 +1326,6 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
*/
__ip_vs_del_dest(svc->ipvs, dest, false);
LeaveFunction(2);
return 0;
}
@ -1746,7 +1731,6 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list)
struct netns_ipvs *ipvs;
struct net *net;
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
list_for_each_entry(net, net_list, exit_list) {
@ -1754,7 +1738,6 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list)
ip_vs_flush(ipvs, true);
}
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
/* Put all references for device (dst_cache) */
@ -1792,7 +1775,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
if (event != NETDEV_DOWN || !ipvs)
return NOTIFY_DONE;
IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
EnterFunction(2);
mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
@ -1821,7 +1803,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
}
spin_unlock_bh(&ipvs->dest_trash_lock);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
return NOTIFY_DONE;
}
@ -4537,8 +4518,6 @@ int __init ip_vs_control_init(void)
int idx;
int ret;
EnterFunction(2);
/* Initialize svc_table, ip_vs_svc_fwm_table */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
@ -4551,15 +4530,12 @@ int __init ip_vs_control_init(void)
if (ret < 0)
return ret;
LeaveFunction(2);
return 0;
}
void ip_vs_control_cleanup(void)
{
EnterFunction(2);
unregister_netdevice_notifier(&ip_vs_dst_notifier);
/* relying on common rcu_barrier() in ip_vs_cleanup() */
LeaveFunction(2);
}

View File

@ -603,7 +603,7 @@ static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
struct ip_vs_sync_conn_options *opt =
(struct ip_vs_sync_conn_options *)&s[1];
memcpy(opt, &cp->in_seq, sizeof(*opt));
memcpy(opt, &cp->sync_conn_opt, sizeof(*opt));
}
m->nr_conns++;
@ -1582,13 +1582,11 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
struct kvec iov;
int len;
EnterFunction(7);
iov.iov_base = (void *)buffer;
iov.iov_len = length;
len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
LeaveFunction(7);
return len;
}
@ -1614,15 +1612,12 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
struct kvec iov = {buffer, buflen};
int len;
EnterFunction(7);
/* Receive a packet */
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, buflen);
len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
if (len < 0)
return len;
LeaveFunction(7);
return len;
}

View File

@ -706,8 +706,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
{
struct iphdr *iph = ip_hdr(skb);
EnterFunction(10);
if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@ -719,12 +717,10 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
@ -735,8 +731,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
{
struct ipv6hdr *iph = ipv6_hdr(skb);
EnterFunction(10);
if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
&iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
@ -747,12 +741,10 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
#endif
@ -768,8 +760,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rtable *rt; /* Route to the other host */
int local, rc, was_input;
EnterFunction(10);
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
@ -839,12 +829,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
LeaveFunction(10);
return rc;
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
@ -856,8 +844,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rt6_info *rt; /* Route to the other host */
int local, rc;
EnterFunction(10);
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
@ -927,11 +913,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
LeaveFunction(10);
return rc;
tx_error:
LeaveFunction(10);
kfree_skb(skb);
return NF_STOLEN;
}
@ -1149,8 +1133,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
int tun_type, gso_type;
int tun_flags;
EnterFunction(10);
local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@ -1199,7 +1181,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
&next_protocol, NULL, &dsfield,
&ttl, dfp);
if (IS_ERR(skb))
goto tx_error;
return NF_STOLEN;
gso_type = __tun_gso_type_mask(AF_INET, cp->af);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
@ -1267,14 +1249,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
else if (ret == NF_DROP)
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
LeaveFunction(10);
kfree_skb(skb);
return NF_STOLEN;
}
@ -1298,8 +1276,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
int tun_type, gso_type;
int tun_flags;
EnterFunction(10);
local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
@ -1347,7 +1323,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
&next_protocol, &payload_len,
&dsfield, &ttl, NULL);
if (IS_ERR(skb))
goto tx_error;
return NF_STOLEN;
gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
@ -1414,14 +1390,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
else if (ret == NF_DROP)
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
LeaveFunction(10);
kfree_skb(skb);
return NF_STOLEN;
}
#endif
@ -1437,8 +1409,6 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
{
int local;
EnterFunction(10);
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@ -1455,12 +1425,10 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
@ -1471,8 +1439,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
{
int local;
EnterFunction(10);
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
NULL, ipvsh, 0,
@ -1489,12 +1455,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
#endif
@ -1514,8 +1478,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
int local;
int rt_mode, was_input;
EnterFunction(10);
/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
forwarded directly here, because there is no need to
translate address/port back */
@ -1526,7 +1488,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rc = NF_ACCEPT;
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
goto out;
return rc;
}
/*
@ -1582,14 +1544,11 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
goto out;
return ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
tx_error:
kfree_skb(skb);
rc = NF_STOLEN;
out:
LeaveFunction(10);
return rc;
}
@ -1604,8 +1563,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
int local;
int rt_mode;
EnterFunction(10);
/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
forwarded directly here, because there is no need to
translate address/port back */
@ -1616,7 +1573,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rc = NF_ACCEPT;
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
goto out;
return rc;
}
/*
@ -1671,14 +1628,11 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
goto out;
return ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
tx_error:
kfree_skb(skb);
rc = NF_STOLEN;
out:
LeaveFunction(10);
return rc;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -41,39 +41,37 @@ static inline bool nf_skip_indirect_calls(void) { return false; }
static inline void nf_skip_indirect_calls_enable(void) { }
#endif
static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
static noinline void __nft_trace_packet(const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_rule_dp *rule,
struct nft_traceinfo *info,
enum nft_trace_types type)
{
if (!info->trace || !info->nf_trace)
return;
info->chain = chain;
info->type = type;
nft_trace_notify(info);
nft_trace_notify(pkt, verdict, rule, info);
}
static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
struct nft_verdict *verdict,
struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_rule_dp *rule,
enum nft_trace_types type)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
info->nf_trace = pkt->skb->nf_trace;
info->rule = rule;
__nft_trace_packet(info, chain, type);
__nft_trace_packet(pkt, verdict, rule, info, type);
}
}
static inline void nft_trace_copy_nftrace(const struct nft_pktinfo *pkt,
struct nft_traceinfo *info)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
if (info->trace)
info->nf_trace = pkt->skb->nf_trace;
}
if (static_branch_unlikely(&nft_trace_enabled))
info->nf_trace = pkt->skb->nf_trace;
}
static void nft_bitwise_fast_eval(const struct nft_expr *expr,
@ -110,8 +108,9 @@ static void nft_cmp16_fast_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
const struct nft_chain *chain,
static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt,
struct nft_traceinfo *info,
const struct nft_rule_dp *rule,
const struct nft_regs *regs)
{
enum nft_trace_types type;
@ -129,22 +128,20 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
type = NFT_TRACETYPE_RULE;
if (info->trace)
info->nf_trace = info->pkt->skb->nf_trace;
info->nf_trace = pkt->skb->nf_trace;
break;
}
__nft_trace_packet(info, chain, type);
__nft_trace_packet(pkt, &regs->verdict, rule, info, type);
}
static inline void nft_trace_verdict(struct nft_traceinfo *info,
const struct nft_chain *chain,
static inline void nft_trace_verdict(const struct nft_pktinfo *pkt,
struct nft_traceinfo *info,
const struct nft_rule_dp *rule,
const struct nft_regs *regs)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
info->rule = rule;
__nft_trace_verdict(info, chain, regs);
}
if (static_branch_unlikely(&nft_trace_enabled))
__nft_trace_verdict(pkt, info, rule, regs);
}
static bool nft_payload_fast_eval(const struct nft_expr *expr,
@ -203,9 +200,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
}
struct nft_jumpstack {
const struct nft_chain *chain;
const struct nft_rule_dp *rule;
const struct nft_rule_dp *last_rule;
};
static void expr_call_ops_eval(const struct nft_expr *expr,
@ -248,7 +243,6 @@ indirect_call:
#define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0]
#define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size
#define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen]
#define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen
#define nft_rule_dp_for_each_expr(expr, last, rule) \
for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \
@ -259,9 +253,9 @@ unsigned int
nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{
const struct nft_chain *chain = priv, *basechain = chain;
const struct nft_rule_dp *rule, *last_rule;
const struct net *net = nft_net(pkt);
const struct nft_expr *expr, *last;
const struct nft_rule_dp *rule;
struct nft_regs regs = {};
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
@ -271,7 +265,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
info.trace = false;
if (static_branch_unlikely(&nft_trace_enabled))
nft_trace_init(&info, pkt, &regs.verdict, basechain);
nft_trace_init(&info, pkt, basechain);
do_chain:
if (genbit)
blob = rcu_dereference(chain->blob_gen_1);
@ -279,10 +273,9 @@ do_chain:
blob = rcu_dereference(chain->blob_gen_0);
rule = (struct nft_rule_dp *)blob->data;
last_rule = (void *)blob->data + blob->size;
next_rule:
regs.verdict.code = NFT_CONTINUE;
for (; rule < last_rule; rule = nft_rule_next(rule)) {
for (; !rule->is_last ; rule = nft_rule_next(rule)) {
nft_rule_dp_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, &regs);
@ -304,14 +297,14 @@ next_rule:
nft_trace_copy_nftrace(pkt, &info);
continue;
case NFT_CONTINUE:
nft_trace_packet(pkt, &info, chain, rule,
nft_trace_packet(pkt, &regs.verdict, &info, rule,
NFT_TRACETYPE_RULE);
continue;
}
break;
}
nft_trace_verdict(&info, chain, rule, &regs);
nft_trace_verdict(pkt, &info, rule, &regs);
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
@ -325,9 +318,7 @@ next_rule:
case NFT_JUMP:
if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
return NF_DROP;
jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rule = nft_rule_next(rule);
jumpstack[stackptr].last_rule = last_rule;
stackptr++;
fallthrough;
case NFT_GOTO:
@ -342,13 +333,11 @@ next_rule:
if (stackptr > 0) {
stackptr--;
chain = jumpstack[stackptr].chain;
rule = jumpstack[stackptr].rule;
last_rule = jumpstack[stackptr].last_rule;
goto next_rule;
}
nft_trace_packet(pkt, &info, basechain, NULL, NFT_TRACETYPE_POLICY);
nft_trace_packet(pkt, &regs.verdict, &info, NULL, NFT_TRACETYPE_POLICY);
if (static_branch_unlikely(&nft_counters_enabled))
nft_update_chain_stats(basechain, pkt);

View File

@ -124,9 +124,11 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
}
static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
const struct nft_verdict *verdict,
const struct nft_rule_dp *rule,
const struct nft_traceinfo *info)
{
if (!info->rule || info->rule->is_last)
if (!rule || rule->is_last)
return 0;
/* a continue verdict with ->type == RETURN means that this is
@ -135,15 +137,16 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
* Since no rule matched, the ->rule pointer is invalid.
*/
if (info->type == NFT_TRACETYPE_RETURN &&
info->verdict->code == NFT_CONTINUE)
verdict->code == NFT_CONTINUE)
return 0;
return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
cpu_to_be64(info->rule->handle),
cpu_to_be64(rule->handle),
NFTA_TRACE_PAD);
}
static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
static bool nft_trace_have_verdict_chain(const struct nft_verdict *verdict,
struct nft_traceinfo *info)
{
switch (info->type) {
case NFT_TRACETYPE_RETURN:
@ -153,7 +156,7 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
return false;
}
switch (info->verdict->code) {
switch (verdict->code) {
case NFT_JUMP:
case NFT_GOTO:
break;
@ -164,9 +167,31 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
return true;
}
void nft_trace_notify(struct nft_traceinfo *info)
static const struct nft_chain *nft_trace_get_chain(const struct nft_rule_dp *rule,
const struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
const struct nft_rule_dp_last *last;
if (!rule)
return &info->basechain->chain;
while (!rule->is_last)
rule = nft_rule_next(rule);
last = (const struct nft_rule_dp_last *)rule;
if (WARN_ON_ONCE(!last->chain))
return &info->basechain->chain;
return last->chain;
}
void nft_trace_notify(const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_rule_dp *rule,
struct nft_traceinfo *info)
{
const struct nft_chain *chain;
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
@ -176,9 +201,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
return;
chain = nft_trace_get_chain(rule, info);
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
nla_total_size(strlen(info->chain->table->name)) +
nla_total_size(strlen(info->chain->name)) +
nla_total_size(strlen(chain->table->name)) +
nla_total_size(strlen(chain->name)) +
nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
@ -195,8 +222,8 @@ void nft_trace_notify(struct nft_traceinfo *info)
nla_total_size(sizeof(u32)) + /* nfproto */
nla_total_size(sizeof(u32)); /* policy */
if (nft_trace_have_verdict_chain(info))
size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
if (nft_trace_have_verdict_chain(verdict, info))
size += nla_total_size(strlen(verdict->chain->name)); /* jump target */
skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb)
@ -217,13 +244,13 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
if (nla_put_string(skb, NFTA_TRACE_CHAIN, chain->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
if (nla_put_string(skb, NFTA_TRACE_TABLE, chain->table->name))
goto nla_put_failure;
if (nf_trace_fill_rule_info(skb, info))
if (nf_trace_fill_rule_info(skb, verdict, rule, info))
goto nla_put_failure;
switch (info->type) {
@ -232,11 +259,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
break;
case NFT_TRACETYPE_RETURN:
case NFT_TRACETYPE_RULE:
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
goto nla_put_failure;
/* pkt->skb undefined iff NF_STOLEN, disable dump */
if (info->verdict->code == NF_STOLEN)
if (verdict->code == NF_STOLEN)
info->packet_dumped = true;
else
mark = pkt->skb->mark;
@ -273,7 +300,6 @@ void nft_trace_notify(struct nft_traceinfo *info)
}
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_chain *chain)
{
static siphash_key_t trace_key __read_mostly;
@ -283,8 +309,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
info->trace = true;
info->nf_trace = pkt->skb->nf_trace;
info->packet_dumped = false;
info->pkt = pkt;
info->verdict = verdict;
net_get_random_once(&trace_key, sizeof(trace_key));

View File

@ -590,8 +590,6 @@ done:
goto replay_abort;
}
}
if (ss->cleanup)
ss->cleanup(net);
nfnl_err_deliver(&err_list, oskb);
kfree_skb(skb);