Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next

Steffen Klassert says:

====================
pull request (net-next): ipsec-next 2018-07-27

1) Extend the output_mark to also support the input direction
   and masking the mark values before applying to the skb.

2) Add a new lookup key for the upcoming xfrm interfaces.

3) Extend the xfrm lookups to match xfrm interface IDs.

4) Add virtual xfrm interfaces. The purpose of these interfaces
   is to overcome the design limitations that the existing
   VTI devices have.

  The main limitations that we see with the current VTI are the
  following:

  VTI interfaces are L3 tunnels with configurable endpoints.
  For xfrm, the tunnel endpoints are already determined by the SA.
  So the VTI tunnel endpoints must be either the same as on the
  SA or wildcards. In case VTI tunnel endpoints are same as on
  the SA, we get a one to one correlation between the SA and
  the tunnel. So each SA needs its own tunnel interface.

  On the other hand, we can have only one VTI tunnel with
  wildcard src/dst tunnel endpoints in the system because the
  lookup is based on the tunnel endpoints. The existing tunnel
  lookup won't work with multiple tunnels with wildcard
  tunnel endpoints. Some usecases require more than one
  VTI tunnel of this type, for example if somebody has multiple
  namespaces and every namespace requires such a VTI.

  VTI needs separate interfaces for IPv4 and IPv6 tunnels.
  So when routing to a VTI, we have to know to which address
  family this traffic class is going to be encapsulated.
  This is a limitation because it makes routing more complex
  and it is not always possible to know what happens behind the
  VTI, e.g. when the VTI is moved to some namespace.

  VTI works just with tunnel mode SAs. We need generic interfaces
  that ensure transformation, regardless of the xfrm mode and
  the encapsulated address family.

  VTI is configured with a combination of GRE keys and xfrm marks.
  With this we have to deal with some extra cases in the generic
  tunnel lookup because the GRE keys on the VTI are actually
  not GRE keys, the GRE keys were just reused for something else.
  All extensions to the VTI interfaces would require to add
  even more complexity to the generic tunnel lookup.

  So to overcome this, we developed xfrm interfaces with the
  following design goal:

  It should be possible to tunnel IPv4 and IPv6 through the same
  interface.

  No limitation on xfrm mode (tunnel, transport and beet).

  Should be a generic virtual interface that ensures IPsec
  transformation, no need to know what happens behind the
  interface.

  Interfaces should be configured with a new key that must match a
  new policy/SA lookup key.

  The lookup logic should stay in the xfrm codebase, no need to
  change or extend generic routing and tunnel lookups.

  Should be possible to use IPsec hardware offloads of the underlying
  interface.

5) Remove xfrm pcpu policy cache. This was added after the flowcache
   removal, but it turned out to make things even worse.
   From Florian Westphal.

6) Allow to update the set mark on SA updates.
   From Nathan Harold.

7) Convert some timestamps to time64_t.
   From Arnd Bergmann.

8) Don't check the offload_handle in xfrm code,
   it is an opaque data cookie for the driver.
   From Shannon Nelson.

9) Remove xfrmi interface ID from flowi. After this patch
   no generic code is touched anymore to do xfrm interface
   lookups. From Benedict Wong.

10) Allow to update the xfrm interface ID on SA updates.
    From Nathan Harold.

11) Don't pass zero to ERR_PTR() in xfrm_resolve_and_create_bundle.
    From YueHaibing.

12) Return more detailed errors on xfrm interface creation.
    From Benedict Wong.

13) Use PTR_ERR_OR_ZERO instead of IS_ERR + PTR_ERR.
    From the kbuild test robot.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-07-27 09:33:37 -07:00
commit 7a49d3d4ea
18 changed files with 1331 additions and 246 deletions

View File

@ -475,6 +475,14 @@ static inline struct dst_entry *xfrm_lookup(struct net *net,
return dst_orig;
}
/* Inline stub of xfrm_lookup_with_ifid(): performs no lookup and returns
 * dst_orig unchanged; net, fl, sk, flags and if_id are not used.
 */
static inline struct dst_entry *
xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl, const struct sock *sk,
int flags, u32 if_id)
{
return dst_orig;
}
static inline struct dst_entry *xfrm_lookup_route(struct net *net,
struct dst_entry *dst_orig,
const struct flowi *fl,
@ -494,6 +502,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl, const struct sock *sk,
int flags);
struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
struct dst_entry *dst_orig,
const struct flowi *fl,
const struct sock *sk, int flags,
u32 if_id);
struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl, const struct sock *sk,
int flags);

View File

@ -23,6 +23,7 @@
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/flow.h>
#include <net/gro_cells.h>
#include <linux/interrupt.h>
@ -147,6 +148,7 @@ struct xfrm_state {
struct xfrm_id id;
struct xfrm_selector sel;
struct xfrm_mark mark;
u32 if_id;
u32 tfcpad;
u32 genid;
@ -166,7 +168,7 @@ struct xfrm_state {
int header_len;
int trailer_len;
u32 extra_flags;
u32 output_mark;
struct xfrm_mark smark;
} props;
struct xfrm_lifetime_cfg lft;
@ -225,7 +227,7 @@ struct xfrm_state {
long saved_tmo;
/* Last used time */
unsigned long lastused;
time64_t lastused;
struct page_frag xfrag;
@ -292,6 +294,13 @@ struct xfrm_replay {
int (*overflow)(struct xfrm_state *x, struct sk_buff *skb);
};
/* Callback table handed to xfrm_if_register_cb(). */
struct xfrm_if_cb {
/* Maps an skb to a struct xfrm_if pointer. */
struct xfrm_if *(*decode_session)(struct sk_buff *skb);
};
void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
void xfrm_if_unregister_cb(void);
struct net_device;
struct xfrm_type;
struct xfrm_dst;
@ -323,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
void km_policy_notify(struct xfrm_policy *xp, int dir,
const struct km_event *c);
void xfrm_policy_cache_flush(void);
void km_state_notify(struct xfrm_state *x, const struct km_event *c);
struct xfrm_tmpl;
@ -574,6 +582,7 @@ struct xfrm_policy {
atomic_t genid;
u32 priority;
u32 index;
u32 if_id;
struct xfrm_mark mark;
struct xfrm_selector selector;
struct xfrm_lifetime_cfg lft;
@ -1037,6 +1046,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
/* Configuration parameters of one xfrm interface. */
struct xfrm_if_parms {
char name[IFNAMSIZ]; /* name of XFRM device */
int link; /* ifindex of underlying L2 interface */
u32 if_id; /* interface identifier */
};
/* State of one xfrm interface; instances are chained through ->next. */
struct xfrm_if {
struct xfrm_if __rcu *next; /* next interface in list */
struct net_device *dev; /* virtual device associated with interface */
struct net_device *phydev; /* physical device */
struct net *net; /* netns for packet i/o */
struct xfrm_if_parms p; /* interface parms */
struct gro_cells gro_cells;
};
struct xfrm_offload {
/* Output sequence number for replay protection on offloading. */
struct {
@ -1532,8 +1557,8 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
const struct flowi *fl,
struct xfrm_tmpl *tmpl,
struct xfrm_policy *pol, int *err,
unsigned short family);
struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark,
unsigned short family, u32 if_id);
struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
xfrm_address_t *daddr,
xfrm_address_t *saddr,
unsigned short family,
@ -1690,20 +1715,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
void *);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err);
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
u32 id, int delete, int *err);
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
int dir, u32 id, int delete, int *err);
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
void xfrm_policy_hash_rebuild(struct net *net);
u32 xfrm_get_acqseq(void);
int verify_spi_info(u8 proto, u32 min, u32 max);
int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
u8 mode, u32 reqid, u8 proto,
u8 mode, u32 reqid, u32 if_id, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create,
unsigned short family);
@ -2012,6 +2037,22 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
return ret;
}
/* Combine @mark with the state's set-mark (x->props.smark): bits selected by
 * the set-mark mask come from the set-mark value, all other bits are kept
 * from @mark.
 */
static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
{
	__u32 mask = x->props.smark.m;
	__u32 value = x->props.smark.v;

	return (value & mask) | (mark & ~mask);
}
/* Append an XFRMA_IF_ID attribute carrying @if_id to @skb.
 *
 * An @if_id of zero emits nothing and returns 0; otherwise the result of
 * nla_put_u32() is returned.
 */
static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
{
	if (!if_id)
		return 0;

	return nla_put_u32(skb, XFRMA_IF_ID, if_id);
}
static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
unsigned int family)
{

View File

@ -460,6 +460,16 @@ enum {
#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
/* XFRM section */
/* Netlink attribute ids for xfrm interfaces. */
enum {
IFLA_XFRM_UNSPEC,
IFLA_XFRM_LINK,
IFLA_XFRM_IF_ID,
__IFLA_XFRM_MAX
};
/* Highest valid attribute id of the enum above. */
#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
enum macsec_validation_type {
MACSEC_VALIDATE_DISABLED = 0,
MACSEC_VALIDATE_CHECK = 1,

View File

@ -305,9 +305,12 @@ enum xfrm_attr_type_t {
XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */
XFRMA_PAD,
XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */
XFRMA_OUTPUT_MARK, /* __u32 */
XFRMA_SET_MARK, /* __u32 */
XFRMA_SET_MARK_MASK, /* __u32 */
XFRMA_IF_ID, /* __u32 */
__XFRMA_MAX
#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */
#define XFRMA_MAX (__XFRMA_MAX - 1)
};

View File

@ -2253,7 +2253,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
} else {
/* slow path: we dont already have xfrm_state */
x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
(xfrm_address_t *)&pkt_dev->cur_daddr,
(xfrm_address_t *)&pkt_dev->cur_saddr,
AF_INET,

View File

@ -135,8 +135,7 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
skb->encap_hdr_csum = 1;
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
esp_features = features & ~NETIF_F_CSUM_MASK;
@ -179,8 +178,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
if (!xo)
return -EINVAL;
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev)) {
if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) {
xo->flags |= CRYPTO_FALLBACK;
hw_offload = false;
}

View File

@ -162,8 +162,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
skb->encap_hdr_csum = 1;
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
esp_features = features & ~NETIF_F_CSUM_MASK;
@ -207,8 +206,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
if (!xo)
return -EINVAL;
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev)) {
if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) {
xo->flags |= CRYPTO_FALLBACK;
hw_offload = false;
}

View File

@ -55,7 +55,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
__skb_pull(skb, hdr_len);
memmove(ipv6_hdr(skb), iph, hdr_len);
x->lastused = get_seconds();
x->lastused = ktime_get_real_seconds();
return 0;
}

View File

@ -1383,7 +1383,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
}
if (!x)
x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family);
x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
if (x == NULL)
return -ENOENT;
@ -2414,7 +2414,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
return err;
}
xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
1, &err);
security_xfrm_policy_free(pol_ctx);
@ -2663,7 +2663,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
return -EINVAL;
delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
dir, pol->sadb_x_policy_id, delete, &err);
if (xp == NULL)
return -ENOENT;

View File

@ -25,6 +25,14 @@ config XFRM_USER
If unsure, say Y.
config XFRM_INTERFACE
tristate "Transformation virtual interface"
depends on XFRM && IPV6
---help---
This provides a virtual interface to route IPsec traffic.
If unsure, say N.
config XFRM_SUB_POLICY
bool "Transformation sub policy support"
depends on XFRM

View File

@ -10,3 +10,4 @@ obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o

View File

@ -56,7 +56,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
if (skb_is_gso(skb)) {
struct net_device *dev = skb->dev;
if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) {
if (unlikely(x->xso.dev != dev)) {
struct sk_buff *segs;
/* Packet got rerouted, fixup features and segment it. */
@ -162,7 +162,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
x->props.family, x->props.output_mark);
x->props.family,
xfrm_smark_get(0, x));
if (IS_ERR(dst))
return 0;
@ -210,8 +211,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
if (!x->type_offload || x->encap)
return false;
if ((!dev || (x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev))) &&
(!xdst->child->xfrm && x->type->get_mtu)) {
if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
(!xdst->child->xfrm && x->type->get_mtu)) {
mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
if (skb->len <= mtu)
@ -306,12 +307,6 @@ static int xfrm_dev_register(struct net_device *dev)
return xfrm_api_check(dev);
}
static int xfrm_dev_unregister(struct net_device *dev)
{
xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
static int xfrm_dev_feat_change(struct net_device *dev)
{
return xfrm_api_check(dev);
@ -322,7 +317,6 @@ static int xfrm_dev_down(struct net_device *dev)
if (dev->features & NETIF_F_HW_ESP)
xfrm_dev_state_flush(dev_net(dev), dev, true);
xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
@ -334,9 +328,6 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
case NETDEV_REGISTER:
return xfrm_dev_register(dev);
case NETDEV_UNREGISTER:
return xfrm_dev_unregister(dev);
case NETDEV_FEAT_CHANGE:
return xfrm_dev_feat_change(dev);

View File

@ -320,6 +320,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
seq = 0;
if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
goto drop;
}
@ -328,17 +329,21 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
XFRM_SPI_SKB_CB(skb)->daddroff);
do {
if (skb->sp->len == XFRM_MAX_DEPTH) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
goto drop;
}
x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
if (x == NULL) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
xfrm_audit_state_notfound(skb, family, spi, seq);
goto drop;
}
skb->mark = xfrm_smark_get(skb->mark, x);
skb->sp->xvec[skb->sp->len++] = x;
lock:

975
net/xfrm/xfrm_interface.c Normal file
View File

@ -0,0 +1,975 @@
// SPDX-License-Identifier: GPL-2.0
/*
* XFRM virtual interface
*
* Copyright (C) 2018 secunet Security Networks AG
*
* Author:
* Steffen Klassert <steffen.klassert@secunet.com>
*/
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
#include <linux/icmp.h>
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_link.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/etherdevice.h>
/* Forward declarations: both functions are referenced before they are defined. */
static int xfrmi_dev_init(struct net_device *dev);
static void xfrmi_dev_setup(struct net_device *dev);
/* rtnetlink ops for this device type; initialised further down in this file. */
static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
/* Id used with net_generic() to reach struct xfrmi_net; set via xfrmi_net_ops.id. */
static unsigned int xfrmi_net_id __read_mostly;
/* Per-network-namespace state, obtained with net_generic(net, xfrmi_net_id). */
struct xfrmi_net {
/* lists for storing interfaces in use */
/* Only slot 0 is used: head of a singly linked list chained via xfrm_if->next. */
struct xfrm_if __rcu *xfrmi[1];
};
/* Iterate over an interface list starting at @start, using @xi as cursor.
 * Every step goes through rcu_dereference().
 */
#define for_each_xfrmi_rcu(start, xi) \
for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
/* Find the xfrm interface that state @x is bound to.
 *
 * Walks the interface list of @net and returns the first entry whose if_id
 * equals x->if_id and whose device has IFF_UP set, or NULL if none matches.
 */
static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
{
	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
	struct xfrm_if *cur;

	for_each_xfrmi_rcu(xfrmn->xfrmi[0], cur) {
		if (x->if_id != cur->p.if_id)
			continue;
		if (!(cur->dev->flags & IFF_UP))
			continue;
		return cur;
	}

	return NULL;
}
/* Map @skb to the xfrm interface it is attached to.
 *
 * Searches the interface list of the netns of skb->dev for an entry whose
 * device has the same ifindex as skb->dev and has IFF_UP set.  Returns that
 * entry, or NULL when skb->dev is unset or nothing matches.
 */
static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
{
	struct xfrmi_net *xfrmn;
	struct xfrm_if *cur;
	int wanted;

	if (!skb->dev)
		return NULL;

	xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
	wanted = skb->dev->ifindex;

	for_each_xfrmi_rcu(xfrmn->xfrmi[0], cur) {
		if (wanted != cur->dev->ifindex)
			continue;
		if (!(cur->dev->flags & IFF_UP))
			continue;
		return cur;
	}

	return NULL;
}
/* Insert @xi at the head of the interface list of @xfrmn.
 *
 * The current head is read with rtnl_dereference(); xi->next is published
 * before the list head is switched over to @xi.
 */
static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
{
	struct xfrm_if *old_head = rtnl_dereference(xfrmn->xfrmi[0]);

	rcu_assign_pointer(xi->next, old_head);
	rcu_assign_pointer(xfrmn->xfrmi[0], xi);
}
static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
{
struct xfrm_if __rcu **xip;
struct xfrm_if *iter;
for (xip = &xfrmn->xfrmi[0];
(iter = rtnl_dereference(*xip)) != NULL;
xip = &iter->next) {
if (xi == iter) {
rcu_assign_pointer(*xip, xi->next);
break;
}
}
}
static void xfrmi_dev_free(struct net_device *dev)
{
free_percpu(dev->tstats);
}
/* Register @dev and add its xfrm_if to the per-netns interface list.
 *
 * On success the final device name is copied into xi->p.name, an extra
 * reference on @dev is taken and 0 is returned.  If register_netdevice()
 * fails, its negative error is returned and nothing else is done.
 */
static int xfrmi_create2(struct net_device *dev)
{
	struct xfrm_if *xi = netdev_priv(dev);
	struct xfrmi_net *xfrmn = net_generic(dev_net(dev), xfrmi_net_id);
	int rc;

	dev->rtnl_link_ops = &xfrmi_link_ops;

	rc = register_netdevice(dev);
	if (rc < 0)
		return rc;

	strcpy(xi->p.name, dev->name);
	dev_hold(dev);
	xfrmi_link(xfrmn, xi);

	return 0;
}
/* Allocate, initialise and register a new xfrm interface in @net.
 *
 * @p supplies the name, the ifindex of the underlying device and the if_id.
 * Returns the new xfrm_if on success, otherwise an ERR_PTR():
 *   -EINVAL  @p carries no name
 *   -EAGAIN  alloc_netdev() failed
 *   -ENODEV  no device with ifindex p->link exists in @net
 *   or whatever xfrmi_create2() reported.
 */
static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p)
{
	char name[IFNAMSIZ];
	struct net_device *dev;
	struct xfrm_if *xi;
	int err;

	if (!p->name[0]) {
		err = -EINVAL;
		goto failed;
	}
	strlcpy(name, p->name, IFNAMSIZ);

	dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup);
	if (!dev) {
		err = -EAGAIN;
		goto failed;
	}

	dev_net_set(dev, net);

	xi = netdev_priv(dev);
	xi->p = *p;
	xi->net = net;
	xi->dev = dev;

	/* Takes a reference on the underlying device. */
	xi->phydev = dev_get_by_index(net, p->link);
	if (!xi->phydev) {
		err = -ENODEV;
		goto failed_free;
	}

	err = xfrmi_create2(dev);
	if (err >= 0)
		return xi;

	dev_put(xi->phydev);
failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
/* Look up the interface with if_id p->if_id in @net, optionally creating it.
 *
 * @create == 0: return the matching interface, or ERR_PTR(-ENODEV).
 * @create != 0: return ERR_PTR(-EEXIST) if a match exists, otherwise the
 *               result of xfrmi_create().
 */
static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
				    int create)
{
	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
	struct xfrm_if *xi;

	for (xi = rtnl_dereference(xfrmn->xfrmi[0]); xi != NULL;
	     xi = rtnl_dereference(xi->next)) {
		if (xi->p.if_id != p->if_id)
			continue;

		if (create)
			return ERR_PTR(-EEXIST);
		return xi;
	}

	if (create)
		return xfrmi_create(net, p);

	return ERR_PTR(-ENODEV);
}
/* ndo_uninit handler: take the interface off the list of xi->net and drop
 * one reference on the underlying device and one on @dev itself (the latter
 * pairs with the dev_hold() in xfrmi_create2()).
 */
static void xfrmi_dev_uninit(struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
xfrmi_unlink(xfrmn, xi);
dev_put(xi->phydev);
dev_put(dev);
}
/* Clear per-packet state on @skb.
 *
 * Always resets the timestamp, packet type, input interface, ignore_df flag,
 * dst and netfilter state.  When @xnet is true, the IPVS state, secpath,
 * socket ownership and mark are cleared as well.
 */
static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
{
	skb->tstamp = 0;
	skb->pkt_type = PACKET_HOST;
	skb->skb_iif = 0;
	skb->ignore_df = 0;
	skb_dst_drop(skb);
	nf_reset(skb);
	nf_reset_trace(skb);

	if (xnet) {
		ipvs_reset(skb);
		secpath_reset(skb);
		skb_orphan(skb);
		skb->mark = 0;
	}
}
/* Receive callback, installed as .cb_handler in the xfrm4/xfrm6 protocol
 * tables of this file.
 *
 * Looks up the xfrm interface for the state attached to @skb, retargets the
 * skb to that interface's device and updates the device statistics.
 *
 * Returns 0 when @err is set and the skb has no secpath, 1 when no
 * interface matches the state, 0 after counting an rx error for a non-zero
 * @err, -EINVAL/-EPERM when the cross-netns checks fail, and 0 on success.
 */
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
struct pcpu_sw_netstats *tstats;
struct xfrm_mode *inner_mode;
struct net_device *dev;
struct xfrm_state *x;
struct xfrm_if *xi;
bool xnet;
if (err && !skb->sp)
return 0;
x = xfrm_input_state(skb);
xi = xfrmi_lookup(xs_net(x), x);
if (!xi)
return 1;
dev = xi->dev;
/* From here on the packet is attributed to the xfrm interface device. */
skb->dev = dev;
if (err) {
dev->stats.rx_errors++;
dev->stats.rx_dropped++;
return 0;
}
/* skb->dev was set to dev above, so this compares xi->net with dev_net(dev). */
xnet = !net_eq(xi->net, dev_net(skb->dev));
if (xnet) {
inner_mode = x->inner_mode;
/* The selector has no fixed family: derive the inner mode from the packet's protocol. */
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
if (inner_mode == NULL) {
XFRM_INC_STATS(dev_net(skb->dev),
LINUX_MIB_XFRMINSTATEMODEERROR);
return -EINVAL;
}
}
if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
inner_mode->afinfo->family))
return -EPERM;
}
xfrmi_scrub_packet(skb, xnet);
/* Account the packet in the per-cpu software statistics. */
tstats = this_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
return 0;
}
/* Transmit helper for xfrmi_xmit().
 *
 * Resolves an xfrm bundle for @skb with the interface's if_id, validates
 * it, enforces the path MTU and passes the packet to dst_output().
 *
 * Returns 0 once the packet has been handed to dst_output() (tx statistics
 * are updated according to its result), -EMSGSIZE when the packet exceeds
 * the MTU, and otherwise a negative value (the lookup error, or -1).  The
 * skb is not freed here on the error returns.
 */
static int
xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
struct xfrm_if *xi = netdev_priv(dev);
struct net_device_stats *stats = &xi->dev->stats;
struct dst_entry *dst = skb_dst(skb);
/* Remember the length now; it is used for the tx byte count after dst_output(). */
unsigned int length = skb->len;
struct net_device *tdev;
struct xfrm_state *x;
int err = -1;
int mtu;
if (!dst)
goto tx_err_link_failure;
/* Extra reference on the dst that is passed into the lookup below. */
dst_hold(dst);
dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
/* Make the dst_release() on the error path a call with NULL. */
dst = NULL;
goto tx_err_link_failure;
}
/* The result must carry a state, and that state must belong to this interface. */
x = dst->xfrm;
if (!x)
goto tx_err_link_failure;
if (x->if_id != xi->p.if_id)
goto tx_err_link_failure;
tdev = dst->dev;
/* The output device is this very interface: refuse to loop. */
if (tdev == dev) {
stats->collisions++;
net_warn_ratelimited("%s: Local routing loop detected!\n",
xi->p.name);
goto tx_err_dst_release;
}
mtu = dst_mtu(dst);
if (!skb->ignore_df && skb->len > mtu) {
skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
/* Never report less than IPV6_MIN_MTU. */
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
} else {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
}
dst_release(dst);
return -EMSGSIZE;
}
xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
/* The looked-up dst is attached to the skb from here on. */
skb_dst_set(skb, dst);
skb->dev = tdev;
err = dst_output(xi->net, skb->sk, skb);
if (net_xmit_eval(err) == 0) {
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->tx_bytes += length;
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
} else {
stats->tx_errors++;
stats->tx_aborted_errors++;
}
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
dst_release(dst);
return err;
}
/* ndo_start_xmit handler.
 *
 * Builds the flow description for IPv4/IPv6 packets, clears the protocol
 * control block and lets xfrmi_xmit2() do the transmission.  Packets of any
 * other protocol, and packets for which xfrmi_xmit2() fails, are counted as
 * tx errors/drops and freed.  Always returns NETDEV_TX_OK.
 */
static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
struct net_device_stats *stats = &xi->dev->stats;
struct flowi fl;
int ret;
memset(&fl, 0, sizeof(fl));
switch (skb->protocol) {
case htons(ETH_P_IPV6):
xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
break;
case htons(ETH_P_IP):
xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
break;
default:
goto tx_err;
}
/* Set the flow's output interface to the underlying device. */
fl.flowi_oif = xi->phydev->ifindex;
ret = xfrmi_xmit2(skb, dev, &fl);
if (ret < 0)
goto tx_err;
return NETDEV_TX_OK;
tx_err:
stats->tx_errors++;
stats->tx_dropped++;
kfree_skb(skb);
return NETDEV_TX_OK;
}
/* IPv4 error handler, installed as .err_handler in the xfrm4 protocol tables
 * of this file.
 *
 * skb->data is read as the IPv4 header of the offending packet.  Only
 * ESP/AH/IPCOMP packets and only ICMP "fragmentation needed" and redirect
 * messages are acted on; everything else returns 0.
 *
 * Returns 0 when the message is ignored, no state matches or the update
 * was applied, and -1 when the state has no matching xfrm interface.
 */
static int xfrmi4_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (const struct iphdr *)skb->data;
struct net *net = dev_net(skb->dev);
int protocol = iph->protocol;
struct ip_comp_hdr *ipch;
struct ip_esp_hdr *esph;
struct ip_auth_hdr *ah ;
struct xfrm_state *x;
struct xfrm_if *xi;
__be32 spi;
/* Read the SPI from the header that follows the IPv4 header (ihl is in 32-bit words). */
switch (protocol) {
case IPPROTO_ESP:
esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
spi = esph->spi;
break;
case IPPROTO_AH:
ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
spi = ah->spi;
break;
case IPPROTO_COMP:
ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
/* IPCOMP carries a 16-bit CPI; widen it to the 32-bit SPI form. */
spi = htonl(ntohs(ipch->cpi));
break;
default:
return 0;
}
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return 0;
/* fall through */
case ICMP_REDIRECT:
break;
default:
return 0;
}
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, protocol, AF_INET);
if (!x)
return 0;
xi = xfrmi_lookup(net, x);
if (!xi) {
xfrm_state_put(x);
return -1;
}
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
else
ipv4_redirect(skb, net, 0, 0, protocol, 0);
/* Drop the state reference obtained from xfrm_state_lookup(). */
xfrm_state_put(x);
return 0;
}
/* IPv6 error handler, installed as .err_handler in the xfrm6 protocol tables
 * of this file.
 *
 * skb->data is read as the IPv6 header of the offending packet and @offset
 * as the position of the ESP/AH/IPCOMP header within it.  Only
 * ICMPV6_PKT_TOOBIG and NDISC_REDIRECT are acted on.
 *
 * Returns 0 when the message is ignored, no state matches or the update
 * was applied, and -1 when the state has no matching xfrm interface.
 */
static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
struct net *net = dev_net(skb->dev);
int protocol = iph->nexthdr;
struct ip_comp_hdr *ipch;
struct ip_esp_hdr *esph;
struct ip_auth_hdr *ah;
struct xfrm_state *x;
struct xfrm_if *xi;
__be32 spi;
switch (protocol) {
case IPPROTO_ESP:
esph = (struct ip_esp_hdr *)(skb->data + offset);
spi = esph->spi;
break;
case IPPROTO_AH:
ah = (struct ip_auth_hdr *)(skb->data + offset);
spi = ah->spi;
break;
case IPPROTO_COMP:
ipch = (struct ip_comp_hdr *)(skb->data + offset);
/* IPCOMP carries a 16-bit CPI; widen it to the 32-bit SPI form. */
spi = htonl(ntohs(ipch->cpi));
break;
default:
return 0;
}
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
return 0;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, protocol, AF_INET6);
if (!x)
return 0;
xi = xfrmi_lookup(net, x);
if (!xi) {
xfrm_state_put(x);
return -1;
}
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
else
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
/* Drop the state reference obtained from xfrm_state_lookup(). */
xfrm_state_put(x);
return 0;
}
/* Apply new parameters @p to @xi.
 *
 * Only the if_id may change: a differing link is rejected with -EINVAL and
 * leaves @xi untouched.  Returns 0 on success.
 */
static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
{
	if (xi->p.link == p->link) {
		xi->p.if_id = p->if_id;
		return 0;
	}

	return -EINVAL;
}
/* Change the parameters of a live interface.
 *
 * The interface is unlinked from the per-netns list, synchronize_net() is
 * called, the parameters are changed via xfrmi_change(), and the interface
 * is linked again.  The relink and netdev_state_change() happen even if
 * xfrmi_change() failed; its result is returned.
 */
static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
{
struct net *net = dev_net(xi->dev);
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
int err;
xfrmi_unlink(xfrmn, xi);
synchronize_net();
err = xfrmi_change(xi, p);
xfrmi_link(xfrmn, xi);
netdev_state_change(xi->dev);
return err;
}
/* ndo_get_stats64 handler.
 *
 * Adds the per-cpu rx/tx packet and byte counters of every possible CPU to
 * @s and copies the rx/tx drop counters from dev->stats.  Does nothing when
 * dev->tstats is not set.
 */
static void xfrmi_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *s)
{
int cpu;
if (!dev->tstats)
return;
for_each_possible_cpu(cpu) {
struct pcpu_sw_netstats *stats;
struct pcpu_sw_netstats tmp;
int start;
stats = per_cpu_ptr(dev->tstats, cpu);
/* Snapshot the four counters; retry if u64_stats_fetch_retry_irq() asks for it. */
do {
start = u64_stats_fetch_begin_irq(&stats->syncp);
tmp.rx_packets = stats->rx_packets;
tmp.rx_bytes = stats->rx_bytes;
tmp.tx_packets = stats->tx_packets;
tmp.tx_bytes = stats->tx_bytes;
} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
s->rx_packets += tmp.rx_packets;
s->rx_bytes += tmp.rx_bytes;
s->tx_packets += tmp.tx_packets;
s->tx_bytes += tmp.tx_bytes;
}
s->rx_dropped = dev->stats.rx_dropped;
s->tx_dropped = dev->stats.tx_dropped;
}
static int xfrmi_get_iflink(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
return xi->phydev->ifindex;
}
/* Device operations of an xfrm interface; installed by xfrmi_dev_setup(). */
static const struct net_device_ops xfrmi_netdev_ops = {
.ndo_init = xfrmi_dev_init,
.ndo_uninit = xfrmi_dev_uninit,
.ndo_start_xmit = xfrmi_xmit,
.ndo_get_stats64 = xfrmi_get_stats64,
.ndo_get_iflink = xfrmi_get_iflink,
};
/* Setup callback (used with alloc_netdev() and as rtnl_link_ops.setup).
 *
 * Installs the netdev ops and the static defaults of the device: type
 * ARPHRD_NONE, Ethernet-sized header/address lengths and MTU limits,
 * IFF_NOARP, and xfrmi_dev_free() as destructor.
 */
static void xfrmi_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &xfrmi_netdev_ops;
dev->type = ARPHRD_NONE;
dev->hard_header_len = ETH_HLEN;
dev->min_header_len = ETH_HLEN;
dev->mtu = ETH_DATA_LEN;
dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = ETH_DATA_LEN;
dev->addr_len = ETH_ALEN;
dev->flags = IFF_NOARP;
dev->needs_free_netdev = true;
dev->priv_destructor = xfrmi_dev_free;
netif_keep_dst(dev);
}
/* ndo_init handler.
 *
 * Allocates the per-cpu statistics and the GRO cells, sets NETIF_F_LLTX and
 * inherits headroom/tailroom and, where still zero, the hardware and
 * broadcast addresses from the underlying device (xi->phydev, which must
 * already be set).
 *
 * Returns 0 on success, -ENOMEM if the statistics cannot be allocated, or
 * the error from gro_cells_init() (the statistics are freed again then).
 */
static int xfrmi_dev_init(struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
struct net_device *phydev = xi->phydev;
int err;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
err = gro_cells_init(&xi->gro_cells, dev);
if (err) {
free_percpu(dev->tstats);
return err;
}
dev->features |= NETIF_F_LLTX;
dev->needed_headroom = phydev->needed_headroom;
dev->needed_tailroom = phydev->needed_tailroom;
if (is_zero_ether_addr(dev->dev_addr))
eth_hw_addr_inherit(dev, phydev);
if (is_zero_ether_addr(dev->broadcast))
memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
return 0;
}
/* rtnl_link_ops.validate callback: accepts every request (always returns 0). */
static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
return 0;
}
/* Fill @parms from the IFLA_XFRM_* attributes in @data.
 *
 * @parms is zeroed first, so the name is always empty afterwards and any
 * attribute that is absent (or a NULL @data) leaves its field at 0.
 */
static void xfrmi_netlink_parms(struct nlattr *data[],
				struct xfrm_if_parms *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (data) {
		if (data[IFLA_XFRM_LINK])
			parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);

		if (data[IFLA_XFRM_IF_ID])
			parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
	}
}
static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct net *net = dev_net(dev);
struct xfrm_if_parms *p;
struct xfrm_if *xi;
xi = netdev_priv(dev);
p = &xi->p;
xfrmi_netlink_parms(data, p);
if (!tb[IFLA_IFNAME])
return -EINVAL;
nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ);
xi = xfrmi_locate(net, p, 1);
return PTR_ERR_OR_ZERO(xi);
}
/* rtnl_link_ops.dellink callback: queue @dev on @head for unregistration. */
static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
{
unregister_netdevice_queue(dev, head);
}
static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct xfrm_if *xi = netdev_priv(dev);
struct net *net = dev_net(dev);
xfrmi_netlink_parms(data, &xi->p);
xi = xfrmi_locate(net, &xi->p, 0);
if (IS_ERR_OR_NULL(xi)) {
xi = netdev_priv(dev);
} else {
if (xi->dev != dev)
return -EEXIST;
}
return xfrmi_update(xi, &xi->p);
}
/* rtnl_link_ops.get_size callback: space needed by xfrmi_fill_info(),
 * i.e. two attributes with a 4-byte payload each.
 */
static size_t xfrmi_get_size(const struct net_device *dev)
{
	size_t total = 0;

	total += nla_total_size(4);	/* IFLA_XFRM_LINK */
	total += nla_total_size(4);	/* IFLA_XFRM_IF_ID */

	return total;
}
/* rtnl_link_ops.fill_info callback: dump link and if_id of @dev into @skb.
 *
 * Returns 0 on success or -EMSGSIZE if either attribute cannot be added.
 */
static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct xfrm_if *xi = netdev_priv(dev);
	struct xfrm_if_parms *parm = &xi->p;

	if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link))
		return -EMSGSIZE;

	if (nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
		return -EMSGSIZE;

	return 0;
}
/* rtnl_link_ops.get_link_net callback: netns of the underlying device.
 *
 * Only referenced through xfrmi_link_ops in this file, hence static.
 */
static struct net *xfrmi_get_link_net(const struct net_device *dev)
{
	struct xfrm_if *xi = netdev_priv(dev);

	return dev_net(xi->phydev);
}
/* Netlink attribute policy: both IFLA_XFRM_* attributes are u32. */
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
[IFLA_XFRM_LINK] = { .type = NLA_U32 },
[IFLA_XFRM_IF_ID] = { .type = NLA_U32 },
};
/* rtnetlink operations for link kind "xfrm". */
static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.kind = "xfrm",
.maxtype = IFLA_XFRM_MAX,
.policy = xfrmi_policy,
/* The device private area holds a struct xfrm_if (see netdev_priv() users). */
.priv_size = sizeof(struct xfrm_if),
.setup = xfrmi_dev_setup,
.validate = xfrmi_validate,
.newlink = xfrmi_newlink,
.dellink = xfrmi_dellink,
.changelink = xfrmi_changelink,
.get_size = xfrmi_get_size,
.fill_info = xfrmi_fill_info,
.get_link_net = xfrmi_get_link_net,
};
/*
 * Unregister every xfrm interface chained off @xfrmn.
 *
 * The previous version only queued the first entry (xfrmn->xfrmi[0]) and
 * left any further interfaces on the chain untouched. Walk the whole
 * chain instead and unregister all of them in one batch.
 *
 * NOTE(review): relies on struct xfrm_if linking entries through its
 * ->next member (declared outside this file section) -- confirm.
 * Queuing a device only moves it onto @list; the chain itself is not
 * modified until unregister_netdevice_many() runs, so following ->next
 * while queuing is safe.
 *
 * Called with the RTNL held (see xfrmi_exit_net()).
 */
static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn)
{
	struct xfrm_if __rcu **xip;
	struct xfrm_if *xi;
	LIST_HEAD(list);

	for (xip = &xfrmn->xfrmi[0];
	     (xi = rtnl_dereference(*xip)) != NULL;
	     xip = &xi->next)
		unregister_netdevice_queue(xi->dev, &list);

	unregister_netdevice_many(&list);
}
/*
 * Per-netns init hook of xfrmi_net_ops. Nothing to set up here; always
 * succeeds.
 */
static int __net_init xfrmi_init_net(struct net *net)
{
	return 0;
}
/*
 * Per-netns exit hook of xfrmi_net_ops: tear down the namespace's xfrm
 * interfaces with the RTNL held.
 */
static void __net_exit xfrmi_exit_net(struct net *net)
{
	rtnl_lock();
	xfrmi_destroy_interfaces(net_generic(net, xfrmi_net_id));
	rtnl_unlock();
}
/*
 * Per-network-namespace hooks. Each namespace gets a struct xfrmi_net of
 * private storage, addressed through xfrmi_net_id.
 */
static struct pernet_operations xfrmi_net_ops = {
.init = xfrmi_init_net,
.exit = xfrmi_exit_net,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
/*
 * IPv6 receive hooks, registered by xfrmi6_init() for IPPROTO_ESP,
 * IPPROTO_AH and IPPROTO_COMP respectively. All three use the same
 * handlers and the same priority.
 */
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
.handler = xfrm6_rcv,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
};
static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
.handler = xfrm6_rcv,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
};
static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
.handler = xfrm6_rcv,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
};
/*
 * IPv4 receive hooks, registered by xfrmi4_init() for IPPROTO_ESP,
 * IPPROTO_AH and IPPROTO_COMP respectively. All three use the same
 * handlers and the same priority; unlike the IPv6 tables they also set
 * an input_handler.
 */
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
.handler = xfrm4_rcv,
.input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
};
static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
.handler = xfrm4_rcv,
.input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
};
static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
.handler = xfrm4_rcv,
.input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
};
/*
 * Register the IPv4 ESP, AH and IPcomp hooks, in that order.
 *
 * Returns 0 on success. On failure, the hooks registered so far are
 * deregistered again and the negative error is returned.
 */
static int __init xfrmi4_init(void)
{
	int ret;

	ret = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
	if (ret < 0)
		return ret;

	ret = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
	if (ret < 0)
		goto unreg_esp;

	ret = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
	if (ret < 0)
		goto unreg_ah;

	return 0;

unreg_ah:
	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
unreg_esp:
	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
	return ret;
}
/*
 * Deregister the IPv4 hooks in the reverse order of xfrmi4_init().
 * Used both on module exit and on the xfrmi_init() error path.
 */
static void xfrmi4_fini(void)
{
	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
}
/*
 * Register the IPv6 ESP, AH and IPcomp hooks, in that order.
 *
 * Returns 0 on success. On failure, the hooks registered so far are
 * deregistered again and the negative error is returned.
 */
static int __init xfrmi6_init(void)
{
	int ret;

	ret = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
	if (ret < 0)
		return ret;

	ret = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
	if (ret < 0)
		goto unreg_esp;

	ret = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
	if (ret < 0)
		goto unreg_ah;

	return 0;

unreg_ah:
	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
unreg_esp:
	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
	return ret;
}
/*
 * Deregister the IPv6 hooks in the reverse order of xfrmi6_init().
 * Used both on module exit and on the xfrmi_init() error path.
 */
static void xfrmi6_fini(void)
{
	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
}
/*
 * Callback table handed to xfrm_if_register_cb() in xfrmi_init(); its
 * only hook is decode_session.
 */
static const struct xfrm_if_cb xfrm_if_cb = {
.decode_session = xfrmi_decode_session,
};
/*
 * Module init: register, in order, the per-netns ops, the IPv4 hooks,
 * the IPv6 hooks and the rtnetlink link ops, then publish the xfrm_if
 * callbacks.
 *
 * On failure every step already completed is undone in reverse order,
 * the failing step is logged, and its negative error is returned.
 */
static int __init xfrmi_init(void)
{
	const char *what;
	int ret;

	pr_info("IPsec XFRM device driver\n");

	ret = register_pernet_device(&xfrmi_net_ops);
	if (ret < 0) {
		what = "tunnel device";
		goto report;
	}

	ret = xfrmi4_init();
	if (ret < 0) {
		what = "xfrm4 protocols";
		goto undo_pernet;
	}

	ret = xfrmi6_init();
	if (ret < 0) {
		what = "xfrm6 protocols";
		goto undo_xfrm4;
	}

	ret = rtnl_link_register(&xfrmi_link_ops);
	if (ret < 0) {
		what = "netlink interface";
		goto undo_xfrm6;
	}

	xfrm_if_register_cb(&xfrm_if_cb);
	return ret;

undo_xfrm6:
	xfrmi6_fini();
undo_xfrm4:
	xfrmi4_fini();
undo_pernet:
	unregister_pernet_device(&xfrmi_net_ops);
report:
	pr_err("xfrmi init: failed to register %s\n", what);
	return ret;
}
/*
 * Module exit: withdraw the xfrm_if callbacks first, then drop the
 * rtnetlink link ops, the IPv4 and IPv6 hooks, and finally the per-netns
 * ops.
 */
static void __exit xfrmi_fini(void)
{
	xfrm_if_unregister_cb();
	rtnl_link_unregister(&xfrmi_link_ops);

	xfrmi4_fini();
	xfrmi6_fini();

	unregister_pernet_device(&xfrmi_net_ops);
}
module_init(xfrmi_init);
module_exit(xfrmi_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("xfrm");
MODULE_ALIAS_NETDEV("xfrm0");
MODULE_AUTHOR("Steffen Klassert");
MODULE_DESCRIPTION("XFRM virtual interface");

View File

@ -66,8 +66,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
goto error_nolock;
}
if (x->props.output_mark)
skb->mark = x->props.output_mark;
skb->mark = xfrm_smark_get(skb->mark, x);
err = x->outer_mode->output(x, skb);
if (err) {

View File

@ -45,8 +45,9 @@ struct xfrm_flo {
u8 flags;
};
static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
static struct work_struct *xfrm_pcpu_work __read_mostly;
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
@ -119,6 +120,12 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
return afinfo;
}
/*
 * Return the currently registered xfrm interface callbacks, or NULL if
 * none are registered.
 *
 * Called with rcu_read_lock().
 */
static const struct xfrm_if_cb *xfrm_if_get_cb(void)
{
	const struct xfrm_if_cb *cb = rcu_dereference(xfrm_if_cb);

	return cb;
}
struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
@ -182,8 +189,8 @@ static inline unsigned long make_jiffies(long secs)
static void xfrm_policy_timer(struct timer_list *t)
{
struct xfrm_policy *xp = from_timer(xp, t, timer);
unsigned long now = get_seconds();
long next = LONG_MAX;
time64_t now = ktime_get_real_seconds();
time64_t next = TIME64_MAX;
int warn = 0;
int dir;
@ -195,7 +202,7 @@ static void xfrm_policy_timer(struct timer_list *t)
dir = xfrm_policy_id2dir(xp->index);
if (xp->lft.hard_add_expires_seconds) {
long tmo = xp->lft.hard_add_expires_seconds +
time64_t tmo = xp->lft.hard_add_expires_seconds +
xp->curlft.add_time - now;
if (tmo <= 0)
goto expired;
@ -203,7 +210,7 @@ static void xfrm_policy_timer(struct timer_list *t)
next = tmo;
}
if (xp->lft.hard_use_expires_seconds) {
long tmo = xp->lft.hard_use_expires_seconds +
time64_t tmo = xp->lft.hard_use_expires_seconds +
(xp->curlft.use_time ? : xp->curlft.add_time) - now;
if (tmo <= 0)
goto expired;
@ -211,7 +218,7 @@ static void xfrm_policy_timer(struct timer_list *t)
next = tmo;
}
if (xp->lft.soft_add_expires_seconds) {
long tmo = xp->lft.soft_add_expires_seconds +
time64_t tmo = xp->lft.soft_add_expires_seconds +
xp->curlft.add_time - now;
if (tmo <= 0) {
warn = 1;
@ -221,7 +228,7 @@ static void xfrm_policy_timer(struct timer_list *t)
next = tmo;
}
if (xp->lft.soft_use_expires_seconds) {
long tmo = xp->lft.soft_use_expires_seconds +
time64_t tmo = xp->lft.soft_use_expires_seconds +
(xp->curlft.use_time ? : xp->curlft.add_time) - now;
if (tmo <= 0) {
warn = 1;
@ -233,7 +240,7 @@ static void xfrm_policy_timer(struct timer_list *t)
if (warn)
km_policy_expired(xp, dir, 0, 0);
if (next != LONG_MAX &&
if (next != TIME64_MAX &&
!mod_timer(&xp->timer, jiffies + make_jiffies(next)))
xfrm_pol_hold(xp);
@ -747,6 +754,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
newpos = NULL;
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
xfrm_policy_mark_match(policy, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
@ -783,7 +791,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
policy->curlft.add_time = get_seconds();
policy->curlft.add_time = ktime_get_real_seconds();
policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ))
xfrm_pol_hold(policy);
@ -798,8 +806,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
EXPORT_SYMBOL(xfrm_policy_insert);
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
int dir, struct xfrm_selector *sel,
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err)
{
@ -812,6 +821,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
ret = NULL;
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == type &&
pol->if_id == if_id &&
(mark & pol->mark.m) == pol->mark.v &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security)) {
@ -837,8 +847,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
int dir, u32 id, int delete, int *err)
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
u8 type, int dir, u32 id, int delete,
int *err)
{
struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
@ -853,6 +864,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
ret = NULL;
hlist_for_each_entry(pol, chain, byidx) {
if (pol->type == type && pol->index == id &&
pol->if_id == if_id &&
(mark & pol->mark.m) == pol->mark.v) {
xfrm_pol_hold(pol);
if (delete) {
@ -1056,13 +1068,14 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
*/
static int xfrm_policy_match(const struct xfrm_policy *pol,
const struct flowi *fl,
u8 type, u16 family, int dir)
u8 type, u16 family, int dir, u32 if_id)
{
const struct xfrm_selector *sel = &pol->selector;
int ret = -ESRCH;
bool match;
if (pol->family != family ||
pol->if_id != if_id ||
(fl->flowi_mark & pol->mark.m) != pol->mark.v ||
pol->type != type)
return ret;
@ -1077,7 +1090,8 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
const struct flowi *fl,
u16 family, u8 dir)
u16 family, u8 dir,
u32 if_id)
{
int err;
struct xfrm_policy *pol, *ret;
@ -1101,7 +1115,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
priority = ~0U;
ret = NULL;
hlist_for_each_entry_rcu(pol, chain, bydst) {
err = xfrm_policy_match(pol, fl, type, family, dir);
err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
if (err) {
if (err == -ESRCH)
continue;
@ -1120,7 +1134,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
if ((pol->priority >= priority) && ret)
break;
err = xfrm_policy_match(pol, fl, type, family, dir);
err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
if (err) {
if (err == -ESRCH)
continue;
@ -1145,21 +1159,25 @@ fail:
return ret;
}
static struct xfrm_policy *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
static struct xfrm_policy *xfrm_policy_lookup(struct net *net,
const struct flowi *fl,
u16 family, u8 dir, u32 if_id)
{
#ifdef CONFIG_XFRM_SUB_POLICY
struct xfrm_policy *pol;
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family,
dir, if_id);
if (pol != NULL)
return pol;
#endif
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family,
dir, if_id);
}
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
const struct flowi *fl, u16 family)
const struct flowi *fl,
u16 family, u32 if_id)
{
struct xfrm_policy *pol;
@ -1177,7 +1195,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
pol->if_id != if_id) {
pol = NULL;
goto out;
}
@ -1268,7 +1287,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
old_pol = rcu_dereference_protected(sk->sk_policy[dir],
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
if (pol) {
pol->curlft.add_time = get_seconds();
pol->curlft.add_time = ktime_get_real_seconds();
pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
xfrm_sk_policy_link(pol, dir);
}
@ -1305,6 +1324,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->lft = old->lft;
newp->curlft = old->curlft;
newp->mark = old->mark;
newp->if_id = old->if_id;
newp->action = old->action;
newp->flags = old->flags;
newp->xfrm_nr = old->xfrm_nr;
@ -1390,7 +1410,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
}
}
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
family, policy->if_id);
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
@ -1607,10 +1628,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst_copy_metrics(dst1, dst);
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
__u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
family = xfrm[i]->props.family;
dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
&saddr, &daddr, family,
xfrm[i]->props.output_mark);
&saddr, &daddr, family, mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
@ -1692,7 +1714,8 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
XFRM_POLICY_TYPE_MAIN,
fl, family,
XFRM_POLICY_OUT);
XFRM_POLICY_OUT,
pols[0]->if_id);
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
@ -1714,108 +1737,6 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
}
/*
 * Install @xdst as this CPU's xfrm_last_dst entry and drop the reference
 * on the entry it replaces (@old may be NULL).
 */
static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
{
	this_cpu_write(xfrm_last_dst, xdst);

	if (!old)
		return;

	dst_release(&old->u.dst);
}
/*
 * Clear this CPU's xfrm_last_dst entry if one is present and
 * xfrm_bundle_ok() rejects it.
 */
static void __xfrm_pcpu_work_fn(void)
{
	struct xfrm_dst *cached = this_cpu_read(xfrm_last_dst);

	if (!cached)
		return;
	if (xfrm_bundle_ok(cached))
		return;

	xfrm_last_dst_update(NULL, cached);
}
/*
 * Work item body: run __xfrm_pcpu_work_fn() with bottom halves disabled
 * and inside an RCU read-side section.
 */
static void xfrm_pcpu_work_fn(struct work_struct *work)
{
	local_bh_disable();
	rcu_read_lock();

	__xfrm_pcpu_work_fn();

	rcu_read_unlock();
	local_bh_enable();
}
/*
 * Drop every per-CPU xfrm_last_dst entry that xfrm_bundle_ok() rejects.
 * May sleep (see might_sleep() below).
 */
void xfrm_policy_cache_flush(void)
{
struct xfrm_dst *old;
bool found = false;
int cpu;
might_sleep();
/*
 * Pass 1, with BHs off and under RCU: clean up the local CPU's entry
 * inline and stop at the first rejected entry owned by another CPU.
 */
local_bh_disable();
rcu_read_lock();
for_each_possible_cpu(cpu) {
old = per_cpu(xfrm_last_dst, cpu);
if (old && !xfrm_bundle_ok(old)) {
if (smp_processor_id() == cpu) {
__xfrm_pcpu_work_fn();
continue;
}
found = true;
break;
}
}
rcu_read_unlock();
local_bh_enable();
/* No other CPU holds a rejected entry: nothing more to do. */
if (!found)
return;
/*
 * Pass 2, between get_online_cpus()/put_online_cpus(): re-check each
 * CPU's entry.
 */
get_online_cpus();
for_each_possible_cpu(cpu) {
bool bundle_release;
rcu_read_lock();
old = per_cpu(xfrm_last_dst, cpu);
bundle_release = old && !xfrm_bundle_ok(old);
rcu_read_unlock();
if (!bundle_release)
continue;
/* Online CPU: let it clear its own entry from its work item. */
if (cpu_online(cpu)) {
schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
continue;
}
/* Offline CPU: re-check, then clear and release the entry from here. */
rcu_read_lock();
old = per_cpu(xfrm_last_dst, cpu);
if (old && !xfrm_bundle_ok(old)) {
per_cpu(xfrm_last_dst, cpu) = NULL;
dst_release(&old->u.dst);
}
rcu_read_unlock();
}
put_online_cpus();
}
/*
 * Check whether @xdst can stand in for a bundle built from the @num
 * states in @xfrm.
 *
 * Returns false if the state count differs or if, walking the bundle via
 * xfrm_dst_child(), any level is missing or carries a different state
 * than the corresponding @xfrm entry. Otherwise returns the verdict of
 * xfrm_bundle_ok().
 */
static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
				struct xfrm_state * const xfrm[],
				int num)
{
	const struct dst_entry *cur;
	int idx = 0;

	if (xdst->num_xfrms != num)
		return false;

	cur = &xdst->u.dst;
	while (idx < num) {
		if (!cur || cur->xfrm != xfrm[idx])
			return false;
		cur = xfrm_dst_child(cur);
		idx++;
	}

	return xfrm_bundle_ok(xdst);
}
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
const struct flowi *fl, u16 family,
@ -1824,34 +1745,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
struct xfrm_dst *xdst, *old;
struct xfrm_dst *xdst;
struct dst_entry *dst;
int err;
/* Try to instantiate a bundle */
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
if (err <= 0) {
if (err != 0 && err != -EAGAIN)
if (err == 0)
return NULL;
if (err != -EAGAIN)
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
return ERR_PTR(err);
}
xdst = this_cpu_read(xfrm_last_dst);
if (xdst &&
xdst->u.dst.dev == dst_orig->dev &&
xdst->num_pols == num_pols &&
memcmp(xdst->pols, pols,
sizeof(struct xfrm_policy *) * num_pols) == 0 &&
xfrm_xdst_can_reuse(xdst, xfrm, err)) {
dst_hold(&xdst->u.dst);
xfrm_pols_put(pols, num_pols);
while (err > 0)
xfrm_state_put(xfrm[--err]);
return xdst;
}
old = xdst;
dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@ -1864,9 +1772,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
xdst->policy_genid = atomic_read(&pols[0]->genid);
atomic_set(&xdst->u.dst.__refcnt, 2);
xfrm_last_dst_update(xdst, old);
return xdst;
}
@ -2047,8 +1952,10 @@ free_dst:
goto out;
}
static struct xfrm_dst *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
static struct xfrm_dst *xfrm_bundle_lookup(struct net *net,
const struct flowi *fl,
u16 family, u8 dir,
struct xfrm_flo *xflo, u32 if_id)
{
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols = 0, num_xfrms = 0, err;
@ -2057,7 +1964,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
/* Resolve policies to use if we couldn't get them from
* previous cache entry */
num_pols = 1;
pols[0] = xfrm_policy_lookup(net, fl, family, dir);
pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@ -2067,13 +1974,15 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
if (num_xfrms <= 0)
goto make_dummy_bundle;
local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
xflo->dst_orig);
local_bh_enable();
if (IS_ERR(xdst)) {
err = PTR_ERR(xdst);
if (err == -EREMOTE) {
xfrm_pols_put(pols, num_pols);
return NULL;
}
if (err != -EAGAIN)
goto error;
goto make_dummy_bundle;
@ -2123,14 +2032,19 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
return ret;
}
/* Main function: finds/creates a bundle for given flow.
/* Finds/creates a bundle for given flow and if_id
*
* At the moment we eat a raw IP route. Mostly to speed up lookups
* on interfaces with disabled IPsec.
*
* xfrm_lookup uses an if_id of 0 by default, and is provided for
* compatibility
*/
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl,
const struct sock *sk, int flags)
struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
struct dst_entry *dst_orig,
const struct flowi *fl,
const struct sock *sk,
int flags, u32 if_id)
{
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst;
@ -2146,7 +2060,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
sk = sk_const_to_full_sk(sk);
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
num_pols = 1;
pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family,
if_id);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@ -2158,15 +2073,16 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(
pols, num_pols, fl,
family, dst_orig);
local_bh_enable();
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
err = PTR_ERR(xdst);
if (err == -EREMOTE)
goto nopol;
goto dropdst;
} else if (xdst == NULL) {
num_xfrms = 0;
@ -2189,7 +2105,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
if (xdst == NULL)
goto nopol;
if (IS_ERR(xdst)) {
@ -2234,7 +2150,7 @@ no_transform:
}
for (i = 0; i < num_pols; i++)
pols[i]->curlft.use_time = get_seconds();
pols[i]->curlft.use_time = ktime_get_real_seconds();
if (num_xfrms < 0) {
/* Prohibit the flow */
@ -2270,6 +2186,19 @@ dropdst:
xfrm_pols_put(pols, drop_pols);
return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup_with_ifid);
/* Main function: finds/creates a bundle for given flow.
*
* At the moment we eat a raw IP route. Mostly to speed up lookups
* on interfaces with disabled IPsec.
*/
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl, const struct sock *sk,
int flags)
{
return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0);
}
EXPORT_SYMBOL(xfrm_lookup);
/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
@ -2365,6 +2294,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
return -EAFNOSUPPORT;
afinfo->decode_session(skb, fl, reverse);
err = security_xfrm_decode_session(skb, &fl->flowi_secid);
rcu_read_unlock();
return err;
@ -2395,6 +2325,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int reverse;
struct flowi fl;
int xerr_idx = -1;
const struct xfrm_if_cb *ifcb;
struct xfrm_if *xi;
u32 if_id = 0;
rcu_read_lock();
ifcb = xfrm_if_get_cb();
if (ifcb) {
xi = ifcb->decode_session(skb);
if (xi)
if_id = xi->p.if_id;
}
rcu_read_unlock();
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
@ -2422,7 +2365,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
pol = NULL;
sk = sk_to_full_sk(sk);
if (sk && sk->sk_policy[dir]) {
pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
return 0;
@ -2430,7 +2373,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
if (!pol)
pol = xfrm_policy_lookup(net, &fl, family, dir);
pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@ -2446,7 +2389,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
pol->curlft.use_time = get_seconds();
pol->curlft.use_time = ktime_get_real_seconds();
pols[0] = pol;
npols++;
@ -2454,13 +2397,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
&fl, family,
XFRM_POLICY_IN);
XFRM_POLICY_IN, if_id);
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
return 0;
}
pols[1]->curlft.use_time = get_seconds();
pols[1]->curlft.use_time = ktime_get_real_seconds();
npols++;
}
}
@ -2819,6 +2762,21 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
/*
 * Publish @ifcb as the xfrm interface callback table.
 *
 * The pointer is stored with rcu_assign_pointer() while holding
 * xfrm_if_cb_lock; readers fetch it through xfrm_if_get_cb().
 */
void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
{
	spin_lock(&xfrm_if_cb_lock);
	rcu_assign_pointer(xfrm_if_cb, ifcb);
	spin_unlock(&xfrm_if_cb_lock);
}
EXPORT_SYMBOL(xfrm_if_register_cb);
/*
 * Withdraw the xfrm interface callback table: clear the pointer, then
 * wait with synchronize_rcu() before returning.
 */
void xfrm_if_unregister_cb(void)
{
	RCU_INIT_POINTER(xfrm_if_cb, NULL);

	synchronize_rcu();
}
EXPORT_SYMBOL(xfrm_if_unregister_cb);
#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
@ -2986,19 +2944,13 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void)
{
int i;
xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
GFP_KERNEL);
BUG_ON(!xfrm_pcpu_work);
for (i = 0; i < NR_CPUS; i++)
INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
RCU_INIT_POINTER(xfrm_if_cb, NULL);
synchronize_rcu();
}
#ifdef CONFIG_AUDITSYSCALL

View File

@ -475,8 +475,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
{
struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
unsigned long now = get_seconds();
long next = LONG_MAX;
time64_t now = ktime_get_real_seconds();
time64_t next = TIME64_MAX;
int warn = 0;
int err = 0;
@ -537,7 +537,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (warn)
km_state_expired(x, 0, 0);
resched:
if (next != LONG_MAX) {
if (next != TIME64_MAX) {
tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
}
@ -577,7 +577,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
x->curlft.add_time = get_seconds();
x->curlft.add_time = ktime_get_real_seconds();
x->lft.soft_byte_limit = XFRM_INF;
x->lft.soft_packet_limit = XFRM_INF;
x->lft.hard_byte_limit = XFRM_INF;
@ -735,10 +735,9 @@ restart:
}
out:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
if (cnt) {
if (cnt)
err = 0;
xfrm_policy_cache_flush();
}
return err;
}
EXPORT_SYMBOL(xfrm_state_flush);
@ -931,7 +930,7 @@ struct xfrm_state *
xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
const struct flowi *fl, struct xfrm_tmpl *tmpl,
struct xfrm_policy *pol, int *err,
unsigned short family)
unsigned short family, u32 if_id)
{
static xfrm_address_t saddr_wildcard = { };
struct net *net = xp_net(pol);
@ -955,6 +954,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
x->if_id == if_id &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
tmpl->mode == x->props.mode &&
@ -971,6 +971,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
x->if_id == if_id &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
tmpl->mode == x->props.mode &&
@ -1010,6 +1011,7 @@ found:
* to current session. */
xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
memcpy(&x->mark, &pol->mark, sizeof(x->mark));
x->if_id = if_id;
error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
if (error) {
@ -1067,7 +1069,7 @@ out:
}
struct xfrm_state *
xfrm_stateonly_find(struct net *net, u32 mark,
xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
xfrm_address_t *daddr, xfrm_address_t *saddr,
unsigned short family, u8 mode, u8 proto, u32 reqid)
{
@ -1080,6 +1082,7 @@ xfrm_stateonly_find(struct net *net, u32 mark,
if (x->props.family == family &&
x->props.reqid == reqid &&
(mark & x->mark.m) == x->mark.v &&
x->if_id == if_id &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_state_addr_check(x, daddr, saddr, family) &&
mode == x->props.mode &&
@ -1160,11 +1163,13 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
struct xfrm_state *x;
unsigned int h;
u32 mark = xnew->mark.v & xnew->mark.m;
u32 if_id = xnew->if_id;
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
x->if_id == if_id &&
(mark & x->mark.m) == x->mark.v &&
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@ -1187,7 +1192,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
static struct xfrm_state *__find_acq_core(struct net *net,
const struct xfrm_mark *m,
unsigned short family, u8 mode,
u32 reqid, u8 proto,
u32 reqid, u32 if_id, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
int create)
@ -1242,6 +1247,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
x->props.family = family;
x->props.mode = mode;
x->props.reqid = reqid;
x->if_id = if_id;
x->mark.v = m->v;
x->mark.m = m->m;
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
@ -1296,7 +1302,7 @@ int xfrm_state_add(struct xfrm_state *x)
if (use_spi && !x1)
x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
x->props.reqid, x->id.proto,
x->props.reqid, x->if_id, x->id.proto,
&x->id.daddr, &x->props.saddr, 0);
__xfrm_state_bump_genids(x);
@ -1395,6 +1401,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
x->if_id = orig->if_id;
x->tfcpad = orig->tfcpad;
x->replay_maxdiff = orig->replay_maxdiff;
x->replay_maxage = orig->replay_maxage;
@ -1554,6 +1561,19 @@ out:
if (x1->curlft.use_time)
xfrm_state_check_expire(x1);
if (x->props.smark.m || x->props.smark.v || x->if_id) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
if (x->props.smark.m || x->props.smark.v)
x1->props.smark = x->props.smark;
if (x->if_id)
x1->if_id = x->if_id;
__xfrm_state_bump_genids(x1);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
err = 0;
x->km.state = XFRM_STATE_DEAD;
__xfrm_state_put(x);
@ -1571,7 +1591,7 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
if (!x->curlft.use_time)
x->curlft.use_time = get_seconds();
x->curlft.use_time = ktime_get_real_seconds();
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
@ -1619,13 +1639,13 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
u8 proto, const xfrm_address_t *daddr,
u32 if_id, u8 proto, const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create, unsigned short family)
{
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create);
x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;

View File

@ -527,6 +527,19 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
x->replay_maxdiff = nla_get_u32(rt);
}
/*
 * Fill @m from the XFRMA_SET_MARK / XFRMA_SET_MARK_MASK attributes.
 *
 * Without XFRMA_SET_MARK both value and mask are zeroed. With it, the
 * mask defaults to 0xffffffff unless XFRMA_SET_MARK_MASK is supplied.
 */
static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
{
	if (!attrs[XFRMA_SET_MARK]) {
		m->m = 0;
		m->v = 0;
		return;
	}

	m->v = nla_get_u32(attrs[XFRMA_SET_MARK]);
	m->m = attrs[XFRMA_SET_MARK_MASK] ?
		nla_get_u32(attrs[XFRMA_SET_MARK_MASK]) : 0xffffffff;
}
static struct xfrm_state *xfrm_state_construct(struct net *net,
struct xfrm_usersa_info *p,
struct nlattr **attrs,
@ -579,8 +592,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
xfrm_mark_get(attrs, &x->mark);
if (attrs[XFRMA_OUTPUT_MARK])
x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
xfrm_smark_init(attrs, &x->props.smark);
if (attrs[XFRMA_IF_ID])
x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
if (err)
@ -824,6 +839,18 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
return 0;
}
/*
 * Emit @m as XFRMA_SET_MARK followed by XFRMA_SET_MARK_MASK.
 *
 * Nothing is emitted (and 0 is returned) when both value and mask are
 * zero. Otherwise returns 0 on success or the first nla_put_u32() error.
 */
static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m)
{
	int err;

	if (!m->v && !m->m)
		return 0;

	err = nla_put_u32(skb, XFRMA_SET_MARK, m->v);
	if (err)
		return err;

	return nla_put_u32(skb, XFRMA_SET_MARK_MASK, m->m);
}
/* Don't change this without updating xfrm_sa_len! */
static int copy_to_user_state_extra(struct xfrm_state *x,
struct xfrm_usersa_info *p,
@ -887,6 +914,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
ret = xfrm_mark_put(skb, &x->mark);
if (ret)
goto out;
ret = xfrm_smark_put(skb, &x->props.smark);
if (ret)
goto out;
if (x->replay_esn)
ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
xfrm_replay_state_esn_len(x->replay_esn),
@ -900,8 +932,8 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
ret = copy_user_offload(&x->xso, skb);
if (ret)
goto out;
if (x->props.output_mark) {
ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
if (x->if_id) {
ret = nla_put_u32(skb, XFRMA_IF_ID, x->if_id);
if (ret)
goto out;
}
@ -1253,6 +1285,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
u32 mark;
struct xfrm_mark m;
u32 if_id = 0;
p = nlmsg_data(nlh);
err = verify_spi_info(p->info.id.proto, p->min, p->max);
@ -1265,6 +1298,10 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
x = NULL;
mark = xfrm_mark_get(attrs, &m);
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
if (p->info.seq) {
x = xfrm_find_acq_byseq(net, mark, p->info.seq);
if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
@ -1275,7 +1312,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!x)
x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
p->info.id.proto, daddr,
if_id, p->info.id.proto, daddr,
&p->info.saddr, 1,
family);
err = -ENOENT;
@ -1563,6 +1600,9 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
xfrm_mark_get(attrs, &xp->mark);
if (attrs[XFRMA_IF_ID])
xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
return xp;
error:
*errp = err;
@ -1708,6 +1748,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
@ -1789,6 +1831,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
int delete;
struct xfrm_mark m;
u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
p = nlmsg_data(nlh);
delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
@ -1801,8 +1844,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
if (p->index)
xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err);
xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@ -1819,7 +1865,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel,
xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
ctx, delete, &err);
security_xfrm_policy_free(ctx);
}
@ -1942,6 +1988,10 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
goto out_cancel;
err = xfrm_if_id_put(skb, x->if_id);
if (err)
goto out_cancel;
nlmsg_end(skb, nlh);
return 0;
@ -2084,6 +2134,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
int err = -ENOENT;
struct xfrm_mark m;
u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
err = copy_from_user_policy_type(&type, attrs);
if (err)
@ -2093,8 +2144,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
if (p->index)
xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err);
xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@ -2111,7 +2165,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir,
xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
&p->sel, ctx, 0, &err);
security_xfrm_policy_free(ctx);
}
@ -2493,7 +2547,9 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
[XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
[XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
[XFRMA_SET_MARK] = { .type = NLA_U32 },
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@ -2625,6 +2681,10 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
return err;
err = xfrm_if_id_put(skb, x->if_id);
if (err)
return err;
nlmsg_end(skb, nlh);
return 0;
}
@ -2719,8 +2779,12 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
l += nla_total_size(sizeof(x->props.extra_flags));
if (x->xso.dev)
l += nla_total_size(sizeof(x->xso));
if (x->props.output_mark)
l += nla_total_size(sizeof(x->props.output_mark));
if (x->props.smark.v | x->props.smark.m) {
l += nla_total_size(sizeof(x->props.smark.v));
l += nla_total_size(sizeof(x->props.smark.m));
}
if (x->if_id)
l += nla_total_size(sizeof(x->if_id));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
@ -2850,6 +2914,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
@ -2966,6 +3032,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
@ -3047,6 +3115,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
err = copy_to_user_policy_type(xp->type, skb);
if (!err)
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
if (err)
goto out_free_skb;