linux/include/net/xfrm.h

2085 lines
55 KiB
C
Raw Normal View History

License cleanup: add SPDX GPL-2.0 license identifier to files with no license Many source files in the tree are missing licensing information, which makes it harder for compliance tools to determine the correct license. By default all files without license information are under the default license of the kernel, which is GPL version 2. Update the files which contain no license information with the 'GPL-2.0' SPDX license identifier. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. How this work was done: Patches were generated and checked against linux-4.14-rc6 for a subset of the use cases: - file had no licensing information it it. - file was a */uapi/* one with no licensing information in it, - file was a */uapi/* one with existing licensing information, Further patches will be generated in subsequent months to fix up cases where non-standard license headers were used, and references to license had to be inferred by heuristics based on keywords. The analysis to determine which SPDX License Identifier to be applied to a file was done in a spreadsheet of side by side results from of the output of two independent scanners (ScanCode & Windriver) producing SPDX tag:value files created by Philippe Ombredanne. Philippe prepared the base worksheet, and did an initial spot review of a few 1000 files. The 4.13 kernel was the starting point of the analysis with 60,537 files assessed. Kate Stewart did a file by file comparison of the scanner results in the spreadsheet to determine which SPDX license identifier(s) to be applied to the file. She confirmed any determination that was not immediately clear with lawyers working with the Linux Foundation. Criteria used to select files for SPDX license identifier tagging was: - Files considered eligible had to be source code files. - Make and config files were included as candidates if they contained >5 lines of source - File already had some variant of a license header in it (even if <5 lines). All documentation files were explicitly excluded. The following heuristics were used to determine which SPDX license identifiers to apply. - when both scanners couldn't find any license traces, file was considered to have no license information in it, and the top level COPYING file license applied. For non */uapi/* files that summary was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 11139 and resulted in the first patch in this series. If that file was a */uapi/* path one, it was "GPL-2.0 WITH Linux-syscall-note" otherwise it was "GPL-2.0". Results of that was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 WITH Linux-syscall-note 930 and resulted in the second patch in this series. - if a file had some form of licensing information in it, and was one of the */uapi/* ones, it was denoted with the Linux-syscall-note if any GPL family license was found in the file or had no licensing in it (per prior point). Results summary: SPDX license identifier # files ---------------------------------------------------|------ GPL-2.0 WITH Linux-syscall-note 270 GPL-2.0+ WITH Linux-syscall-note 169 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) 21 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) 17 LGPL-2.1+ WITH Linux-syscall-note 15 GPL-1.0+ WITH Linux-syscall-note 14 ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) 5 LGPL-2.0+ WITH Linux-syscall-note 4 LGPL-2.1 WITH Linux-syscall-note 3 ((GPL-2.0 WITH Linux-syscall-note) OR MIT) 3 ((GPL-2.0 WITH Linux-syscall-note) AND MIT) 1 and that resulted in the third patch in this series. - when the two scanners agreed on the detected license(s), that became the concluded license(s). - when there was disagreement between the two scanners (one detected a license but the other didn't, or they both detected different licenses) a manual inspection of the file occurred. - In most cases a manual inspection of the information in the file resulted in a clear resolution of the license that should apply (and which scanner probably needed to revisit its heuristics). - When it was not immediately clear, the license identifier was confirmed with lawyers working with the Linux Foundation. - If there was any question as to the appropriate license identifier, the file was flagged for further research and to be revisited later in time. In total, over 70 hours of logged manual review was done on the spreadsheet to determine the SPDX license identifiers to apply to the source files by Kate, Philippe, Thomas and, in some cases, confirmation by lawyers working with the Linux Foundation. Kate also obtained a third independent scan of the 4.13 code base from FOSSology, and compared selected files where the other two scanners disagreed against that SPDX file, to see if there was new insights. The Windriver scanner is based on an older version of FOSSology in part, so they are related. Thomas did random spot checks in about 500 files from the spreadsheets for the uapi headers and agreed with SPDX license identifier in the files he inspected. For the non-uapi files Thomas did random spot checks in about 15000 files. In initial set of patches against 4.14-rc6, 3 files were found to have copy/paste license identifier errors, and have been fixed to reflect the correct identifier. Additionally Philippe spent 10 hours this week doing a detailed manual inspection and review of the 12,461 patched files from the initial patch version early this week with: - a full scancode scan run, collecting the matched texts, detected license ids and scores - reviewing anything where there was a license detected (about 500+ files) to ensure that the applied SPDX license was correct - reviewing anything where there was no detection but the patch license was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied SPDX license was correct This produced a worksheet with 20 files needing minor correction. This worksheet was then exported into 3 different .csv files for the different types of files to be modified. These .csv files were then reviewed by Greg. Thomas wrote a script to parse the csv files and add the proper SPDX tag to the file, in the format that the file expected. This script was further refined by Greg based on the output to detect more types of files automatically and to distinguish between header and source .c files (which need different comment types.) Finally Greg ran the script using the .csv files to generate the patches. Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-11-01 22:07:57 +08:00
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_XFRM_H
#define _NET_XFRM_H
#include <linux/compiler.h>
#include <linux/xfrm.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/audit.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/refcount.h>
#include <linux/sockptr.h>
#include <net/sock.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/flow.h>
#include <net/gro_cells.h>
#include <linux/interrupt.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
#define XFRM_PROTO_ESP 50
#define XFRM_PROTO_AH 51
#define XFRM_PROTO_COMP 108
#define XFRM_PROTO_IPIP 4
#define XFRM_PROTO_IPV6 41
#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS
#define XFRM_ALIGN4(len) (((len) + 3) & ~3)
#define XFRM_ALIGN8(len) (((len) + 7) & ~7)
#define MODULE_ALIAS_XFRM_MODE(family, encap) \
MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap))
#define MODULE_ALIAS_XFRM_TYPE(family, proto) \
MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
#define MODULE_ALIAS_XFRM_OFFLOAD_TYPE(family, proto) \
MODULE_ALIAS("xfrm-offload-" __stringify(family) "-" __stringify(proto))
#ifdef CONFIG_XFRM_STATISTICS
#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
#else
#define XFRM_INC_STATS(net, field) ((void)(net))
#endif
/* Organization of SPD aka "XFRM rules"
------------------------------------
Basic objects:
- policy rule, struct xfrm_policy (=SPD entry)
- bundle of transformations, struct dst_entry == struct xfrm_dst (=SA bundle)
- instance of a transformer, struct xfrm_state (=SA)
- template to clone xfrm_state, struct xfrm_tmpl
SPD is plain linear list of xfrm_policy rules, ordered by priority.
(To be compatible with existing pfkeyv2 implementations,
many rules with priority of 0x7fffffff are allowed to exist and
such rules are ordered in an unpredictable way, thanks to bsd folks.)
Lookup is plain linear search until the first match with selector.
If "action" is "block", then we prohibit the flow, otherwise:
if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
policy entry has list of up to XFRM_MAX_DEPTH transformations,
described by templates xfrm_tmpl. Each template is resolved
to a complete xfrm_state (see below) and we pack bundle of transformations
to a dst_entry returned to requestor.
dst -. xfrm .-> xfrm_state #1
|---. child .-> dst -. xfrm .-> xfrm_state #2
|---. child .-> dst -. xfrm .-> xfrm_state #3
|---. child .-> NULL
Bundles are cached at xrfm_policy struct (field ->bundles).
Resolution of xrfm_tmpl
-----------------------
Template contains:
1. ->mode Mode: transport or tunnel
2. ->id.proto Protocol: AH/ESP/IPCOMP
3. ->id.daddr Remote tunnel endpoint, ignored for transport mode.
Q: allow to resolve security gateway?
4. ->id.spi If not zero, static SPI.
5. ->saddr Local tunnel endpoint, ignored for transport mode.
6. ->algos List of allowed algos. Plain bitmask now.
Q: ealgos, aalgos, calgos. What a mess...
7. ->share Sharing mode.
Q: how to implement private sharing mode? To add struct sock* to
flow id?
Having this template we search through SAD searching for entries
with appropriate mode/proto/algo, permitted by selector.
If no appropriate entry found, it is requested from key manager.
PROBLEMS:
Q: How to find all the bundles referring to a physical path for
PMTU discovery? Seems, dst should contain list of all parents...
and enter to infinite locking hierarchy disaster.
No! It is easier, we will not search for them, let them find us.
We add genid to each dst plus pointer to genid of raw IP route,
pmtu disc will update pmtu on raw IP route and increase its genid.
dst_check() will see this for top level and trigger resyncing
metrics. Plus, it will be made via sk->sk_dst_cache. Solved.
*/
struct xfrm_state_walk {
struct list_head all;
u8 state;
u8 dying;
u8 proto;
u32 seq;
struct xfrm_address_filter *filter;
};
enum {
XFRM_DEV_OFFLOAD_IN = 1,
XFRM_DEV_OFFLOAD_OUT,
};
struct xfrm_dev_offload {
struct net_device *dev;
netdevice_tracker dev_tracker;
struct net_device *real_dev;
unsigned long offload_handle;
u8 dir : 2;
};
struct xfrm_mode {
u8 encap;
u8 family;
u8 flags;
};
/* Flags for xfrm_mode. */
enum {
XFRM_MODE_FLAG_TUNNEL = 1,
};
enum xfrm_replay_mode {
XFRM_REPLAY_MODE_LEGACY,
XFRM_REPLAY_MODE_BMP,
XFRM_REPLAY_MODE_ESN,
};
/* Full description of state of transformer. */
struct xfrm_state {
possible_net_t xs_net;
union {
struct hlist_node gclist;
struct hlist_node bydst;
};
struct hlist_node bysrc;
struct hlist_node byspi;
struct hlist_node byseq;
refcount_t refcnt;
spinlock_t lock;
struct xfrm_id id;
struct xfrm_selector sel;
struct xfrm_mark mark;
u32 if_id;
u32 tfcpad;
u32 genid;
/* Key manager bits */
struct xfrm_state_walk km;
/* Parameters of this state. */
struct {
u32 reqid;
u8 mode;
u8 replay_window;
u8 aalgo, ealgo, calgo;
u8 flags;
u16 family;
xfrm_address_t saddr;
int header_len;
int trailer_len;
u32 extra_flags;
struct xfrm_mark smark;
} props;
struct xfrm_lifetime_cfg lft;
/* Data for transformer */
struct xfrm_algo_auth *aalg;
struct xfrm_algo *ealg;
struct xfrm_algo *calg;
struct xfrm_algo_aead *aead;
const char *geniv;
/* mapping change rate limiting */
__be16 new_mapping_sport;
u32 new_mapping; /* seconds */
u32 mapping_maxage; /* seconds for input SA */
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
struct sock __rcu *encap_sk;
/* Data for care-of address */
xfrm_address_t *coaddr;
/* IPComp needs an IPIP tunnel for handling uncompressed packets */
struct xfrm_state *tunnel;
/* If a tunnel, number of users + 1 */
atomic_t tunnel_users;
/* State for replay detection */
struct xfrm_replay_state replay;
struct xfrm_replay_state_esn *replay_esn;
/* Replay detection state at the time we sent the last notification */
struct xfrm_replay_state preplay;
struct xfrm_replay_state_esn *preplay_esn;
/* replay detection mode */
enum xfrm_replay_mode repl_mode;
/* internal flag that only holds state for delayed aevent at the
* moment
*/
u32 xflags;
/* Replay detection notification settings */
u32 replay_maxage;
u32 replay_maxdiff;
/* Replay detection notification timer */
struct timer_list rtimer;
/* Statistics */
struct xfrm_stats stats;
struct xfrm_lifetime_cur curlft;
struct hrtimer mtimer;
struct xfrm_dev_offload xso;
/* used to fix curlft->add_time when changing date */
long saved_tmo;
/* Last used time */
time64_t lastused;
struct page_frag xfrag;
/* Reference to data common to all the instances of this
* transformer. */
const struct xfrm_type *type;
struct xfrm_mode inner_mode;
struct xfrm_mode inner_mode_iaf;
struct xfrm_mode outer_mode;
const struct xfrm_type_offload *type_offload;
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
/* Security context */
struct xfrm_sec_ctx *security;
/* Private data of this transformer, format is opaque,
* interpreted by xfrm_type methods. */
void *data;
};
static inline struct net *xs_net(struct xfrm_state *x)
{
return read_pnet(&x->xs_net);
}
/* xflags - make enum if more show up */
#define XFRM_TIME_DEFER 1
#define XFRM_SOFT_EXPIRE 2
enum {
XFRM_STATE_VOID,
XFRM_STATE_ACQ,
XFRM_STATE_VALID,
XFRM_STATE_ERROR,
XFRM_STATE_EXPIRED,
XFRM_STATE_DEAD
};
/* callback structure passed from either netlink or pfkey */
struct km_event {
union {
u32 hard;
u32 proto;
u32 byid;
u32 aevent;
u32 type;
} data;
u32 seq;
u32 portid;
u32 event;
struct net *net;
};
xfrm: interface: support collect metadata mode This commit adds support for 'collect_md' mode on xfrm interfaces. Each net can have one collect_md device, created by providing the IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be altered and has no if_id or link device attributes. On transmit to this device, the if_id is fetched from the attached dst metadata on the skb. If exists, the link property is also fetched from the metadata. The dst metadata type used is METADATA_XFRM which holds these properties. On the receive side, xfrmi_rcv_cb() populates a dst metadata for each packet received and attaches it to the skb. The if_id used in this case is fetched from the xfrm state, and the link is fetched from the incoming device. This information can later be used by upper layers such as tc, ebpf, and ip rules. Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst metadata is postponed until after scrubing. Similarly, xfrm_input() is adapted to avoid dropping metadata dsts by only dropping 'valid' (skb_valid_dst(skb) == true) dsts. Policy matching on packets arriving from collect_md xfrmi devices is done by using the xfrm state existing in the skb's sec_path. The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session() is changed to keep the details of the if_id extraction tucked away in xfrm_interface.c. Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org> Signed-off-by: Eyal Birger <eyal.birger@gmail.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2022-08-26 19:46:59 +08:00
struct xfrm_if_decode_session_result {
struct net *net;
u32 if_id;
};
struct xfrm_if_cb {
xfrm: interface: support collect metadata mode This commit adds support for 'collect_md' mode on xfrm interfaces. Each net can have one collect_md device, created by providing the IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be altered and has no if_id or link device attributes. On transmit to this device, the if_id is fetched from the attached dst metadata on the skb. If exists, the link property is also fetched from the metadata. The dst metadata type used is METADATA_XFRM which holds these properties. On the receive side, xfrmi_rcv_cb() populates a dst metadata for each packet received and attaches it to the skb. The if_id used in this case is fetched from the xfrm state, and the link is fetched from the incoming device. This information can later be used by upper layers such as tc, ebpf, and ip rules. Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst metadata is postponed until after scrubing. Similarly, xfrm_input() is adapted to avoid dropping metadata dsts by only dropping 'valid' (skb_valid_dst(skb) == true) dsts. Policy matching on packets arriving from collect_md xfrmi devices is done by using the xfrm state existing in the skb's sec_path. The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session() is changed to keep the details of the if_id extraction tucked away in xfrm_interface.c. Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org> Signed-off-by: Eyal Birger <eyal.birger@gmail.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2022-08-26 19:46:59 +08:00
bool (*decode_session)(struct sk_buff *skb,
unsigned short family,
struct xfrm_if_decode_session_result *res);
};
void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
void xfrm_if_unregister_cb(void);
struct net_device;
struct xfrm_type;
struct xfrm_dst;
struct xfrm_policy_afinfo {
struct dst_ops *dst_ops;
struct dst_entry *(*dst_lookup)(struct net *net,
int tos, int oif,
const xfrm_address_t *saddr,
net: xfrm: support setting an output mark. On systems that use mark-based routing it may be necessary for routing lookups to use marks in order for packets to be routed correctly. An example of such a system is Android, which uses socket marks to route packets via different networks. Currently, routing lookups in tunnel mode always use a mark of zero, making routing incorrect on such systems. This patch adds a new output_mark element to the xfrm state and a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output mark differs from the existing xfrm mark in two ways: 1. The xfrm mark is used to match xfrm policies and states, while the xfrm output mark is used to set the mark (and influence the routing) of the packets emitted by those states. 2. The existing mark is constrained to be a subset of the bits of the originating socket or transformed packet, but the output mark is arbitrary and depends only on the state. The use of a separate mark provides additional flexibility. For example: - A packet subject to two transforms (e.g., transport mode inside tunnel mode) can have two different output marks applied to it, one for the transport mode SA and one for the tunnel mode SA. - On a system where socket marks determine routing, the packets emitted by an IPsec tunnel can be routed based on a mark that is determined by the tunnel, not by the marks of the unencrypted packets. - Support for setting the output marks can be introduced without breaking any existing setups that employ both mark-based routing and xfrm tunnel mode. Simply changing the code to use the xfrm mark for routing output packets could xfrm mark could change behaviour in a way that breaks these setups. If the output mark is unspecified or set to zero, the mark is not set or changed. Tested: make allyesconfig; make -j64 Tested: https://android-review.googlesource.com/452776 Signed-off-by: Lorenzo Colitti <lorenzo@google.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2017-08-11 01:11:33 +08:00
const xfrm_address_t *daddr,
u32 mark);
int (*get_saddr)(struct net *net, int oif,
xfrm_address_t *saddr,
net: xfrm: support setting an output mark. On systems that use mark-based routing it may be necessary for routing lookups to use marks in order for packets to be routed correctly. An example of such a system is Android, which uses socket marks to route packets via different networks. Currently, routing lookups in tunnel mode always use a mark of zero, making routing incorrect on such systems. This patch adds a new output_mark element to the xfrm state and a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output mark differs from the existing xfrm mark in two ways: 1. The xfrm mark is used to match xfrm policies and states, while the xfrm output mark is used to set the mark (and influence the routing) of the packets emitted by those states. 2. The existing mark is constrained to be a subset of the bits of the originating socket or transformed packet, but the output mark is arbitrary and depends only on the state. The use of a separate mark provides additional flexibility. For example: - A packet subject to two transforms (e.g., transport mode inside tunnel mode) can have two different output marks applied to it, one for the transport mode SA and one for the tunnel mode SA. - On a system where socket marks determine routing, the packets emitted by an IPsec tunnel can be routed based on a mark that is determined by the tunnel, not by the marks of the unencrypted packets. - Support for setting the output marks can be introduced without breaking any existing setups that employ both mark-based routing and xfrm tunnel mode. Simply changing the code to use the xfrm mark for routing output packets could xfrm mark could change behaviour in a way that breaks these setups. If the output mark is unspecified or set to zero, the mark is not set or changed. Tested: make allyesconfig; make -j64 Tested: https://android-review.googlesource.com/452776 Signed-off-by: Lorenzo Colitti <lorenzo@google.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2017-08-11 01:11:33 +08:00
xfrm_address_t *daddr,
u32 mark);
int (*fill_dst)(struct xfrm_dst *xdst,
struct net_device *dev,
const struct flowi *fl);
struct dst_entry *(*blackhole_route)(struct net *net, struct dst_entry *orig);
};
int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family);
void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
void km_policy_notify(struct xfrm_policy *xp, int dir,
const struct km_event *c);
void km_state_notify(struct xfrm_state *x, const struct km_event *c);
struct xfrm_tmpl;
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t,
struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
int __xfrm_state_delete(struct xfrm_state *x);
struct xfrm_state_afinfo {
u8 family;
u8 proto;
const struct xfrm_type_offload *type_offload_esp;
const struct xfrm_type *type_esp;
const struct xfrm_type *type_ipip;
const struct xfrm_type *type_ipip6;
const struct xfrm_type *type_comp;
const struct xfrm_type *type_ah;
const struct xfrm_type *type_routing;
const struct xfrm_type *type_dstopts;
int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
int (*transport_finish)(struct sk_buff *skb,
int async);
void (*local_error)(struct sk_buff *skb, u32 mtu);
};
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family);
struct xfrm_input_afinfo {
u8 family;
bool is_ipip;
int (*callback)(struct sk_buff *skb, u8 protocol,
int err);
};
int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo);
int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo);
void xfrm_flush_gc(void);
void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_type {
struct module *owner;
u8 proto;
u8 flags;
#define XFRM_TYPE_NON_FRAGMENT 1
#define XFRM_TYPE_REPLAY_PROT 2
#define XFRM_TYPE_LOCAL_COADDR 4
#define XFRM_TYPE_REMOTE_COADDR 8
int (*init_state)(struct xfrm_state *x);
void (*destructor)(struct xfrm_state *);
int (*input)(struct xfrm_state *, struct sk_buff *skb);
int (*output)(struct xfrm_state *, struct sk_buff *pskb);
int (*reject)(struct xfrm_state *, struct sk_buff *,
const struct flowi *);
};
int xfrm_register_type(const struct xfrm_type *type, unsigned short family);
void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family);
struct xfrm_type_offload {
struct module *owner;
u8 proto;
void (*encap)(struct xfrm_state *, struct sk_buff *pskb);
int (*input_tail)(struct xfrm_state *x, struct sk_buff *skb);
int (*xmit)(struct xfrm_state *, struct sk_buff *pskb, netdev_features_t features);
};
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
static inline int xfrm_af2proto(unsigned int family)
{
switch(family) {
case AF_INET:
return IPPROTO_IPIP;
case AF_INET6:
return IPPROTO_IPV6;
default:
return 0;
}
}
static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
{
if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) ||
(ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6))
return &x->inner_mode;
else
return &x->inner_mode_iaf;
}
struct xfrm_tmpl {
/* id in template is interpreted as:
* daddr - destination of tunnel, may be zero for transport mode.
* spi - zero to acquire spi. Not zero if spi is static, then
* daddr must be fixed too.
* proto - AH/ESP/IPCOMP
*/
struct xfrm_id id;
/* Source address of tunnel. Ignored, if it is not a tunnel. */
xfrm_address_t saddr;
unsigned short encap_family;
u32 reqid;
/* Mode: transport, tunnel etc. */
u8 mode;
/* Sharing mode: unique, this session only, this user only etc. */
u8 share;
/* May skip this transfomration if no SA is found */
u8 optional;
/* Skip aalgos/ealgos/calgos checks. */
u8 allalgs;
/* Bit mask of algos allowed for acquisition */
u32 aalgos;
u32 ealgos;
u32 calgos;
};
#define XFRM_MAX_DEPTH 6
#define XFRM_MAX_OFFLOAD_DEPTH 1
struct xfrm_policy_walk_entry {
struct list_head all;
u8 dead;
};
struct xfrm_policy_walk {
struct xfrm_policy_walk_entry walk;
u8 type;
u32 seq;
};
struct xfrm_policy_queue {
struct sk_buff_head hold_queue;
struct timer_list hold_timer;
unsigned long timeout;
};
struct xfrm_policy {
possible_net_t xp_net;
struct hlist_node bydst;
struct hlist_node byidx;
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
u32 pos;
struct timer_list timer;
atomic_t genid;
u32 priority;
u32 index;
u32 if_id;
struct xfrm_mark mark;
struct xfrm_selector selector;
struct xfrm_lifetime_cfg lft;
struct xfrm_lifetime_cur curlft;
struct xfrm_policy_walk_entry walk;
struct xfrm_policy_queue polq;
bool bydst_reinsert;
[XFRM]: Pack struct xfrm_policy [acme@newtoy net-2.6.20]$ pahole net/ipv4/tcp.o xfrm_policy /* /pub/scm/linux/kernel/git/acme/net-2.6.20/include/linux/security.h:67 */ struct xfrm_policy { struct xfrm_policy * next; /* 0 4 */ struct hlist_node bydst; /* 4 8 */ struct hlist_node byidx; /* 12 8 */ rwlock_t lock; /* 20 36 */ atomic_t refcnt; /* 56 4 */ struct timer_list timer; /* 60 24 */ u8 type; /* 84 1 */ /* XXX 3 bytes hole, try to pack */ u32 priority; /* 88 4 */ u32 index; /* 92 4 */ struct xfrm_selector selector; /* 96 56 */ struct xfrm_lifetime_cfg lft; /* 152 64 */ struct xfrm_lifetime_cur curlft; /* 216 32 */ struct dst_entry * bundles; /* 248 4 */ __u16 family; /* 252 2 */ __u8 action; /* 254 1 */ __u8 flags; /* 255 1 */ __u8 dead; /* 256 1 */ __u8 xfrm_nr; /* 257 1 */ /* XXX 2 bytes hole, try to pack */ struct xfrm_sec_ctx * security; /* 260 4 */ struct xfrm_tmpl xfrm_vec[6]; /* 264 360 */ }; /* size: 624, sum members: 619, holes: 2, sum holes: 5 */ So lets have just one hole instead of two, by moving 'type' to just before 'action', end result: [acme@newtoy net-2.6.20]$ codiff -s /tmp/tcp.o.before net/ipv4/tcp.o /pub/scm/linux/kernel/git/acme/net-2.6.20/net/ipv4/tcp.c: struct xfrm_policy | -4 1 struct changed [acme@newtoy net-2.6.20]$ [acme@newtoy net-2.6.20]$ pahole -c 64 net/ipv4/tcp.o xfrm_policy /* /pub/scm/linux/kernel/git/acme/net-2.6.20/include/linux/security.h:67 */ struct xfrm_policy { struct xfrm_policy * next; /* 0 4 */ struct hlist_node bydst; /* 4 8 */ struct hlist_node byidx; /* 12 8 */ rwlock_t lock; /* 20 36 */ atomic_t refcnt; /* 56 4 */ struct timer_list timer; /* 60 24 */ u32 priority; /* 84 4 */ u32 index; /* 88 4 */ struct xfrm_selector selector; /* 92 56 */ struct xfrm_lifetime_cfg lft; /* 148 64 */ struct xfrm_lifetime_cur curlft; /* 212 32 */ struct dst_entry * bundles; /* 244 4 */ u16 family; /* 248 2 */ u8 type; /* 250 1 */ u8 action; /* 251 1 */ u8 flags; /* 252 1 */ u8 dead; /* 253 1 */ u8 xfrm_nr; /* 254 1 */ /* XXX 1 byte hole, try to pack */ struct xfrm_sec_ctx * security; /* 256 4 */ struct xfrm_tmpl xfrm_vec[6]; /* 260 360 */ }; /* size: 620, sum members: 619, holes: 1, sum holes: 1 */ Are there any fugly data dependencies here? None that I know. In the process changed the removed the __ prefixed types, that are just for userspace visible headers. Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
2006-11-28 03:58:59 +08:00
u8 type;
u8 action;
u8 flags;
u8 xfrm_nr;
u16 family;
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
struct hlist_node bydst_inexact_list;
struct rcu_head rcu;
};
static inline struct net *xp_net(const struct xfrm_policy *xp)
{
return read_pnet(&xp->xp_net);
}
struct xfrm_kmaddress {
xfrm_address_t local;
xfrm_address_t remote;
u32 reserved;
u16 family;
};
struct xfrm_migrate {
xfrm_address_t old_daddr;
xfrm_address_t old_saddr;
xfrm_address_t new_daddr;
xfrm_address_t new_saddr;
u8 proto;
u8 mode;
u16 reserved;
u32 reqid;
u16 old_family;
u16 new_family;
};
#define XFRM_KM_TIMEOUT 30
/* what happened */
#define XFRM_REPLAY_UPDATE XFRM_AE_CR
#define XFRM_REPLAY_TIMEOUT XFRM_AE_CE
/* default aevent timeout in units of 100ms */
#define XFRM_AE_ETIME 10
/* Async Event timer multiplier */
#define XFRM_AE_ETH_M 10
/* default seq threshold size */
#define XFRM_AE_SEQT_SIZE 2
struct xfrm_mgr {
struct list_head list;
int (*notify)(struct xfrm_state *x, const struct km_event *c);
int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp);
struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
int (*notify_policy)(struct xfrm_policy *x, int dir, const struct km_event *c);
int (*report)(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
int (*migrate)(const struct xfrm_selector *sel,
u8 dir, u8 type,
const struct xfrm_migrate *m,
int num_bundles,
const struct xfrm_kmaddress *k,
const struct xfrm_encap_tmpl *encap);
bool (*is_alive)(const struct km_event *c);
};
void xfrm_register_km(struct xfrm_mgr *km);
void xfrm_unregister_km(struct xfrm_mgr *km);
struct xfrm_tunnel_skb_cb {
union {
struct inet_skb_parm h4;
struct inet6_skb_parm h6;
} header;
union {
struct ip_tunnel *ip4;
struct ip6_tnl *ip6;
} tunnel;
};
#define XFRM_TUNNEL_SKB_CB(__skb) ((struct xfrm_tunnel_skb_cb *)&((__skb)->cb[0]))
/*
* This structure is used for the duration where packets are being
* transformed by IPsec. As soon as the packet leaves IPsec the
* area beyond the generic IP part may be overwritten.
*/
struct xfrm_skb_cb {
struct xfrm_tunnel_skb_cb header;
/* Sequence number for replay protection. */
union {
struct {
__u32 low;
__u32 hi;
} output;
struct {
__be32 low;
__be32 hi;
} input;
} seq;
};
#define XFRM_SKB_CB(__skb) ((struct xfrm_skb_cb *)&((__skb)->cb[0]))
/*
* This structure is used by the afinfo prepare_input/prepare_output functions
* to transmit header information to the mode input/output functions.
*/
struct xfrm_mode_skb_cb {
struct xfrm_tunnel_skb_cb header;
/* Copied from header for IPv4, always set to zero and DF for IPv6. */
__be16 id;
__be16 frag_off;
/* IP header length (excluding options or extension headers). */
u8 ihl;
/* TOS for IPv4, class for IPv6. */
u8 tos;
/* TTL for IPv4, hop limitfor IPv6. */
u8 ttl;
/* Protocol for IPv4, NH for IPv6. */
u8 protocol;
/* Option length for IPv4, zero for IPv6. */
u8 optlen;
/* Used by IPv6 only, zero for IPv4. */
u8 flow_lbl[3];
};
#define XFRM_MODE_SKB_CB(__skb) ((struct xfrm_mode_skb_cb *)&((__skb)->cb[0]))
/*
* This structure is used by the input processing to locate the SPI and
* related information.
*/
struct xfrm_spi_skb_cb {
struct xfrm_tunnel_skb_cb header;
unsigned int daddroff;
unsigned int family;
__be32 seq;
};
#define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0]))
#ifdef CONFIG_AUDITSYSCALL
static inline struct audit_buffer *xfrm_audit_start(const char *op)
{
struct audit_buffer *audit_buf = NULL;
if (audit_enabled == AUDIT_OFF)
return NULL;
audit_buf = audit_log_start(audit_context(), GFP_ATOMIC,
AUDIT_MAC_IPSEC_EVENT);
if (audit_buf == NULL)
return NULL;
audit_log_format(audit_buf, "op=%s", op);
return audit_buf;
}
static inline void xfrm_audit_helper_usrinfo(bool task_valid,
struct audit_buffer *audit_buf)
{
const unsigned int auid = from_kuid(&init_user_ns, task_valid ?
audit_get_loginuid(current) :
INVALID_UID);
const unsigned int ses = task_valid ? audit_get_sessionid(current) :
AUDIT_SID_UNSET;
audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses);
audit_log_task_context(audit_buf);
}
void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid);
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
bool task_valid);
void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid);
void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid);
void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
struct sk_buff *skb);
void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb,
__be32 net_seq);
void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family);
void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, __be32 net_spi,
__be32 net_seq);
void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb,
u8 proto);
#else
xfrm: convert empty xfrm_audit_* macros to functions it removes these warnings when CONFIG_AUDITSYSCALL is unset: net/xfrm/xfrm_user.c: In function 'xfrm_add_sa': net/xfrm/xfrm_user.c:412: warning: unused variable 'sid' net/xfrm/xfrm_user.c:411: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:410: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_del_sa': net/xfrm/xfrm_user.c:485: warning: unused variable 'sid' net/xfrm/xfrm_user.c:484: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:483: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_policy': net/xfrm/xfrm_user.c:1132: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1131: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1130: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_get_policy': net/xfrm/xfrm_user.c:1382: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1381: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1380: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_pol_expire': net/xfrm/xfrm_user.c:1620: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1619: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1618: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_sa_expire': net/xfrm/xfrm_user.c:1658: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1657: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1656: warning: unused variable 'loginuid' Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-04 12:03:01 +08:00
static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
bool task_valid)
xfrm: convert empty xfrm_audit_* macros to functions it removes these warnings when CONFIG_AUDITSYSCALL is unset: net/xfrm/xfrm_user.c: In function 'xfrm_add_sa': net/xfrm/xfrm_user.c:412: warning: unused variable 'sid' net/xfrm/xfrm_user.c:411: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:410: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_del_sa': net/xfrm/xfrm_user.c:485: warning: unused variable 'sid' net/xfrm/xfrm_user.c:484: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:483: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_policy': net/xfrm/xfrm_user.c:1132: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1131: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1130: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_get_policy': net/xfrm/xfrm_user.c:1382: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1381: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1380: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_pol_expire': net/xfrm/xfrm_user.c:1620: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1619: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1618: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_sa_expire': net/xfrm/xfrm_user.c:1658: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1657: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1656: warning: unused variable 'loginuid' Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-04 12:03:01 +08:00
{
}
static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
bool task_valid)
xfrm: convert empty xfrm_audit_* macros to functions it removes these warnings when CONFIG_AUDITSYSCALL is unset: net/xfrm/xfrm_user.c: In function 'xfrm_add_sa': net/xfrm/xfrm_user.c:412: warning: unused variable 'sid' net/xfrm/xfrm_user.c:411: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:410: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_del_sa': net/xfrm/xfrm_user.c:485: warning: unused variable 'sid' net/xfrm/xfrm_user.c:484: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:483: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_policy': net/xfrm/xfrm_user.c:1132: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1131: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1130: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_get_policy': net/xfrm/xfrm_user.c:1382: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1381: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1380: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_pol_expire': net/xfrm/xfrm_user.c:1620: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1619: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1618: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_sa_expire': net/xfrm/xfrm_user.c:1658: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1657: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1656: warning: unused variable 'loginuid' Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-04 12:03:01 +08:00
{
}
static inline void xfrm_audit_state_add(struct xfrm_state *x, int result,
bool task_valid)
xfrm: convert empty xfrm_audit_* macros to functions it removes these warnings when CONFIG_AUDITSYSCALL is unset: net/xfrm/xfrm_user.c: In function 'xfrm_add_sa': net/xfrm/xfrm_user.c:412: warning: unused variable 'sid' net/xfrm/xfrm_user.c:411: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:410: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_del_sa': net/xfrm/xfrm_user.c:485: warning: unused variable 'sid' net/xfrm/xfrm_user.c:484: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:483: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_policy': net/xfrm/xfrm_user.c:1132: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1131: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1130: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_get_policy': net/xfrm/xfrm_user.c:1382: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1381: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1380: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_pol_expire': net/xfrm/xfrm_user.c:1620: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1619: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1618: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_sa_expire': net/xfrm/xfrm_user.c:1658: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1657: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1656: warning: unused variable 'loginuid' Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-04 12:03:01 +08:00
{
}
static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result,
bool task_valid)
xfrm: convert empty xfrm_audit_* macros to functions it removes these warnings when CONFIG_AUDITSYSCALL is unset: net/xfrm/xfrm_user.c: In function 'xfrm_add_sa': net/xfrm/xfrm_user.c:412: warning: unused variable 'sid' net/xfrm/xfrm_user.c:411: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:410: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_del_sa': net/xfrm/xfrm_user.c:485: warning: unused variable 'sid' net/xfrm/xfrm_user.c:484: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:483: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_policy': net/xfrm/xfrm_user.c:1132: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1131: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1130: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_get_policy': net/xfrm/xfrm_user.c:1382: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1381: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1380: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_pol_expire': net/xfrm/xfrm_user.c:1620: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1619: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1618: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_sa_expire': net/xfrm/xfrm_user.c:1658: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1657: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1656: warning: unused variable 'loginuid' Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-04 12:03:01 +08:00
{
}
static inline void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
struct sk_buff *skb)
{
}
static inline void xfrm_audit_state_replay(struct xfrm_state *x,
struct sk_buff *skb, __be32 net_seq)
{
}
xfrm: convert empty xfrm_audit_* macros to functions it removes these warnings when CONFIG_AUDITSYSCALL is unset: net/xfrm/xfrm_user.c: In function 'xfrm_add_sa': net/xfrm/xfrm_user.c:412: warning: unused variable 'sid' net/xfrm/xfrm_user.c:411: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:410: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_del_sa': net/xfrm/xfrm_user.c:485: warning: unused variable 'sid' net/xfrm/xfrm_user.c:484: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:483: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_policy': net/xfrm/xfrm_user.c:1132: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1131: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1130: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_get_policy': net/xfrm/xfrm_user.c:1382: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1381: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1380: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_pol_expire': net/xfrm/xfrm_user.c:1620: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1619: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1618: warning: unused variable 'loginuid' net/xfrm/xfrm_user.c: In function 'xfrm_add_sa_expire': net/xfrm/xfrm_user.c:1658: warning: unused variable 'sid' net/xfrm/xfrm_user.c:1657: warning: unused variable 'sessionid' net/xfrm/xfrm_user.c:1656: warning: unused variable 'loginuid' Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-05-04 12:03:01 +08:00
static inline void xfrm_audit_state_notfound_simple(struct sk_buff *skb,
u16 family)
{
}
static inline void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
__be32 net_spi, __be32 net_seq)
{
}
static inline void xfrm_audit_state_icvfail(struct xfrm_state *x,
struct sk_buff *skb, u8 proto)
{
}
#endif /* CONFIG_AUDITSYSCALL */
static inline void xfrm_pol_hold(struct xfrm_policy *policy)
{
if (likely(policy != NULL))
refcount_inc(&policy->refcnt);
}
void xfrm_policy_destroy(struct xfrm_policy *policy);
static inline void xfrm_pol_put(struct xfrm_policy *policy)
{
if (refcount_dec_and_test(&policy->refcnt))
xfrm_policy_destroy(policy);
}
static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
{
int i;
for (i = npols - 1; i >= 0; --i)
xfrm_pol_put(pols[i]);
}
xfrm: destroy xfrm_state synchronously on net exit path xfrm_state_put() moves struct xfrm_state to the GC list and schedules the GC work to clean it up. On net exit call path, xfrm_state_flush() is called to clean up and xfrm_flush_gc() is called to wait for the GC work to complete before exit. However, this doesn't work because one of the ->destructor(), ipcomp_destroy(), schedules the same GC work again inside the GC work. It is hard to wait for such a nested async callback. This is also why syzbot still reports the following warning: WARNING: CPU: 1 PID: 33 at net/ipv6/xfrm6_tunnel.c:351 xfrm6_tunnel_net_exit+0x2cb/0x500 net/ipv6/xfrm6_tunnel.c:351 ... ops_exit_list.isra.0+0xb0/0x160 net/core/net_namespace.c:153 cleanup_net+0x51d/0xb10 net/core/net_namespace.c:551 process_one_work+0xd0c/0x1ce0 kernel/workqueue.c:2153 worker_thread+0x143/0x14a0 kernel/workqueue.c:2296 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 In fact, it is perfectly fine to bypass GC and destroy xfrm_state synchronously on net exit call path, because it is in process context and doesn't need a work struct to do any blocking work. This patch introduces xfrm_state_put_sync() which simply bypasses GC, and lets its callers to decide whether to use this synchronous version. On net exit path, xfrm_state_fini() and xfrm6_tunnel_net_exit() use it. And, as ipcomp_destroy() itself is blocking, it can use xfrm_state_put_sync() directly too. Also rename xfrm_state_gc_destroy() to ___xfrm_state_destroy() to reflect this change. Fixes: b48c05ab5d32 ("xfrm: Fix warning in xfrm6_tunnel_net_exit.") Reported-and-tested-by: syzbot+e9aebef558e3ed673934@syzkaller.appspotmail.com Cc: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2019-02-01 05:05:49 +08:00
void __xfrm_state_destroy(struct xfrm_state *, bool);
static inline void __xfrm_state_put(struct xfrm_state *x)
{
refcount_dec(&x->refcnt);
}
static inline void xfrm_state_put(struct xfrm_state *x)
{
if (refcount_dec_and_test(&x->refcnt))
xfrm: destroy xfrm_state synchronously on net exit path xfrm_state_put() moves struct xfrm_state to the GC list and schedules the GC work to clean it up. On net exit call path, xfrm_state_flush() is called to clean up and xfrm_flush_gc() is called to wait for the GC work to complete before exit. However, this doesn't work because one of the ->destructor(), ipcomp_destroy(), schedules the same GC work again inside the GC work. It is hard to wait for such a nested async callback. This is also why syzbot still reports the following warning: WARNING: CPU: 1 PID: 33 at net/ipv6/xfrm6_tunnel.c:351 xfrm6_tunnel_net_exit+0x2cb/0x500 net/ipv6/xfrm6_tunnel.c:351 ... ops_exit_list.isra.0+0xb0/0x160 net/core/net_namespace.c:153 cleanup_net+0x51d/0xb10 net/core/net_namespace.c:551 process_one_work+0xd0c/0x1ce0 kernel/workqueue.c:2153 worker_thread+0x143/0x14a0 kernel/workqueue.c:2296 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 In fact, it is perfectly fine to bypass GC and destroy xfrm_state synchronously on net exit call path, because it is in process context and doesn't need a work struct to do any blocking work. This patch introduces xfrm_state_put_sync() which simply bypasses GC, and lets its callers to decide whether to use this synchronous version. On net exit path, xfrm_state_fini() and xfrm6_tunnel_net_exit() use it. And, as ipcomp_destroy() itself is blocking, it can use xfrm_state_put_sync() directly too. Also rename xfrm_state_gc_destroy() to ___xfrm_state_destroy() to reflect this change. Fixes: b48c05ab5d32 ("xfrm: Fix warning in xfrm6_tunnel_net_exit.") Reported-and-tested-by: syzbot+e9aebef558e3ed673934@syzkaller.appspotmail.com Cc: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2019-02-01 05:05:49 +08:00
__xfrm_state_destroy(x, false);
}
static inline void xfrm_state_put_sync(struct xfrm_state *x)
{
if (refcount_dec_and_test(&x->refcnt))
__xfrm_state_destroy(x, true);
}
static inline void xfrm_state_hold(struct xfrm_state *x)
{
refcount_inc(&x->refcnt);
}
static inline bool addr_match(const void *token1, const void *token2,
unsigned int prefixlen)
{
const __be32 *a1 = token1;
const __be32 *a2 = token2;
unsigned int pdw;
unsigned int pbi;
pdw = prefixlen >> 5; /* num of whole u32 in prefix */
pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */
if (pdw)
if (memcmp(a1, a2, pdw << 2))
return false;
if (pbi) {
__be32 mask;
mask = htonl((0xffffffff) << (32 - pbi));
if ((a1[pdw] ^ a2[pdw]) & mask)
return false;
}
return true;
}
static inline bool addr4_match(__be32 a1, __be32 a2, u8 prefixlen)
{
/* C99 6.5.7 (3): u32 << 32 is undefined behaviour */
if (sizeof(long) == 4 && prefixlen == 0)
return true;
return !((a1 ^ a2) & htonl(~0UL << (32 - prefixlen)));
}
static __inline__
__be16 xfrm_flowi_sport(const struct flowi *fl, const union flowi_uli *uli)
{
__be16 port;
switch(fl->flowi_proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
[NET]: Supporting UDP-Lite (RFC 3828) in Linux This is a revision of the previously submitted patch, which alters the way files are organized and compiled in the following manner: * UDP and UDP-Lite now use separate object files * source file dependencies resolved via header files net/ipv{4,6}/udp_impl.h * order of inclusion files in udp.c/udplite.c adapted accordingly [NET/IPv4]: Support for the UDP-Lite protocol (RFC 3828) This patch adds support for UDP-Lite to the IPv4 stack, provided as an extension to the existing UDPv4 code: * generic routines are all located in net/ipv4/udp.c * UDP-Lite specific routines are in net/ipv4/udplite.c * MIB/statistics support in /proc/net/snmp and /proc/net/udplite * shared API with extensions for partial checksum coverage [NET/IPv6]: Extension for UDP-Lite over IPv6 It extends the existing UDPv6 code base with support for UDP-Lite in the same manner as per UDPv4. In particular, * UDPv6 generic and shared code is in net/ipv6/udp.c * UDP-Litev6 specific extensions are in net/ipv6/udplite.c * MIB/statistics support in /proc/net/snmp6 and /proc/net/udplite6 * support for IPV6_ADDRFORM * aligned the coding style of protocol initialisation with af_inet6.c * made the error handling in udpv6_queue_rcv_skb consistent; to return `-1' on error on all error cases * consolidation of shared code [NET]: UDP-Lite Documentation and basic XFRM/Netfilter support The UDP-Lite patch further provides * API documentation for UDP-Lite * basic xfrm support * basic netfilter support for IPv4 and IPv6 (LOG target) Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-11-28 03:10:57 +08:00
case IPPROTO_UDPLITE:
case IPPROTO_SCTP:
port = uli->ports.sport;
break;
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
port = htons(uli->icmpt.type);
break;
case IPPROTO_MH:
port = htons(uli->mht.type);
break;
case IPPROTO_GRE:
port = htons(ntohl(uli->gre_key) >> 16);
break;
default:
port = 0; /*XXX*/
}
return port;
}
static __inline__
__be16 xfrm_flowi_dport(const struct flowi *fl, const union flowi_uli *uli)
{
__be16 port;
switch(fl->flowi_proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
[NET]: Supporting UDP-Lite (RFC 3828) in Linux This is a revision of the previously submitted patch, which alters the way files are organized and compiled in the following manner: * UDP and UDP-Lite now use separate object files * source file dependencies resolved via header files net/ipv{4,6}/udp_impl.h * order of inclusion files in udp.c/udplite.c adapted accordingly [NET/IPv4]: Support for the UDP-Lite protocol (RFC 3828) This patch adds support for UDP-Lite to the IPv4 stack, provided as an extension to the existing UDPv4 code: * generic routines are all located in net/ipv4/udp.c * UDP-Lite specific routines are in net/ipv4/udplite.c * MIB/statistics support in /proc/net/snmp and /proc/net/udplite * shared API with extensions for partial checksum coverage [NET/IPv6]: Extension for UDP-Lite over IPv6 It extends the existing UDPv6 code base with support for UDP-Lite in the same manner as per UDPv4. In particular, * UDPv6 generic and shared code is in net/ipv6/udp.c * UDP-Litev6 specific extensions are in net/ipv6/udplite.c * MIB/statistics support in /proc/net/snmp6 and /proc/net/udplite6 * support for IPV6_ADDRFORM * aligned the coding style of protocol initialisation with af_inet6.c * made the error handling in udpv6_queue_rcv_skb consistent; to return `-1' on error on all error cases * consolidation of shared code [NET]: UDP-Lite Documentation and basic XFRM/Netfilter support The UDP-Lite patch further provides * API documentation for UDP-Lite * basic xfrm support * basic netfilter support for IPv4 and IPv6 (LOG target) Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-11-28 03:10:57 +08:00
case IPPROTO_UDPLITE:
case IPPROTO_SCTP:
port = uli->ports.dport;
break;
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
port = htons(uli->icmpt.code);
break;
case IPPROTO_GRE:
port = htons(ntohl(uli->gre_key) & 0xffff);
break;
default:
port = 0; /*XXX*/
}
return port;
}
bool xfrm_selector_match(const struct xfrm_selector *sel,
const struct flowi *fl, unsigned short family);
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* If neither has a context --> match
* Otherwise, both must have a context and the sids, doi, alg must match
*/
static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
return ((!s1 && !s2) ||
(s1 && s2 &&
(s1->ctx_sid == s2->ctx_sid) &&
(s1->ctx_doi == s2->ctx_doi) &&
(s1->ctx_alg == s2->ctx_alg)));
}
#else
static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
{
return true;
[LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-14 15:12:27 +08:00
}
#endif
/* A struct encoding bundle of transformations to apply to some set of flow.
*
* xdst->child points to the next element of bundle.
* dst->xfrm points to an instanse of transformer.
*
* Due to unfortunate limitations of current routing cache, which we
* have no time to fix, it mirrors struct rtable and bound to the same
* routing key, including saddr,daddr. However, we can have many of
* bundles differing by session id. All the bundles grow from a parent
* policy rule.
*/
struct xfrm_dst {
union {
struct dst_entry dst;
struct rtable rt;
struct rt6_info rt6;
} u;
struct dst_entry *route;
struct dst_entry *child;
struct dst_entry *path;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
u32 xfrm_genid;
u32 policy_genid;
u32 route_mtu_cached;
u32 child_mtu_cached;
u32 route_cookie;
u32 path_cookie;
};
static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
return xdst->path;
}
#endif
return (struct dst_entry *) dst;
}
static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
return xdst->child;
}
#endif
return NULL;
}
#ifdef CONFIG_XFRM
static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child)
{
xdst->child = child;
}
static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
{
xfrm_pols_put(xdst->pols, xdst->num_pols);
dst_release(xdst->route);
if (likely(xdst->u.dst.xfrm))
xfrm_state_put(xdst->u.dst.xfrm);
}
#endif
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
struct xfrm_if_parms {
int link; /* ifindex of underlying L2 interface */
u32 if_id; /* interface identifyer */
xfrm: interface: support collect metadata mode This commit adds support for 'collect_md' mode on xfrm interfaces. Each net can have one collect_md device, created by providing the IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be altered and has no if_id or link device attributes. On transmit to this device, the if_id is fetched from the attached dst metadata on the skb. If exists, the link property is also fetched from the metadata. The dst metadata type used is METADATA_XFRM which holds these properties. On the receive side, xfrmi_rcv_cb() populates a dst metadata for each packet received and attaches it to the skb. The if_id used in this case is fetched from the xfrm state, and the link is fetched from the incoming device. This information can later be used by upper layers such as tc, ebpf, and ip rules. Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst metadata is postponed until after scrubing. Similarly, xfrm_input() is adapted to avoid dropping metadata dsts by only dropping 'valid' (skb_valid_dst(skb) == true) dsts. Policy matching on packets arriving from collect_md xfrmi devices is done by using the xfrm state existing in the skb's sec_path. The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session() is changed to keep the details of the if_id extraction tucked away in xfrm_interface.c. Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org> Signed-off-by: Eyal Birger <eyal.birger@gmail.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2022-08-26 19:46:59 +08:00
bool collect_md;
};
struct xfrm_if {
struct xfrm_if __rcu *next; /* next interface in list */
struct net_device *dev; /* virtual device associated with interface */
struct net *net; /* netns for packet i/o */
struct xfrm_if_parms p; /* interface parms */
struct gro_cells gro_cells;
};
struct xfrm_offload {
/* Output sequence number for replay protection on offloading. */
struct {
__u32 low;
__u32 hi;
} seq;
__u32 flags;
#define SA_DELETE_REQ 1
#define CRYPTO_DONE 2
#define CRYPTO_NEXT_DONE 4
#define CRYPTO_FALLBACK 8
#define XFRM_GSO_SEGMENT 16
#define XFRM_GRO 32
/* 64 is free */
#define XFRM_DEV_RESUME 128
#define XFRM_XMIT 256
__u32 status;
#define CRYPTO_SUCCESS 1
#define CRYPTO_GENERIC_ERROR 2
#define CRYPTO_TRANSPORT_AH_AUTH_FAILED 4
#define CRYPTO_TRANSPORT_ESP_AUTH_FAILED 8
#define CRYPTO_TUNNEL_AH_AUTH_FAILED 16
#define CRYPTO_TUNNEL_ESP_AUTH_FAILED 32
#define CRYPTO_INVALID_PACKET_SYNTAX 64
#define CRYPTO_INVALID_PROTOCOL 128
__u8 proto;
__u8 inner_ipproto;
};
struct sec_path {
int len;
int olen;
struct xfrm_state *xvec[XFRM_MAX_DEPTH];
struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH];
};
struct sec_path *secpath_set(struct sk_buff *skb);
static inline void
secpath_reset(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
skb_ext_del(skb, SKB_EXT_SEC_PATH);
#endif
}
static inline int
xfrm_addr_any(const xfrm_address_t *addr, unsigned short family)
{
switch (family) {
case AF_INET:
return addr->a4 == 0;
case AF_INET6:
return ipv6_addr_any(&addr->in6);
}
return 0;
}
static inline int
__xfrm4_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x)
{
return (tmpl->saddr.a4 &&
tmpl->saddr.a4 != x->props.saddr.a4);
}
static inline int
__xfrm6_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x)
{
return (!ipv6_addr_any((struct in6_addr*)&tmpl->saddr) &&
!ipv6_addr_equal((struct in6_addr *)&tmpl->saddr, (struct in6_addr*)&x->props.saddr));
}
static inline int
xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family)
{
switch (family) {
case AF_INET:
return __xfrm4_state_addr_cmp(tmpl, x);
case AF_INET6:
return __xfrm6_state_addr_cmp(tmpl, x);
}
return !0;
}
#ifdef CONFIG_XFRM
int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
unsigned short family);
static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
int dir)
{
if (!net->xfrm.policy_count[dir] && !secpath_exists(skb))
return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT;
return false;
}
static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb,
int dir, unsigned short family)
{
if (dir != XFRM_POLICY_OUT && family == AF_INET) {
/* same dst may be used for traffic originating from
* devices with different policy settings.
*/
return IPCB(skb)->flags & IPSKB_NOPOLICY;
}
return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
}
static inline int __xfrm_policy_check2(struct sock *sk, int dir,
struct sk_buff *skb,
unsigned int family, int reverse)
{
struct net *net = dev_net(skb->dev);
int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0);
if (sk && sk->sk_policy[XFRM_POLICY_IN])
return __xfrm_policy_check(sk, ndir, skb, family);
return __xfrm_check_nopolicy(net, skb, dir) ||
__xfrm_check_dev_nopolicy(skb, dir, family) ||
__xfrm_policy_check(sk, ndir, skb, family);
}
static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
{
return __xfrm_policy_check2(sk, dir, skb, family, 0);
}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return xfrm_policy_check(sk, dir, skb, AF_INET);
}
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return xfrm_policy_check(sk, dir, skb, AF_INET6);
}
static inline int xfrm4_policy_check_reverse(struct sock *sk, int dir,
struct sk_buff *skb)
{
return __xfrm_policy_check2(sk, dir, skb, AF_INET, 1);
}
static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
struct sk_buff *skb)
{
return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
}
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse);
static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
unsigned int family)
{
return __xfrm_decode_session(skb, fl, family, 0);
}
static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
return __xfrm_decode_session(skb, fl, family, 1);
}
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
struct net *net = dev_net(skb->dev);
if (!net->xfrm.policy_count[XFRM_POLICY_OUT] &&
net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT)
return true;
return (skb_dst(skb)->flags & DST_NOXFRM) ||
__xfrm_route_forward(skb, family);
}
static inline int xfrm4_route_forward(struct sk_buff *skb)
{
return xfrm_route_forward(skb, AF_INET);
}
static inline int xfrm6_route_forward(struct sk_buff *skb)
{
return xfrm_route_forward(skb, AF_INET6);
}
int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk);
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
net: Find dst with sk's xfrm policy not ctl_sk If we set XFRM security policy by calling setsockopt with option IPV6_XFRM_POLICY, the policy will be stored in 'sock_policy' in 'sock' struct. However tcp_v6_send_response doesn't look up dst_entry with the actual socket but looks up with tcp control socket. This may cause a problem that a RST packet is sent without ESP encryption & peer's TCP socket can't receive it. This patch will make the function look up dest_entry with actual socket, if the socket has XFRM policy(sock_policy), so that the TCP response packet via this function can be encrypted, & aligned on the encrypted TCP socket. Tested: We encountered this problem when a TCP socket which is encrypted in ESP transport mode encryption, receives challenge ACK at SYN_SENT state. After receiving challenge ACK, TCP needs to send RST to establish the socket at next SYN try. But the RST was not encrypted & peer TCP socket still remains on ESTABLISHED state. So we verified this with test step as below. [Test step] 1. Making a TCP state mismatch between client(IDLE) & server(ESTABLISHED). 2. Client tries a new connection on the same TCP ports(src & dst). 3. Server will return challenge ACK instead of SYN,ACK. 4. Client will send RST to server to clear the SOCKET. 5. Client will retransmit SYN to server on the same TCP ports. [Expected result] The TCP connection should be established. Cc: Maciej Żenczykowski <maze@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Steffen Klassert <steffen.klassert@secunet.com> Cc: Sehee Lee <seheele@google.com> Signed-off-by: Sewook Seo <sewookseo@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-07-07 18:01:39 +08:00
if (!sk_fullsock(osk))
return 0;
sk->sk_policy[0] = NULL;
sk->sk_policy[1] = NULL;
if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
return __xfrm_sk_clone_policy(sk, osk);
return 0;
}
int xfrm_policy_delete(struct xfrm_policy *pol, int dir);
static inline void xfrm_sk_free_policy(struct sock *sk)
{
struct xfrm_policy *pol;
pol = rcu_dereference_protected(sk->sk_policy[0], 1);
if (unlikely(pol != NULL)) {
xfrm_policy_delete(pol, XFRM_POLICY_MAX);
sk->sk_policy[0] = NULL;
}
pol = rcu_dereference_protected(sk->sk_policy[1], 1);
if (unlikely(pol != NULL)) {
xfrm_policy_delete(pol, XFRM_POLICY_MAX+1);
sk->sk_policy[1] = NULL;
}
}
#else
static inline void xfrm_sk_free_policy(struct sock *sk) {}
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
}
static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
{
return 1;
}
static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
return -ENOSYS;
}
static inline int xfrm4_policy_check_reverse(struct sock *sk, int dir,
struct sk_buff *skb)
{
return 1;
}
static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
struct sk_buff *skb)
{
return 1;
}
#endif
static __inline__
xfrm_address_t *xfrm_flowi_daddr(const struct flowi *fl, unsigned short family)
{
switch (family){
case AF_INET:
return (xfrm_address_t *)&fl->u.ip4.daddr;
case AF_INET6:
return (xfrm_address_t *)&fl->u.ip6.daddr;
}
return NULL;
}
static __inline__
xfrm_address_t *xfrm_flowi_saddr(const struct flowi *fl, unsigned short family)
{
switch (family){
case AF_INET:
return (xfrm_address_t *)&fl->u.ip4.saddr;
case AF_INET6:
return (xfrm_address_t *)&fl->u.ip6.saddr;
}
return NULL;
}
static __inline__
void xfrm_flowi_addr_get(const struct flowi *fl,
xfrm_address_t *saddr, xfrm_address_t *daddr,
unsigned short family)
{
switch(family) {
case AF_INET:
memcpy(&saddr->a4, &fl->u.ip4.saddr, sizeof(saddr->a4));
memcpy(&daddr->a4, &fl->u.ip4.daddr, sizeof(daddr->a4));
break;
case AF_INET6:
saddr->in6 = fl->u.ip6.saddr;
daddr->in6 = fl->u.ip6.daddr;
break;
}
}
static __inline__ int
__xfrm4_state_addr_check(const struct xfrm_state *x,
const xfrm_address_t *daddr, const xfrm_address_t *saddr)
{
if (daddr->a4 == x->id.daddr.a4 &&
(saddr->a4 == x->props.saddr.a4 || !saddr->a4 || !x->props.saddr.a4))
return 1;
return 0;
}
static __inline__ int
__xfrm6_state_addr_check(const struct xfrm_state *x,
const xfrm_address_t *daddr, const xfrm_address_t *saddr)
{
if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
(ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
ipv6_addr_any((struct in6_addr *)saddr) ||
ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
return 1;
return 0;
}
static __inline__ int
xfrm_state_addr_check(const struct xfrm_state *x,
const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short family)
{
switch (family) {
case AF_INET:
return __xfrm4_state_addr_check(x, daddr, saddr);
case AF_INET6:
return __xfrm6_state_addr_check(x, daddr, saddr);
}
return 0;
}
static __inline__ int
xfrm_state_addr_flow_check(const struct xfrm_state *x, const struct flowi *fl,
unsigned short family)
{
switch (family) {
case AF_INET:
return __xfrm4_state_addr_check(x,
(const xfrm_address_t *)&fl->u.ip4.daddr,
(const xfrm_address_t *)&fl->u.ip4.saddr);
case AF_INET6:
return __xfrm6_state_addr_check(x,
(const xfrm_address_t *)&fl->u.ip6.daddr,
(const xfrm_address_t *)&fl->u.ip6.saddr);
}
return 0;
}
static inline int xfrm_state_kern(const struct xfrm_state *x)
{
return atomic_read(&x->tunnel_users);
}
static inline bool xfrm_id_proto_valid(u8 proto)
{
switch (proto) {
case IPPROTO_AH:
case IPPROTO_ESP:
case IPPROTO_COMP:
#if IS_ENABLED(CONFIG_IPV6)
case IPPROTO_ROUTING:
case IPPROTO_DSTOPTS:
#endif
return true;
default:
return false;
}
}
/* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. */
static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
{
return (!userproto || proto == userproto ||
(userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH ||
proto == IPPROTO_ESP ||
proto == IPPROTO_COMP)));
}
/*
* xfrm algorithm information
*/
struct xfrm_algo_aead_info {
char *geniv;
u16 icv_truncbits;
};
struct xfrm_algo_auth_info {
u16 icv_truncbits;
u16 icv_fullbits;
};
struct xfrm_algo_encr_info {
char *geniv;
u16 blockbits;
u16 defkeybits;
};
struct xfrm_algo_comp_info {
u16 threshold;
};
struct xfrm_algo_desc {
char *name;
char *compat;
u8 available:1;
u8 pfkey_supported:1;
union {
struct xfrm_algo_aead_info aead;
struct xfrm_algo_auth_info auth;
struct xfrm_algo_encr_info encr;
struct xfrm_algo_comp_info comp;
} uinfo;
struct sadb_alg desc;
};
/* XFRM protocol handlers. */
struct xfrm4_protocol {
int (*handler)(struct sk_buff *skb);
int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int (*cb_handler)(struct sk_buff *skb, int err);
int (*err_handler)(struct sk_buff *skb, u32 info);
struct xfrm4_protocol __rcu *next;
int priority;
};
struct xfrm6_protocol {
int (*handler)(struct sk_buff *skb);
int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int (*cb_handler)(struct sk_buff *skb, int err);
int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info);
[INET]: Introduce tunnel4/tunnel6 Basically this patch moves the generic tunnel protocol stuff out of xfrm4_tunnel/xfrm6_tunnel and moves it into the new files of tunnel4.c and tunnel6 respectively. The reason for this is that the problem that Hugo uncovered is only the tip of the iceberg. The real problem is that when we removed the dependency of ipip on xfrm4_tunnel we didn't really consider the module case at all. For instance, as it is it's possible to build both ipip and xfrm4_tunnel as modules and if the latter is loaded then ipip simply won't load. After considering the alternatives I've decided that the best way out of this is to restore the dependency of ipip on the non-xfrm-specific part of xfrm4_tunnel. This is acceptable IMHO because the intention of the removal was really to be able to use ipip without the xfrm subsystem. This is still preserved by this patch. So now both ipip/xfrm4_tunnel depend on the new tunnel4.c which handles the arbitration between the two. The order of processing is determined by a simple integer which ensures that ipip gets processed before xfrm4_tunnel. The situation for ICMP handling is a little bit more complicated since we may not have enough information to determine who it's for. It's not a big deal at the moment since the xfrm ICMP handlers are basically no-ops. In future we can deal with this when we look at ICMP caching in general. The user-visible change to this is the removal of the TUNNEL Kconfig prompts. This makes sense because it can only be used through IPCOMP as it stands. The addition of the new modules shouldn't introduce any problems since module dependency will cause them to be loaded. Oh and I also turned some unnecessary pskb's in IPv6 related to this patch to skb's. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-28 17:12:13 +08:00
struct xfrm6_protocol __rcu *next;
[INET]: Introduce tunnel4/tunnel6 Basically this patch moves the generic tunnel protocol stuff out of xfrm4_tunnel/xfrm6_tunnel and moves it into the new files of tunnel4.c and tunnel6 respectively. The reason for this is that the problem that Hugo uncovered is only the tip of the iceberg. The real problem is that when we removed the dependency of ipip on xfrm4_tunnel we didn't really consider the module case at all. For instance, as it is it's possible to build both ipip and xfrm4_tunnel as modules and if the latter is loaded then ipip simply won't load. After considering the alternatives I've decided that the best way out of this is to restore the dependency of ipip on the non-xfrm-specific part of xfrm4_tunnel. This is acceptable IMHO because the intention of the removal was really to be able to use ipip without the xfrm subsystem. This is still preserved by this patch. So now both ipip/xfrm4_tunnel depend on the new tunnel4.c which handles the arbitration between the two. The order of processing is determined by a simple integer which ensures that ipip gets processed before xfrm4_tunnel. The situation for ICMP handling is a little bit more complicated since we may not have enough information to determine who it's for. It's not a big deal at the moment since the xfrm ICMP handlers are basically no-ops. In future we can deal with this when we look at ICMP caching in general. The user-visible change to this is the removal of the TUNNEL Kconfig prompts. This makes sense because it can only be used through IPCOMP as it stands. The addition of the new modules shouldn't introduce any problems since module dependency will cause them to be loaded. Oh and I also turned some unnecessary pskb's in IPv6 related to this patch to skb's. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-28 17:12:13 +08:00
int priority;
};
/* XFRM tunnel handlers. */
struct xfrm_tunnel {
int (*handler)(struct sk_buff *skb);
int (*cb_handler)(struct sk_buff *skb, int err);
int (*err_handler)(struct sk_buff *skb, u32 info);
[INET]: Introduce tunnel4/tunnel6 Basically this patch moves the generic tunnel protocol stuff out of xfrm4_tunnel/xfrm6_tunnel and moves it into the new files of tunnel4.c and tunnel6 respectively. The reason for this is that the problem that Hugo uncovered is only the tip of the iceberg. The real problem is that when we removed the dependency of ipip on xfrm4_tunnel we didn't really consider the module case at all. For instance, as it is it's possible to build both ipip and xfrm4_tunnel as modules and if the latter is loaded then ipip simply won't load. After considering the alternatives I've decided that the best way out of this is to restore the dependency of ipip on the non-xfrm-specific part of xfrm4_tunnel. This is acceptable IMHO because the intention of the removal was really to be able to use ipip without the xfrm subsystem. This is still preserved by this patch. So now both ipip/xfrm4_tunnel depend on the new tunnel4.c which handles the arbitration between the two. The order of processing is determined by a simple integer which ensures that ipip gets processed before xfrm4_tunnel. The situation for ICMP handling is a little bit more complicated since we may not have enough information to determine who it's for. It's not a big deal at the moment since the xfrm ICMP handlers are basically no-ops. In future we can deal with this when we look at ICMP caching in general. The user-visible change to this is the removal of the TUNNEL Kconfig prompts. This makes sense because it can only be used through IPCOMP as it stands. The addition of the new modules shouldn't introduce any problems since module dependency will cause them to be loaded. Oh and I also turned some unnecessary pskb's in IPv6 related to this patch to skb's. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-28 17:12:13 +08:00
struct xfrm_tunnel __rcu *next;
int priority;
};
struct xfrm6_tunnel {
[INET]: Introduce tunnel4/tunnel6 Basically this patch moves the generic tunnel protocol stuff out of xfrm4_tunnel/xfrm6_tunnel and moves it into the new files of tunnel4.c and tunnel6 respectively. The reason for this is that the problem that Hugo uncovered is only the tip of the iceberg. The real problem is that when we removed the dependency of ipip on xfrm4_tunnel we didn't really consider the module case at all. For instance, as it is it's possible to build both ipip and xfrm4_tunnel as modules and if the latter is loaded then ipip simply won't load. After considering the alternatives I've decided that the best way out of this is to restore the dependency of ipip on the non-xfrm-specific part of xfrm4_tunnel. This is acceptable IMHO because the intention of the removal was really to be able to use ipip without the xfrm subsystem. This is still preserved by this patch. So now both ipip/xfrm4_tunnel depend on the new tunnel4.c which handles the arbitration between the two. The order of processing is determined by a simple integer which ensures that ipip gets processed before xfrm4_tunnel. The situation for ICMP handling is a little bit more complicated since we may not have enough information to determine who it's for. It's not a big deal at the moment since the xfrm ICMP handlers are basically no-ops. In future we can deal with this when we look at ICMP caching in general. The user-visible change to this is the removal of the TUNNEL Kconfig prompts. This makes sense because it can only be used through IPCOMP as it stands. The addition of the new modules shouldn't introduce any problems since module dependency will cause them to be loaded. Oh and I also turned some unnecessary pskb's in IPv6 related to this patch to skb's. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-28 17:12:13 +08:00
int (*handler)(struct sk_buff *skb);
int (*cb_handler)(struct sk_buff *skb, int err);
[INET]: Introduce tunnel4/tunnel6 Basically this patch moves the generic tunnel protocol stuff out of xfrm4_tunnel/xfrm6_tunnel and moves it into the new files of tunnel4.c and tunnel6 respectively. The reason for this is that the problem that Hugo uncovered is only the tip of the iceberg. The real problem is that when we removed the dependency of ipip on xfrm4_tunnel we didn't really consider the module case at all. For instance, as it is it's possible to build both ipip and xfrm4_tunnel as modules and if the latter is loaded then ipip simply won't load. After considering the alternatives I've decided that the best way out of this is to restore the dependency of ipip on the non-xfrm-specific part of xfrm4_tunnel. This is acceptable IMHO because the intention of the removal was really to be able to use ipip without the xfrm subsystem. This is still preserved by this patch. So now both ipip/xfrm4_tunnel depend on the new tunnel4.c which handles the arbitration between the two. The order of processing is determined by a simple integer which ensures that ipip gets processed before xfrm4_tunnel. The situation for ICMP handling is a little bit more complicated since we may not have enough information to determine who it's for. It's not a big deal at the moment since the xfrm ICMP handlers are basically no-ops. In future we can deal with this when we look at ICMP caching in general. The user-visible change to this is the removal of the TUNNEL Kconfig prompts. This makes sense because it can only be used through IPCOMP as it stands. The addition of the new modules shouldn't introduce any problems since module dependency will cause them to be loaded. Oh and I also turned some unnecessary pskb's in IPv6 related to this patch to skb's. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-28 17:12:13 +08:00
int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info);
struct xfrm6_tunnel __rcu *next;
[INET]: Introduce tunnel4/tunnel6 Basically this patch moves the generic tunnel protocol stuff out of xfrm4_tunnel/xfrm6_tunnel and moves it into the new files of tunnel4.c and tunnel6 respectively. The reason for this is that the problem that Hugo uncovered is only the tip of the iceberg. The real problem is that when we removed the dependency of ipip on xfrm4_tunnel we didn't really consider the module case at all. For instance, as it is it's possible to build both ipip and xfrm4_tunnel as modules and if the latter is loaded then ipip simply won't load. After considering the alternatives I've decided that the best way out of this is to restore the dependency of ipip on the non-xfrm-specific part of xfrm4_tunnel. This is acceptable IMHO because the intention of the removal was really to be able to use ipip without the xfrm subsystem. This is still preserved by this patch. So now both ipip/xfrm4_tunnel depend on the new tunnel4.c which handles the arbitration between the two. The order of processing is determined by a simple integer which ensures that ipip gets processed before xfrm4_tunnel. The situation for ICMP handling is a little bit more complicated since we may not have enough information to determine who it's for. It's not a big deal at the moment since the xfrm ICMP handlers are basically no-ops. In future we can deal with this when we look at ICMP caching in general. The user-visible change to this is the removal of the TUNNEL Kconfig prompts. This makes sense because it can only be used through IPCOMP as it stands. The addition of the new modules shouldn't introduce any problems since module dependency will cause them to be loaded. Oh and I also turned some unnecessary pskb's in IPv6 related to this patch to skb's. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-28 17:12:13 +08:00
int priority;
};
void xfrm_init(void);
void xfrm4_init(void);
int xfrm_state_init(struct net *net);
void xfrm_state_fini(struct net *net);
void xfrm4_state_init(void);
void xfrm4_protocol_init(void);
#ifdef CONFIG_XFRM
int xfrm6_init(void);
void xfrm6_fini(void);
int xfrm6_state_init(void);
void xfrm6_state_fini(void);
int xfrm6_protocol_init(void);
void xfrm6_protocol_fini(void);
#else
static inline int xfrm6_init(void)
{
return 0;
}
static inline void xfrm6_fini(void)
{
;
}
#endif
#ifdef CONFIG_XFRM_STATISTICS
int xfrm_proc_init(struct net *net);
void xfrm_proc_fini(struct net *net);
#endif
int xfrm_sysctl_init(struct net *net);
#ifdef CONFIG_SYSCTL
void xfrm_sysctl_fini(struct net *net);
#else
static inline void xfrm_sysctl_fini(struct net *net)
{
}
#endif
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
struct xfrm_address_filter *filter);
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
int (*func)(struct xfrm_state *, int, void*), void *);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
struct xfrm_state *xfrm_state_alloc(struct net *net);
void xfrm_state_free(struct xfrm_state *x);
struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
const struct flowi *fl,
struct xfrm_tmpl *tmpl,
struct xfrm_policy *pol, int *err,
unsigned short family, u32 if_id);
struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
xfrm_address_t *daddr,
xfrm_address_t *saddr,
unsigned short family,
u8 mode, u8 proto, u32 reqid);
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
unsigned short family);
int xfrm_state_check_expire(struct xfrm_state *x);
void xfrm_state_insert(struct xfrm_state *x);
int xfrm_state_add(struct xfrm_state *x);
int xfrm_state_update(struct xfrm_state *x);
struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family);
struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
u8 proto,
unsigned short family);
#ifdef CONFIG_XFRM_SUB_POLICY
void xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
unsigned short family);
void xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
unsigned short family);
#else
static inline void xfrm_tmpl_sort(struct xfrm_tmpl **d, struct xfrm_tmpl **s,
int n, unsigned short family)
{
}
static inline void xfrm_state_sort(struct xfrm_state **d, struct xfrm_state **s,
int n, unsigned short family)
{
}
#endif
struct xfrmk_sadinfo {
u32 sadhcnt; /* current hash bkts */
u32 sadhmcnt; /* max allowed hash bkts */
u32 sadcnt; /* current running count */
};
struct xfrmk_spdinfo {
u32 incnt;
u32 outcnt;
u32 fwdcnt;
u32 inscnt;
u32 outscnt;
u32 fwdscnt;
u32 spdhcnt;
u32 spdhmcnt;
};
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
int xfrm_state_delete(struct xfrm_state *x);
xfrm: destroy xfrm_state synchronously on net exit path xfrm_state_put() moves struct xfrm_state to the GC list and schedules the GC work to clean it up. On net exit call path, xfrm_state_flush() is called to clean up and xfrm_flush_gc() is called to wait for the GC work to complete before exit. However, this doesn't work because one of the ->destructor(), ipcomp_destroy(), schedules the same GC work again inside the GC work. It is hard to wait for such a nested async callback. This is also why syzbot still reports the following warning: WARNING: CPU: 1 PID: 33 at net/ipv6/xfrm6_tunnel.c:351 xfrm6_tunnel_net_exit+0x2cb/0x500 net/ipv6/xfrm6_tunnel.c:351 ... ops_exit_list.isra.0+0xb0/0x160 net/core/net_namespace.c:153 cleanup_net+0x51d/0xb10 net/core/net_namespace.c:551 process_one_work+0xd0c/0x1ce0 kernel/workqueue.c:2153 worker_thread+0x143/0x14a0 kernel/workqueue.c:2296 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 In fact, it is perfectly fine to bypass GC and destroy xfrm_state synchronously on net exit call path, because it is in process context and doesn't need a work struct to do any blocking work. This patch introduces xfrm_state_put_sync() which simply bypasses GC, and lets its callers to decide whether to use this synchronous version. On net exit path, xfrm_state_fini() and xfrm6_tunnel_net_exit() use it. And, as ipcomp_destroy() itself is blocking, it can use xfrm_state_put_sync() directly too. Also rename xfrm_state_gc_destroy() to ___xfrm_state_destroy() to reflect this change. Fixes: b48c05ab5d32 ("xfrm: Fix warning in xfrm6_tunnel_net_exit.") Reported-and-tested-by: syzbot+e9aebef558e3ed673934@syzkaller.appspotmail.com Cc: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2019-02-01 05:05:49 +08:00
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
struct sk_buff *));
int xfrm_trans_queue(struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
struct sk_buff *));
2021-03-02 03:00:04 +08:00
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif
void xfrm_local_error(struct sk_buff *skb, int mtu);
int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
int xfrm4_rcv(struct sk_buff *skb);
static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
{
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
return xfrm_input(skb, nexthdr, spi, 0);
}
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t);
int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm6_transport_finish(struct sk_buff *skb, int async);
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
int xfrm6_rcv(struct sk_buff *skb);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto);
void xfrm6_local_error(struct sk_buff *skb, u32 mtu);
int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol);
int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol);
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family);
__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_XFRM
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
int optlen);
#else
static inline int xfrm_user_policy(struct sock *sk, int optname,
sockptr_t optval, int optlen)
{
return -ENOPROTOOPT;
}
#endif
struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
net: xfrm: support setting an output mark. On systems that use mark-based routing it may be necessary for routing lookups to use marks in order for packets to be routed correctly. An example of such a system is Android, which uses socket marks to route packets via different networks. Currently, routing lookups in tunnel mode always use a mark of zero, making routing incorrect on such systems. This patch adds a new output_mark element to the xfrm state and a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output mark differs from the existing xfrm mark in two ways: 1. The xfrm mark is used to match xfrm policies and states, while the xfrm output mark is used to set the mark (and influence the routing) of the packets emitted by those states. 2. The existing mark is constrained to be a subset of the bits of the originating socket or transformed packet, but the output mark is arbitrary and depends only on the state. The use of a separate mark provides additional flexibility. For example: - A packet subject to two transforms (e.g., transport mode inside tunnel mode) can have two different output marks applied to it, one for the transport mode SA and one for the tunnel mode SA. - On a system where socket marks determine routing, the packets emitted by an IPsec tunnel can be routed based on a mark that is determined by the tunnel, not by the marks of the unencrypted packets. - Support for setting the output marks can be introduced without breaking any existing setups that employ both mark-based routing and xfrm tunnel mode. Simply changing the code to use the xfrm mark for routing output packets could xfrm mark could change behaviour in a way that breaks these setups. If the output mark is unspecified or set to zero, the mark is not set or changed. Tested: make allyesconfig; make -j64 Tested: https://android-review.googlesource.com/452776 Signed-off-by: Lorenzo Colitti <lorenzo@google.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2017-08-11 01:11:33 +08:00
int family, u32 mark);
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type);
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
int (*func)(struct xfrm_policy *, int, int, void*),
void *);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net,
const struct xfrm_mark *mark,
u32 if_id, u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err);
struct xfrm_policy *xfrm_policy_byid(struct net *net,
const struct xfrm_mark *mark, u32 if_id,
u8 type, int dir, u32 id, int delete,
int *err);
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
xfrm: configure policy hash table thresholds by netlink Enable to specify local and remote prefix length thresholds for the policy hash table via a netlink XFRM_MSG_NEWSPDINFO message. prefix length thresholds are specified by XFRMA_SPD_IPV4_HTHRESH and XFRMA_SPD_IPV6_HTHRESH optional attributes (struct xfrmu_spdhthresh). example: struct xfrmu_spdhthresh thresh4 = { .lbits = 0; .rbits = 24; }; struct xfrmu_spdhthresh thresh6 = { .lbits = 0; .rbits = 56; }; struct nlmsghdr *hdr; struct nl_msg *msg; msg = nlmsg_alloc(); hdr = nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, XFRMA_SPD_IPV4_HTHRESH, sizeof(__u32), NLM_F_REQUEST); nla_put(msg, XFRMA_SPD_IPV4_HTHRESH, sizeof(thresh4), &thresh4); nla_put(msg, XFRMA_SPD_IPV6_HTHRESH, sizeof(thresh6), &thresh6); nla_send_auto(sk, msg); The numbers are the policy selector minimum prefix lengths to put a policy in the hash table. - lbits is the local threshold (source address for out policies, destination address for in and fwd policies). - rbits is the remote threshold (destination address for out policies, source address for in and fwd policies). The default values are: XFRMA_SPD_IPV4_HTHRESH: 32 32 XFRMA_SPD_IPV6_HTHRESH: 128 128 Dynamic re-building of the SPD is performed when the thresholds values are changed. The current thresholds can be read via a XFRM_MSG_GETSPDINFO request: the kernel replies to XFRM_MSG_GETSPDINFO requests by an XFRM_MSG_NEWSPDINFO message, with both attributes XFRMA_SPD_IPV4_HTHRESH and XFRMA_SPD_IPV6_HTHRESH. Signed-off-by: Christophe Gouault <christophe.gouault@6wind.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2014-08-29 22:16:05 +08:00
void xfrm_policy_hash_rebuild(struct net *net);
u32 xfrm_get_acqseq(void);
int verify_spi_info(u8 proto, u32 min, u32 max);
int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
u8 mode, u32 reqid, u32 if_id, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create,
unsigned short family);
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
#ifdef CONFIG_XFRM_MIGRATE
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
const struct xfrm_kmaddress *k,
const struct xfrm_encap_tmpl *encap);
xfrm: Check if_id in xfrm_migrate This patch enables distinguishing SAs and SPs based on if_id during the xfrm_migrate flow. This ensures support for xfrm interfaces throughout the SA/SP lifecycle. When there are multiple existing SPs with the same direction, the same xfrm_selector and different endpoint addresses, xfrm_migrate might fail with ENODATA. Specifically, the code path for performing xfrm_migrate is: Stage 1: find policy to migrate with xfrm_migrate_policy_find(sel, dir, type, net) Stage 2: find and update state(s) with xfrm_migrate_state_find(mp, net) Stage 3: update endpoint address(es) of template(s) with xfrm_policy_migrate(pol, m, num_migrate) Currently "Stage 1" always returns the first xfrm_policy that matches, and "Stage 3" looks for the xfrm_tmpl that matches the old endpoint address. Thus if there are multiple xfrm_policy with same selector, direction, type and net, "Stage 1" might rertun a wrong xfrm_policy and "Stage 3" will fail with ENODATA because it cannot find a xfrm_tmpl with the matching endpoint address. The fix is to allow userspace to pass an if_id and add if_id to the matching rule in Stage 1 and Stage 2 since if_id is a unique ID for xfrm_policy and xfrm_state. For compatibility, if_id will only be checked if the attribute is set. Tested with additions to Android's kernel unit test suite: https://android-review.googlesource.com/c/kernel/tests/+/1668886 Signed-off-by: Yan Yan <evitayan@google.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2022-01-19 08:00:13 +08:00
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
u32 if_id);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m,
struct xfrm_encap_tmpl *encap);
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_bundles,
struct xfrm_kmaddress *k, struct net *net,
xfrm: Check if_id in xfrm_migrate This patch enables distinguishing SAs and SPs based on if_id during the xfrm_migrate flow. This ensures support for xfrm interfaces throughout the SA/SP lifecycle. When there are multiple existing SPs with the same direction, the same xfrm_selector and different endpoint addresses, xfrm_migrate might fail with ENODATA. Specifically, the code path for performing xfrm_migrate is: Stage 1: find policy to migrate with xfrm_migrate_policy_find(sel, dir, type, net) Stage 2: find and update state(s) with xfrm_migrate_state_find(mp, net) Stage 3: update endpoint address(es) of template(s) with xfrm_policy_migrate(pol, m, num_migrate) Currently "Stage 1" always returns the first xfrm_policy that matches, and "Stage 3" looks for the xfrm_tmpl that matches the old endpoint address. Thus if there are multiple xfrm_policy with same selector, direction, type and net, "Stage 1" might rertun a wrong xfrm_policy and "Stage 3" will fail with ENODATA because it cannot find a xfrm_tmpl with the matching endpoint address. The fix is to allow userspace to pass an if_id and add if_id to the matching rule in Stage 1 and Stage 2 since if_id is a unique ID for xfrm_policy and xfrm_state. For compatibility, if_id will only be checked if the attribute is set. Tested with additions to Android's kernel unit test suite: https://android-review.googlesource.com/c/kernel/tests/+/1668886 Signed-off-by: Yan Yan <evitayan@google.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2022-01-19 08:00:13 +08:00
struct xfrm_encap_tmpl *encap, u32 if_id);
#endif
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid);
int km_report(struct net *net, u8 proto, struct xfrm_selector *sel,
xfrm_address_t *addr);
void xfrm_input_init(void);
int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq);
void xfrm_probe_algs(void);
int xfrm_count_pfkey_auth_supported(void);
int xfrm_count_pfkey_enc_supported(void);
struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx);
struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx);
struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id);
struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id);
struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id);
struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe);
struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe);
struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe);
struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len,
int probe);
static inline bool xfrm6_addr_equal(const xfrm_address_t *a,
const xfrm_address_t *b)
{
return ipv6_addr_equal((const struct in6_addr *)a,
(const struct in6_addr *)b);
}
static inline bool xfrm_addr_equal(const xfrm_address_t *a,
const xfrm_address_t *b,
sa_family_t family)
{
switch (family) {
default:
case AF_INET:
return ((__force u32)a->a4 ^ (__force u32)b->a4) == 0;
case AF_INET6:
return xfrm6_addr_equal(a, b);
}
}
static inline int xfrm_policy_id2dir(u32 index)
{
return index & 7;
}
#ifdef CONFIG_XFRM
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq);
int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
void xfrm_replay_notify(struct xfrm_state *x, int event);
int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb);
int xfrm_replay_recheck(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
static inline int xfrm_aevent_is_on(struct net *net)
{
struct sock *nlsk;
int ret = 0;
rcu_read_lock();
nlsk = rcu_dereference(net->xfrm.nlsk);
if (nlsk)
ret = netlink_has_listeners(nlsk, XFRMNLGRP_AEVENTS);
rcu_read_unlock();
return ret;
}
static inline int xfrm_acquire_is_on(struct net *net)
{
struct sock *nlsk;
int ret = 0;
rcu_read_lock();
nlsk = rcu_dereference(net->xfrm.nlsk);
if (nlsk)
ret = netlink_has_listeners(nlsk, XFRMNLGRP_ACQUIRE);
rcu_read_unlock();
return ret;
}
#endif
static inline unsigned int aead_len(struct xfrm_algo_aead *alg)
{
return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}
static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg)
{
return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}
static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg)
{
return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}
static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn)
{
return sizeof(*replay_esn) + replay_esn->bmp_len * sizeof(__u32);
}
#ifdef CONFIG_XFRM_MIGRATE
static inline int xfrm_replay_clone(struct xfrm_state *x,
struct xfrm_state *orig)
{
x->replay_esn = kmemdup(orig->replay_esn,
xfrm_replay_state_esn_len(orig->replay_esn),
GFP_KERNEL);
if (!x->replay_esn)
return -ENOMEM;
x->preplay_esn = kmemdup(orig->preplay_esn,
xfrm_replay_state_esn_len(orig->preplay_esn),
GFP_KERNEL);
if (!x->preplay_esn)
return -ENOMEM;
return 0;
}
static inline struct xfrm_algo_aead *xfrm_algo_aead_clone(struct xfrm_algo_aead *orig)
{
return kmemdup(orig, aead_len(orig), GFP_KERNEL);
}
static inline struct xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig)
{
return kmemdup(orig, xfrm_alg_len(orig), GFP_KERNEL);
}
static inline struct xfrm_algo_auth *xfrm_algo_auth_clone(struct xfrm_algo_auth *orig)
{
return kmemdup(orig, xfrm_alg_auth_len(orig), GFP_KERNEL);
}
static inline void xfrm_states_put(struct xfrm_state **states, int n)
{
int i;
for (i = 0; i < n; i++)
xfrm_state_put(*(states + i));
}
static inline void xfrm_states_delete(struct xfrm_state **states, int n)
{
int i;
for (i = 0; i < n; i++)
xfrm_state_delete(*(states + i));
}
#endif
#ifdef CONFIG_XFRM
static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
{
struct sec_path *sp = skb_sec_path(skb);
return sp->xvec[sp->len - 1];
}
#endif
static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
struct sec_path *sp = skb_sec_path(skb);
if (!sp || !sp->olen || sp->len != sp->olen)
return NULL;
return &sp->ovec[sp->olen - 1];
#else
return NULL;
#endif
}
void __init xfrm_dev_init(void);
#ifdef CONFIG_XFRM_OFFLOAD
void xfrm_dev_resume(struct sk_buff *skb);
void xfrm_dev_backlog(struct softnet_data *sd);
struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
{
struct xfrm_dev_offload *xso = &x->xso;
if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn)
xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
}
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
struct xfrm_dst *xdst;
if (!x || !x->type_offload)
return false;
xdst = (struct xfrm_dst *) dst;
if (!x->xso.offload_handle && !xdst->child->xfrm)
return true;
if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
!xdst->child->xfrm)
return true;
return false;
}
static inline void xfrm_dev_state_delete(struct xfrm_state *x)
{
struct xfrm_dev_offload *xso = &x->xso;
if (xso->dev)
xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
}
static inline void xfrm_dev_state_free(struct xfrm_state *x)
{
struct xfrm_dev_offload *xso = &x->xso;
struct net_device *dev = xso->dev;
if (dev && dev->xfrmdev_ops) {
if (dev->xfrmdev_ops->xdo_dev_state_free)
dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
netdev_put(dev, &xso->dev_tracker);
}
}
#else
static inline void xfrm_dev_resume(struct sk_buff *skb)
{
}
static inline void xfrm_dev_backlog(struct softnet_data *sd)
{
}
static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
{
return skb;
}
static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo)
{
return 0;
}
static inline void xfrm_dev_state_delete(struct xfrm_state *x)
{
}
static inline void xfrm_dev_state_free(struct xfrm_state *x)
{
}
static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
{
return false;
}
static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
{
}
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
return false;
}
#endif
static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
{
if (attrs[XFRMA_MARK])
memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(struct xfrm_mark));
else
m->v = m->m = 0;
return m->v & m->m;
}
static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
{
int ret = 0;
if (m->m | m->v)
ret = nla_put(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m);
return ret;
}
static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
{
struct xfrm_mark *m = &x->props.smark;
return (m->v & m->m) | (mark & ~m->m);
}
static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
{
int ret = 0;
if (if_id)
ret = nla_put_u32(skb, XFRMA_IF_ID, if_id);
return ret;
}
static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
unsigned int family)
{
bool tunnel = false;
switch(family) {
case AF_INET:
if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
tunnel = true;
break;
case AF_INET6:
if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
tunnel = true;
break;
}
if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL))
return -EINVAL;
return 0;
}
extern const int xfrm_msg_min[XFRM_NR_MSGTYPES];
extern const struct nla_policy xfrma_policy[XFRMA_MAX+1];
struct xfrm_translator {
/* Allocate frag_list and put compat translation there */
int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src);
/* Allocate nlmsg with 64-bit translaton of received 32-bit message */
struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh,
int maxtype, const struct nla_policy *policy,
struct netlink_ext_ack *extack);
/* Translate 32-bit user_policy from sockptr */
int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen);
struct module *owner;
};
#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
extern int xfrm_register_translator(struct xfrm_translator *xtr);
extern int xfrm_unregister_translator(struct xfrm_translator *xtr);
extern struct xfrm_translator *xfrm_get_translator(void);
extern void xfrm_put_translator(struct xfrm_translator *xtr);
#else
static inline struct xfrm_translator *xfrm_get_translator(void)
{
return NULL;
}
static inline void xfrm_put_translator(struct xfrm_translator *xtr)
{
}
#endif
#if IS_ENABLED(CONFIG_IPV6)
static inline bool xfrm6_local_dontfrag(const struct sock *sk)
{
int proto;
if (!sk || sk->sk_family != AF_INET6)
return false;
proto = sk->sk_protocol;
if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
return inet6_sk(sk)->dontfrag;
return false;
}
#endif
#endif /* _NET_XFRM_H */