2005-04-17 06:20:36 +08:00
|
|
|
#ifndef __LINUX_NETFILTER_H
|
|
|
|
#define __LINUX_NETFILTER_H
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/net.h>
|
|
|
|
#include <linux/if.h>
|
2008-01-15 15:40:34 +08:00
|
|
|
#include <linux/in.h>
|
|
|
|
#include <linux/in6.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/wait.h>
|
|
|
|
#include <linux/list.h>
|
|
|
|
#endif
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
|
|
|
|
/* Responses from hook functions. */
#define NF_DROP 0
#define NF_ACCEPT 1
#define NF_STOLEN 2
#define NF_QUEUE 3
#define NF_REPEAT 4
#define NF_STOP 5
#define NF_MAX_VERDICT NF_STOP

/* we overload the higher bits for encoding auxiliary data such as the queue
 * number. Not nice, but better than additional function arguments. */
#define NF_VERDICT_MASK 0x0000ffff
#define NF_VERDICT_BITS 16

#define NF_VERDICT_QMASK 0xffff0000
#define NF_VERDICT_QBITS 16

/*
 * Build an NF_QUEUE verdict that carries queue number x in the bits selected
 * by NF_VERDICT_QMASK.  The argument is parenthesized so that an expression
 * such as NF_QUEUE_NR(a | b) shifts the whole value rather than only its
 * right-hand operand ("<<" binds tighter than "|", "&" and "?:").
 */
#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE)
|
2005-08-10 10:43:44 +08:00
|
|
|
|
2005-08-10 10:24:19 +08:00
|
|
|
/* only for userspace compatibility */
#ifndef __KERNEL__
/*
 * Generic cache responses from hook functions.
 * <= 0x2000 is used for protocol-flags.
 *
 * These two flags are visible only when __KERNEL__ is not defined.
 */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-11-20 10:53:30 +08:00
|
|
|
/*
 * Hook point identifiers.  Values are assigned implicitly starting at 0, so
 * NF_INET_NUMHOOKS, being the last enumerator, equals the number of hook
 * points listed before it.
 */
enum nf_inet_hooks {
	NF_INET_PRE_ROUTING,
	NF_INET_LOCAL_IN,
	NF_INET_FORWARD,
	NF_INET_LOCAL_OUT,
	NF_INET_POST_ROUTING,
	NF_INET_NUMHOOKS
};
|
|
|
|
|
2007-12-18 14:43:50 +08:00
|
|
|
union nf_inet_addr {
|
|
|
|
u_int32_t all[4];
|
|
|
|
__be32 ip;
|
|
|
|
__be32 ip6[4];
|
2008-01-15 15:40:34 +08:00
|
|
|
struct in_addr in;
|
|
|
|
struct in6_addr in6;
|
2007-12-18 14:43:50 +08:00
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#ifdef __KERNEL__
|
|
|
|
#ifdef CONFIG_NETFILTER
|
|
|
|
|
|
|
|
/* Netfilter core initialisation entry point (defined elsewhere). */
extern void netfilter_init(void);

/* Largest hook number + 1 */
#define NF_MAX_HOOKS 8
|
|
|
|
|
|
|
|
struct sk_buff;
struct net_device;

/*
 * Signature of a hook callback.
 *
 * hooknum: number of the hook point being run
 * skb:     packet buffer handed to the hook
 * in/out:  input and output devices (read-only for the hook)
 * okfn:    continuation function taking the packet
 *
 * Returns an unsigned int; presumably one of the NF_* verdicts, optionally
 * combined with data in the upper bits -- confirm against nf_hook_slow().
 */
typedef unsigned int nf_hookfn(unsigned int hooknum,
			       struct sk_buff *skb,
			       const struct net_device *in,
			       const struct net_device *out,
			       int (*okfn)(struct sk_buff *));
|
|
|
|
|
|
|
|
struct nf_hook_ops
|
|
|
|
{
|
|
|
|
struct list_head list;
|
|
|
|
|
|
|
|
/* User fills in from here down. */
|
|
|
|
nf_hookfn *hook;
|
|
|
|
struct module *owner;
|
|
|
|
int pf;
|
|
|
|
int hooknum;
|
|
|
|
/* Hooks are ordered in ascending priority. */
|
|
|
|
int priority;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct nf_sockopt_ops
|
|
|
|
{
|
|
|
|
struct list_head list;
|
|
|
|
|
|
|
|
int pf;
|
|
|
|
|
|
|
|
/* Non-inclusive ranges: use 0/0/NULL to never get called. */
|
|
|
|
int set_optmin;
|
|
|
|
int set_optmax;
|
|
|
|
int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len);
|
2006-03-21 14:45:21 +08:00
|
|
|
int (*compat_set)(struct sock *sk, int optval,
|
|
|
|
void __user *user, unsigned int len);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
int get_optmin;
|
|
|
|
int get_optmax;
|
|
|
|
int (*get)(struct sock *sk, int optval, void __user *user, int *len);
|
2006-03-21 14:45:21 +08:00
|
|
|
int (*compat_get)(struct sock *sk, int optval,
|
|
|
|
void __user *user, int *len);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
[NETFILTER]: Fix/improve deadlock condition on module removal netfilter
So I've had a deadlock reported to me. I've found that the sequence of
events goes like this:
1) process A (modprobe) runs to remove ip_tables.ko
2) process B (iptables-restore) runs and calls setsockopt on a netfilter socket,
increasing the ip_tables socket_ops use count
3) process A acquires a file lock on the file ip_tables.ko, calls remove_module
in the kernel, which in turn executes the ip_tables module cleanup routine,
which calls nf_unregister_sockopt
4) nf_unregister_sockopt, seeing that the use count is non-zero, puts the
calling process into uninterruptible sleep, expecting the process using the
socket option code to wake it up when it exits the kernel
4) the user of the socket option code (process B) in do_ipt_get_ctl, calls
ipt_find_table_lock, which in this case calls request_module to load
ip_tables_nat.ko
5) request_module forks a copy of modprobe (process C) to load the module and
blocks until modprobe exits.
6) Process C. forked by request_module process the dependencies of
ip_tables_nat.ko, of which ip_tables.ko is one.
7) Process C attempts to lock the request module and all its dependencies, it
blocks when it attempts to lock ip_tables.ko (which was previously locked in
step 3)
Theres not really any great permanent solution to this that I can see, but I've
developed a two part solution that corrects the problem
Part 1) Modifies the nf_sockopt registration code so that, instead of using a
use counter internal to the nf_sockopt_ops structure, we instead use a pointer
to the registering modules owner to do module reference counting when nf_sockopt
calls a modules set/get routine. This prevents the deadlock by preventing set 4
from happening.
Part 2) Enhances the modprobe utilty so that by default it preforms non-blocking
remove operations (the same way rmmod does), and add an option to explicity
request blocking operation. So if you select blocking operation in modprobe you
can still cause the above deadlock, but only if you explicity try (and since
root can do any old stupid thing it would like.... :) ).
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2007-09-11 17:28:26 +08:00
|
|
|
/* Use the module struct to lock set/get code in place */
|
|
|
|
struct module *owner;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Functions to register/unregister hook points.  The plural variants take
 * an array of n descriptors.
 */
int nf_register_hook(struct nf_hook_ops *reg);
void nf_unregister_hook(struct nf_hook_ops *reg);
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Functions to register get/setsockopt ranges (non-inclusive).  You
   need to check permissions yourself! */
int nf_register_sockopt(struct nf_sockopt_ops *reg);
void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
|
|
|
|
|
2006-11-29 09:35:17 +08:00
|
|
|
#ifdef CONFIG_SYSCTL
/* Sysctl registration: ctl_path arrays defined elsewhere. */
extern struct ctl_path nf_net_netfilter_sysctl_path[];
extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
#endif /* CONFIG_SYSCTL */
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Hook lists, indexed by protocol family and then by hook number. */
extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];

/*
 * Out-of-line hook traversal, called by nf_hook_thresh() once its inline
 * fast-path checks have not short-circuited.
 */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev, struct net_device *outdev,
		 int (*okfn)(struct sk_buff *), int thresh);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nf_hook_thresh - call a netfilter hook
|
|
|
|
*
|
|
|
|
* Returns 1 if the hook has allowed the packet to pass. The function
|
|
|
|
* okfn must be invoked by the caller in this case. Any other return
|
|
|
|
* value indicates the packet has been consumed by the hook.
|
|
|
|
*/
|
|
|
|
static inline int nf_hook_thresh(int pf, unsigned int hook,
|
2007-10-15 15:53:15 +08:00
|
|
|
struct sk_buff *skb,
|
2006-01-07 15:01:48 +08:00
|
|
|
struct net_device *indev,
|
|
|
|
struct net_device *outdev,
|
2006-02-16 07:10:22 +08:00
|
|
|
int (*okfn)(struct sk_buff *), int thresh,
|
|
|
|
int cond)
|
2006-01-07 15:01:48 +08:00
|
|
|
{
|
2006-02-16 07:10:22 +08:00
|
|
|
if (!cond)
|
|
|
|
return 1;
|
2006-01-07 15:01:48 +08:00
|
|
|
#ifndef CONFIG_NETFILTER_DEBUG
|
|
|
|
if (list_empty(&nf_hooks[pf][hook]))
|
|
|
|
return 1;
|
|
|
|
#endif
|
2007-10-15 15:53:15 +08:00
|
|
|
return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
|
2006-01-07 15:01:48 +08:00
|
|
|
}
|
|
|
|
|
2007-10-15 15:53:15 +08:00
|
|
|
static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
|
2006-01-07 15:01:48 +08:00
|
|
|
struct net_device *indev, struct net_device *outdev,
|
|
|
|
int (*okfn)(struct sk_buff *))
|
|
|
|
{
|
2007-10-15 15:53:15 +08:00
|
|
|
return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, 1);
|
2006-01-07 15:01:48 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Activate hook; either okfn or kfree_skb called, unless a hook
   returns NF_STOLEN (in which case, it's up to the hook to deal with
   the consequences).

   Returns -ERRNO if packet dropped.  Zero means queued, stolen or
   accepted.
*/

/* RR:
   > I don't want nf_hook to return anything because people might forget
   > about async and trust the return value to mean "packet was ok".

   AK:
   Just document it clearly, then you can expect some sense from kernel
   coders :)
*/

/* This is gross, but inline doesn't cut it for avoiding the function
   call in fast path: gcc doesn't inline (needs value tracking?).  --RR */

/* HX: It's slightly less gross now. */

/*
 * Run the hooks via nf_hook_thresh(); if that returns 1, call okfn(skb) and
 * yield its result, otherwise yield nf_hook_thresh()'s result.
 *
 * Note: skb (and okfn) are expanded twice, so arguments must be free of
 * side effects.  The statement-expression form is a GNU C extension.
 */
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
({int __ret;								       \
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
	__ret = (okfn)(skb);						       \
__ret;})

/* As NF_HOOK_THRESH with threshold INT_MIN, but hooks run only if cond. */
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
({int __ret;								       \
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
	__ret = (okfn)(skb);						       \
__ret;})

#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
	NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Call setsockopt() */
|
|
|
|
int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt,
|
|
|
|
int len);
|
|
|
|
int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt,
|
|
|
|
int *len);
|
|
|
|
|
2006-03-21 14:45:21 +08:00
|
|
|
int compat_nf_setsockopt(struct sock *sk, int pf, int optval,
|
|
|
|
char __user *opt, int len);
|
|
|
|
int compat_nf_getsockopt(struct sock *sk, int pf, int optval,
|
|
|
|
char __user *opt, int *len);
|
|
|
|
|
2005-08-10 10:37:23 +08:00
|
|
|
/* Call this before modifying an existing packet: ensures it is
   modifiable and linear to the point you care about (writable_len).
   Returns true or false. */
extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
|
2005-08-10 10:37:23 +08:00
|
|
|
|
2007-12-05 17:22:05 +08:00
|
|
|
struct flowi;
|
2007-12-05 17:26:33 +08:00
|
|
|
struct nf_queue_entry;
|
2007-12-05 17:24:48 +08:00
|
|
|
|
2006-04-07 05:18:09 +08:00
|
|
|
struct nf_afinfo {
|
|
|
|
unsigned short family;
|
2006-11-15 13:40:42 +08:00
|
|
|
__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
|
2006-04-07 05:18:43 +08:00
|
|
|
unsigned int dataoff, u_int8_t protocol);
|
2007-12-05 17:22:05 +08:00
|
|
|
int (*route)(struct dst_entry **dst, struct flowi *fl);
|
2006-04-07 05:18:09 +08:00
|
|
|
void (*saveroute)(const struct sk_buff *skb,
|
2007-12-05 17:26:33 +08:00
|
|
|
struct nf_queue_entry *entry);
|
2007-10-15 15:53:15 +08:00
|
|
|
int (*reroute)(struct sk_buff *skb,
|
2007-12-05 17:26:33 +08:00
|
|
|
const struct nf_queue_entry *entry);
|
2006-04-07 05:18:09 +08:00
|
|
|
int route_key_size;
|
2005-08-10 10:42:34 +08:00
|
|
|
};
|
|
|
|
|
2007-12-18 14:42:27 +08:00
|
|
|
/* Table of registered per-family operations, indexed by family. */
extern const struct nf_afinfo *nf_afinfo[NPROTO];

/*
 * Fetch the afinfo entry for @family via rcu_dereference().
 *
 * nf_checksum() wraps its call in rcu_read_lock()/rcu_read_unlock() and
 * treats a NULL result as "nothing registered".
 * NOTE(review): family indexes the array without a range check; presumably
 * callers guarantee family < NPROTO -- confirm.
 */
static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
{
	return rcu_dereference(nf_afinfo[family]);
}
|
2005-08-10 10:42:34 +08:00
|
|
|
|
2006-11-15 13:40:42 +08:00
|
|
|
static inline __sum16
|
2006-04-07 05:18:43 +08:00
|
|
|
nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff,
|
|
|
|
u_int8_t protocol, unsigned short family)
|
|
|
|
{
|
2007-12-18 14:42:27 +08:00
|
|
|
const struct nf_afinfo *afinfo;
|
2006-11-15 13:40:42 +08:00
|
|
|
__sum16 csum = 0;
|
2006-04-07 05:18:43 +08:00
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
afinfo = nf_get_afinfo(family);
|
|
|
|
if (afinfo)
|
|
|
|
csum = afinfo->checksum(skb, hook, dataoff, protocol);
|
|
|
|
rcu_read_unlock();
|
|
|
|
return csum;
|
|
|
|
}
|
|
|
|
|
2007-12-18 14:42:27 +08:00
|
|
|
/* Register / unregister a per-family operations table. */
extern int nf_register_afinfo(const struct nf_afinfo *afinfo);
extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
|
2006-04-07 05:18:09 +08:00
|
|
|
|
2006-01-07 15:06:30 +08:00
|
|
|
#include <net/flow.h>
|
|
|
|
/* Optional decode callback; read under RCU below and may be NULL. */
extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);

/*
 * Invoke the ip_nat_decode_session callback for AF_INET packets.
 *
 * Compiles to an empty function unless CONFIG_NF_NAT_NEEDED is defined.
 * With it, the callback pointer is fetched and called inside an RCU
 * read-side critical section, and only when it is non-NULL; any family
 * other than AF_INET is ignored.
 */
static inline void
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family)
{
#ifdef CONFIG_NF_NAT_NEEDED
	void (*decodefn)(struct sk_buff *, struct flowi *);

	if (family == AF_INET) {
		rcu_read_lock();
		decodefn = rcu_dereference(ip_nat_decode_session);
		if (decodefn)
			decodefn(skb, fl);
		rcu_read_unlock();
	}
#endif
}
|
|
|
|
|
2005-08-10 10:58:27 +08:00
|
|
|
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
/* procfs directory entry for netfilter (defined elsewhere). */
extern struct proc_dir_entry *proc_net_netfilter;
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#else /* !CONFIG_NETFILTER */
|
|
|
|
/*
 * Without CONFIG_NETFILTER there are no hooks to run: both macros reduce to
 * a direct call of okfn on skb and ignore every other argument.
 */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
|
2006-01-08 04:50:27 +08:00
|
|
|
/*
 * !CONFIG_NETFILTER stub: calls okfn(skb) directly and returns its result;
 * pf, hook, indev, outdev, thresh and cond are ignored.
 *
 * NOTE(review): the CONFIG_NETFILTER variant never calls okfn itself and
 * documents that the caller must do so when 1 is returned; this stub
 * behaves differently -- confirm that direct callers account for it.
 */
static inline int nf_hook_thresh(int pf, unsigned int hook,
				 struct sk_buff *skb,
				 struct net_device *indev,
				 struct net_device *outdev,
				 int (*okfn)(struct sk_buff *), int thresh,
				 int cond)
{
	return okfn(skb);
}
|
2007-10-15 15:53:15 +08:00
|
|
|
/*
 * !CONFIG_NETFILTER stub: always returns 1 and touches none of its
 * arguments (okfn is not called).
 */
static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
			  struct net_device *indev, struct net_device *outdev,
			  int (*okfn)(struct sk_buff *))
{
	return 1;
}
|
|
|
|
struct flowi;

/* !CONFIG_NETFILTER stub: does nothing. */
static inline void
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {}
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /*CONFIG_NETFILTER*/
|
|
|
|
|
2007-03-24 02:17:07 +08:00
|
|
|
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
/* Connection-tracking callbacks and attach helper, defined elsewhere. */
extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
extern void (*nf_ct_destroy)(struct nf_conntrack *);
#else
/* No connection tracking configured: attaching is a no-op. */
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /*__KERNEL__*/
|
|
|
|
#endif /*__LINUX_NETFILTER_H*/
|