mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-20 03:04:01 +08:00
Merge branch 'master' of git://1984.lsi.us.es/nf-next
Pablo Neira Ayuso says: ==================== The following patchset contains Netfilter and IPVS updates for your net-next tree, most relevantly they are: * Add net namespace support to NFLOG, ULOG and ebt_ulog and NFQUEUE. The LOG and ebt_log targets have also been adapted, but they still depend on the syslog netnamespace that seems to be missing, from Gao Feng. * Don't lose indications of congestion in IPv6 fragmentation handling, from Hannes Frederic Sowa. * IPVS conversion to use RCU, including some code consolidation patches and optimizations, also some from Julian Anastasov. * cpu fanout support for NFQUEUE, from Holger Eitzenberger. * Better error reporting to userspace when dropping packets from all our _*_[xfrm|route]_me_harder functions, from Patrick McHardy. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
d16658206a
@ -289,11 +289,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
#include <linux/proc_fs.h>
|
||||
extern struct proc_dir_entry *proc_net_netfilter;
|
||||
#endif
|
||||
|
||||
#else /* !CONFIG_NETFILTER */
|
||||
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
|
||||
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
|
||||
|
@ -575,7 +575,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
|
||||
skb->_skb_refdst = (unsigned long)dst;
|
||||
}
|
||||
|
||||
extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
|
||||
extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
|
||||
bool force);
|
||||
|
||||
/**
|
||||
* skb_dst_set_noref - sets skb dst, hopefully, without taking reference
|
||||
* @skb: buffer
|
||||
* @dst: dst entry
|
||||
*
|
||||
* Sets skb dst, assuming a reference was not taken on dst.
|
||||
* If dst entry is cached, we do not take reference and dst_release
|
||||
* will be avoided by refdst_drop. If dst entry is not cached, we take
|
||||
* reference, so that last dst_release can destroy the dst immediately.
|
||||
*/
|
||||
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
|
||||
{
|
||||
__skb_dst_set_noref(skb, dst, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_dst_set_noref_force - sets skb dst, without taking reference
|
||||
* @skb: buffer
|
||||
* @dst: dst entry
|
||||
*
|
||||
* Sets skb dst, assuming a reference was not taken on dst.
|
||||
* No reference is taken and no dst_release will be called. While for
|
||||
* cached dsts deferred reclaim is a basic feature, for entries that are
|
||||
* not cached it is caller's job to guarantee that last dst_release for
|
||||
* provided dst happens when nobody uses it, eg. after a RCU grace period.
|
||||
*/
|
||||
static inline void skb_dst_set_noref_force(struct sk_buff *skb,
|
||||
struct dst_entry *dst)
|
||||
{
|
||||
__skb_dst_set_noref(skb, dst, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_dst_is_noref - Test if skb dst isn't refcounted
|
||||
|
@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
|
||||
dst->ip = src->ip;
|
||||
}
|
||||
|
||||
static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
|
||||
const union nf_inet_addr *src)
|
||||
{
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
if (af == AF_INET6) {
|
||||
dst->in6 = src->in6;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
dst->ip = src->ip;
|
||||
dst->all[1] = 0;
|
||||
dst->all[2] = 0;
|
||||
dst->all[3] = 0;
|
||||
}
|
||||
|
||||
static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
|
||||
const union nf_inet_addr *b)
|
||||
{
|
||||
@ -344,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
|
||||
#define LeaveFunction(level) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define IP_VS_WAIT_WHILE(expr) while (expr) { cpu_relax(); }
|
||||
|
||||
|
||||
/*
|
||||
* The port number of FTP service (in network order).
|
||||
@ -566,20 +579,19 @@ struct ip_vs_conn_param {
|
||||
*/
|
||||
struct ip_vs_conn {
|
||||
struct hlist_node c_list; /* hashed list heads */
|
||||
#ifdef CONFIG_NET_NS
|
||||
struct net *net; /* Name space */
|
||||
#endif
|
||||
/* Protocol, addresses and port numbers */
|
||||
u16 af; /* address family */
|
||||
__be16 cport;
|
||||
__be16 vport;
|
||||
__be16 dport;
|
||||
__u32 fwmark; /* Fire wall mark from skb */
|
||||
__be16 vport;
|
||||
u16 af; /* address family */
|
||||
union nf_inet_addr caddr; /* client address */
|
||||
union nf_inet_addr vaddr; /* virtual address */
|
||||
union nf_inet_addr daddr; /* destination address */
|
||||
volatile __u32 flags; /* status flags */
|
||||
__u16 protocol; /* Which protocol (TCP/UDP) */
|
||||
#ifdef CONFIG_NET_NS
|
||||
struct net *net; /* Name space */
|
||||
#endif
|
||||
|
||||
/* counter and timer */
|
||||
atomic_t refcnt; /* reference count */
|
||||
@ -593,6 +605,7 @@ struct ip_vs_conn {
|
||||
* state transition triggered
|
||||
* synchronization
|
||||
*/
|
||||
__u32 fwmark; /* Fire wall mark from skb */
|
||||
unsigned long sync_endtime; /* jiffies + sent_retries */
|
||||
|
||||
/* Control members */
|
||||
@ -620,6 +633,8 @@ struct ip_vs_conn {
|
||||
const struct ip_vs_pe *pe;
|
||||
char *pe_data;
|
||||
__u8 pe_data_len;
|
||||
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -695,10 +710,9 @@ struct ip_vs_dest_user_kern {
|
||||
* and the forwarding entries
|
||||
*/
|
||||
struct ip_vs_service {
|
||||
struct list_head s_list; /* for normal service table */
|
||||
struct list_head f_list; /* for fwmark-based service table */
|
||||
struct hlist_node s_list; /* for normal service table */
|
||||
struct hlist_node f_list; /* for fwmark-based service table */
|
||||
atomic_t refcnt; /* reference counter */
|
||||
atomic_t usecnt; /* use counter */
|
||||
|
||||
u16 af; /* address family */
|
||||
__u16 protocol; /* which protocol (TCP/UDP) */
|
||||
@ -713,25 +727,35 @@ struct ip_vs_service {
|
||||
struct list_head destinations; /* real server d-linked list */
|
||||
__u32 num_dests; /* number of servers */
|
||||
struct ip_vs_stats stats; /* statistics for the service */
|
||||
struct ip_vs_app *inc; /* bind conns to this app inc */
|
||||
|
||||
/* for scheduling */
|
||||
struct ip_vs_scheduler *scheduler; /* bound scheduler object */
|
||||
rwlock_t sched_lock; /* lock sched_data */
|
||||
struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
|
||||
spinlock_t sched_lock; /* lock sched_data */
|
||||
void *sched_data; /* scheduler application data */
|
||||
|
||||
/* alternate persistence engine */
|
||||
struct ip_vs_pe *pe;
|
||||
struct ip_vs_pe __rcu *pe;
|
||||
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
/* Information for cached dst */
|
||||
struct ip_vs_dest_dst {
|
||||
struct dst_entry *dst_cache; /* destination cache entry */
|
||||
u32 dst_cookie;
|
||||
union nf_inet_addr dst_saddr;
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
/* In grace period after removing */
|
||||
#define IP_VS_DEST_STATE_REMOVING 0x01
|
||||
/*
|
||||
* The real server destination forwarding entry
|
||||
* with ip address, port number, and so on.
|
||||
*/
|
||||
struct ip_vs_dest {
|
||||
struct list_head n_list; /* for the dests in the service */
|
||||
struct list_head d_list; /* for table with all the dests */
|
||||
struct hlist_node d_list; /* for table with all the dests */
|
||||
|
||||
u16 af; /* address family */
|
||||
__be16 port; /* port number of the server */
|
||||
@ -742,6 +766,7 @@ struct ip_vs_dest {
|
||||
|
||||
atomic_t refcnt; /* reference counter */
|
||||
struct ip_vs_stats stats; /* statistics */
|
||||
unsigned long state; /* state flags */
|
||||
|
||||
/* connection counters and thresholds */
|
||||
atomic_t activeconns; /* active connections */
|
||||
@ -752,10 +777,7 @@ struct ip_vs_dest {
|
||||
|
||||
/* for destination cache */
|
||||
spinlock_t dst_lock; /* lock of dst_cache */
|
||||
struct dst_entry *dst_cache; /* destination cache entry */
|
||||
u32 dst_rtos; /* RT_TOS(tos) for dst */
|
||||
u32 dst_cookie;
|
||||
union nf_inet_addr dst_saddr;
|
||||
struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */
|
||||
|
||||
/* for virtual service */
|
||||
struct ip_vs_service *svc; /* service it belongs to */
|
||||
@ -763,6 +785,10 @@ struct ip_vs_dest {
|
||||
__be16 vport; /* virtual port number */
|
||||
union nf_inet_addr vaddr; /* virtual IP address */
|
||||
__u32 vfwmark; /* firewall mark of service */
|
||||
|
||||
struct list_head t_list; /* in dest_trash */
|
||||
struct rcu_head rcu_head;
|
||||
unsigned int in_rs_table:1; /* we are in rs_table */
|
||||
};
|
||||
|
||||
|
||||
@ -778,9 +804,13 @@ struct ip_vs_scheduler {
|
||||
/* scheduler initializing service */
|
||||
int (*init_service)(struct ip_vs_service *svc);
|
||||
/* scheduling service finish */
|
||||
int (*done_service)(struct ip_vs_service *svc);
|
||||
/* scheduler updating service */
|
||||
int (*update_service)(struct ip_vs_service *svc);
|
||||
void (*done_service)(struct ip_vs_service *svc);
|
||||
/* dest is linked */
|
||||
int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
|
||||
/* dest is unlinked */
|
||||
int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
|
||||
/* dest is updated */
|
||||
int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
|
||||
|
||||
/* selecting a server from the given service */
|
||||
struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
|
||||
@ -819,6 +849,7 @@ struct ip_vs_app {
|
||||
struct ip_vs_app *app; /* its real application */
|
||||
__be16 port; /* port number in net order */
|
||||
atomic_t usecnt; /* usage counter */
|
||||
struct rcu_head rcu_head;
|
||||
|
||||
/*
|
||||
* output hook: Process packet in inout direction, diff set for TCP.
|
||||
@ -881,6 +912,9 @@ struct ipvs_master_sync_state {
|
||||
struct netns_ipvs *ipvs;
|
||||
};
|
||||
|
||||
/* How much time to keep dests in trash */
|
||||
#define IP_VS_DEST_TRASH_PERIOD (120 * HZ)
|
||||
|
||||
/* IPVS in network namespace */
|
||||
struct netns_ipvs {
|
||||
int gen; /* Generation */
|
||||
@ -892,7 +926,7 @@ struct netns_ipvs {
|
||||
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
|
||||
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
|
||||
|
||||
struct list_head rs_table[IP_VS_RTAB_SIZE];
|
||||
struct hlist_head rs_table[IP_VS_RTAB_SIZE];
|
||||
/* ip_vs_app */
|
||||
struct list_head app_list;
|
||||
/* ip_vs_proto */
|
||||
@ -904,7 +938,6 @@ struct netns_ipvs {
|
||||
#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
|
||||
#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
|
||||
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
|
||||
spinlock_t tcp_app_lock;
|
||||
#endif
|
||||
/* ip_vs_proto_udp */
|
||||
#ifdef CONFIG_IP_VS_PROTO_UDP
|
||||
@ -912,7 +945,6 @@ struct netns_ipvs {
|
||||
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
|
||||
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
|
||||
struct list_head udp_apps[UDP_APP_TAB_SIZE];
|
||||
spinlock_t udp_app_lock;
|
||||
#endif
|
||||
/* ip_vs_proto_sctp */
|
||||
#ifdef CONFIG_IP_VS_PROTO_SCTP
|
||||
@ -921,7 +953,6 @@ struct netns_ipvs {
|
||||
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
|
||||
/* Hash table for SCTP application incarnations */
|
||||
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
|
||||
spinlock_t sctp_app_lock;
|
||||
#endif
|
||||
/* ip_vs_conn */
|
||||
atomic_t conn_count; /* connection counter */
|
||||
@ -931,9 +962,10 @@ struct netns_ipvs {
|
||||
|
||||
int num_services; /* no of virtual services */
|
||||
|
||||
rwlock_t rs_lock; /* real services table */
|
||||
/* Trash for destinations */
|
||||
struct list_head dest_trash;
|
||||
spinlock_t dest_trash_lock;
|
||||
struct timer_list dest_trash_timer; /* expiration timer */
|
||||
/* Service counters */
|
||||
atomic_t ftpsvc_counter;
|
||||
atomic_t nullsvc_counter;
|
||||
@ -1181,9 +1213,19 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
|
||||
const struct ip_vs_iphdr *iph,
|
||||
int inverse);
|
||||
|
||||
/* Get reference to gain full access to conn.
|
||||
* By default, RCU read-side critical sections have access only to
|
||||
* conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
|
||||
*/
|
||||
static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
|
||||
{
|
||||
return atomic_inc_not_zero(&cp->refcnt);
|
||||
}
|
||||
|
||||
/* put back the conn without restarting its timer */
|
||||
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
|
||||
{
|
||||
smp_mb__before_atomic_dec();
|
||||
atomic_dec(&cp->refcnt);
|
||||
}
|
||||
extern void ip_vs_conn_put(struct ip_vs_conn *cp);
|
||||
@ -1298,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
|
||||
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
|
||||
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
|
||||
|
||||
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
|
||||
void ip_vs_unbind_pe(struct ip_vs_service *svc);
|
||||
int register_ip_vs_pe(struct ip_vs_pe *pe);
|
||||
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
|
||||
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
|
||||
@ -1346,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
|
||||
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
|
||||
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
|
||||
struct ip_vs_scheduler *scheduler);
|
||||
extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
|
||||
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
|
||||
struct ip_vs_scheduler *sched);
|
||||
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
|
||||
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
|
||||
extern struct ip_vs_conn *
|
||||
@ -1366,17 +1407,12 @@ extern struct ip_vs_stats ip_vs_stats;
|
||||
extern int sysctl_ip_vs_sync_ver;
|
||||
|
||||
extern struct ip_vs_service *
|
||||
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
|
||||
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
|
||||
const union nf_inet_addr *vaddr, __be16 vport);
|
||||
|
||||
static inline void ip_vs_service_put(struct ip_vs_service *svc)
|
||||
{
|
||||
atomic_dec(&svc->usecnt);
|
||||
}
|
||||
|
||||
extern struct ip_vs_dest *
|
||||
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
|
||||
const union nf_inet_addr *daddr, __be16 dport);
|
||||
extern bool
|
||||
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
|
||||
const union nf_inet_addr *daddr, __be16 dport);
|
||||
|
||||
extern int ip_vs_use_count_inc(void);
|
||||
extern void ip_vs_use_count_dec(void);
|
||||
@ -1388,8 +1424,18 @@ extern struct ip_vs_dest *
|
||||
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
|
||||
__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
|
||||
__u16 protocol, __u32 fwmark, __u32 flags);
|
||||
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
|
||||
extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
|
||||
|
||||
static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
|
||||
{
|
||||
atomic_inc(&dest->refcnt);
|
||||
}
|
||||
|
||||
static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
|
||||
{
|
||||
smp_mb__before_atomic_dec();
|
||||
atomic_dec(&dest->refcnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* IPVS sync daemon data and function prototypes
|
||||
@ -1428,7 +1474,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
|
||||
extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
|
||||
struct ip_vs_protocol *pp, int offset,
|
||||
unsigned int hooknum, struct ip_vs_iphdr *iph);
|
||||
extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
|
||||
extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head);
|
||||
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <net/netns/ipv6.h>
|
||||
#include <net/netns/sctp.h>
|
||||
#include <net/netns/dccp.h>
|
||||
#include <net/netns/netfilter.h>
|
||||
#include <net/netns/x_tables.h>
|
||||
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
||||
#include <net/netns/conntrack.h>
|
||||
@ -94,6 +95,7 @@ struct net {
|
||||
struct netns_dccp dccp;
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER
|
||||
struct netns_nf nf;
|
||||
struct netns_xt xt;
|
||||
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
||||
struct netns_ct ct;
|
||||
|
@ -49,12 +49,18 @@ struct nf_logger {
|
||||
int nf_log_register(u_int8_t pf, struct nf_logger *logger);
|
||||
void nf_log_unregister(struct nf_logger *logger);
|
||||
|
||||
int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger);
|
||||
void nf_log_unbind_pf(u_int8_t pf);
|
||||
void nf_log_set(struct net *net, u_int8_t pf,
|
||||
const struct nf_logger *logger);
|
||||
void nf_log_unset(struct net *net, const struct nf_logger *logger);
|
||||
|
||||
int nf_log_bind_pf(struct net *net, u_int8_t pf,
|
||||
const struct nf_logger *logger);
|
||||
void nf_log_unbind_pf(struct net *net, u_int8_t pf);
|
||||
|
||||
/* Calls the registered backend logging function */
|
||||
__printf(7, 8)
|
||||
void nf_log_packet(u_int8_t pf,
|
||||
__printf(8, 9)
|
||||
void nf_log_packet(struct net *net,
|
||||
u_int8_t pf,
|
||||
unsigned int hooknum,
|
||||
const struct sk_buff *skb,
|
||||
const struct net_device *in,
|
||||
|
18
include/net/netns/netfilter.h
Normal file
18
include/net/netns/netfilter.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef __NETNS_NETFILTER_H
|
||||
#define __NETNS_NETFILTER_H
|
||||
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/netfilter.h>
|
||||
|
||||
struct nf_logger;
|
||||
|
||||
struct netns_nf {
|
||||
#if defined CONFIG_PROC_FS
|
||||
struct proc_dir_entry *proc_netfilter;
|
||||
#endif
|
||||
const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
|
||||
#ifdef CONFIG_SYSCTL
|
||||
struct ctl_table_header *nf_log_dir_header;
|
||||
#endif
|
||||
};
|
||||
#endif
|
@ -26,4 +26,13 @@ struct xt_NFQ_info_v2 {
|
||||
__u16 bypass;
|
||||
};
|
||||
|
||||
struct xt_NFQ_info_v3 {
|
||||
__u16 queuenum;
|
||||
__u16 queues_total;
|
||||
__u16 flags;
|
||||
#define NFQ_FLAG_BYPASS 0x01 /* for compatibility with v2 */
|
||||
#define NFQ_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */
|
||||
#define NFQ_FLAG_MASK 0x03
|
||||
};
|
||||
|
||||
#endif /* _XT_NFQ_TARGET_H */
|
||||
|
@ -4,9 +4,9 @@
|
||||
#include <linux/types.h>
|
||||
|
||||
struct ip6t_frag {
|
||||
__u32 ids[2]; /* Security Parameter Index */
|
||||
__u32 ids[2]; /* Identification range */
|
||||
__u32 hdrlen; /* Header Length */
|
||||
__u8 flags; /* */
|
||||
__u8 flags; /* Flags */
|
||||
__u8 invflags; /* Inverse flags */
|
||||
};
|
||||
|
||||
|
@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
|
||||
const char *prefix)
|
||||
{
|
||||
unsigned int bitmask;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
|
||||
/* FIXME: Disabled from containers until syslog ns is supported */
|
||||
if (!net_eq(net, &init_net))
|
||||
return;
|
||||
|
||||
spin_lock_bh(&ebt_log_lock);
|
||||
printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
|
||||
@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct ebt_log_info *info = par->targinfo;
|
||||
struct nf_loginfo li;
|
||||
struct net *net = dev_net(par->in ? par->in : par->out);
|
||||
|
||||
li.type = NF_LOG_TYPE_LOG;
|
||||
li.u.log.level = info->loglevel;
|
||||
li.u.log.logflags = info->bitmask;
|
||||
|
||||
if (info->bitmask & EBT_LOG_NFLOG)
|
||||
nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
|
||||
par->out, &li, "%s", info->prefix);
|
||||
nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
|
||||
par->in, par->out, &li, "%s", info->prefix);
|
||||
else
|
||||
ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
|
||||
par->out, &li, info->prefix);
|
||||
par->out, &li, info->prefix);
|
||||
return EBT_CONTINUE;
|
||||
}
|
||||
|
||||
@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = {
|
||||
.me = THIS_MODULE,
|
||||
};
|
||||
|
||||
/*
 * Per-namespace init hook: calls nf_log_set() with ebt_log_logger for
 * NFPROTO_BRIDGE in @net. Always returns 0.
 */
static int __net_init ebt_log_net_init(struct net *net)
{
	const struct nf_logger *logger = &ebt_log_logger;

	nf_log_set(net, NFPROTO_BRIDGE, logger);

	return 0;
}
|
||||
|
||||
/* Per-namespace exit hook: calls nf_log_unset() with ebt_log_logger for @net. */
static void __net_exit ebt_log_net_fini(struct net *net)
{
	const struct nf_logger *logger = &ebt_log_logger;

	nf_log_unset(net, logger);
}
|
||||
|
||||
static struct pernet_operations ebt_log_net_ops = {
|
||||
.init = ebt_log_net_init,
|
||||
.exit = ebt_log_net_fini,
|
||||
};
|
||||
|
||||
static int __init ebt_log_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_pernet_subsys(&ebt_log_net_ops);
|
||||
if (ret < 0)
|
||||
goto err_pernet;
|
||||
|
||||
ret = xt_register_target(&ebt_log_tg_reg);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
goto err_target;
|
||||
|
||||
nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
|
||||
return 0;
|
||||
|
||||
return ret;
|
||||
|
||||
err_target:
|
||||
unregister_pernet_subsys(&ebt_log_net_ops);
|
||||
err_pernet:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Module exit: unregister the per-namespace hooks, the logger and the
 * xtables target, in that order.
 *
 * NOTE(review): this is not the reverse of the registration order
 * (pernet ops, target, logger); confirm that tearing down the pernet
 * ops first is intended.
 */
static void __exit ebt_log_fini(void)
{
	unregister_pernet_subsys(&ebt_log_net_ops);

	nf_log_unregister(&ebt_log_logger);

	xt_unregister_target(&ebt_log_tg_reg);
}
|
||||
|
@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct ebt_nflog_info *info = par->targinfo;
|
||||
struct nf_loginfo li;
|
||||
struct net *net = dev_net(par->in ? par->in : par->out);
|
||||
|
||||
li.type = NF_LOG_TYPE_ULOG;
|
||||
li.u.ulog.copy_len = info->len;
|
||||
li.u.ulog.group = info->group;
|
||||
li.u.ulog.qthreshold = info->threshold;
|
||||
|
||||
nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out,
|
||||
&li, "%s", info->prefix);
|
||||
nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
|
||||
par->out, &li, "%s", info->prefix);
|
||||
return EBT_CONTINUE;
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <linux/netfilter_bridge/ebtables.h>
|
||||
#include <linux/netfilter_bridge/ebt_ulog.h>
|
||||
#include <net/netfilter/nf_log.h>
|
||||
#include <net/netns/generic.h>
|
||||
#include <net/sock.h>
|
||||
#include "../br_private.h"
|
||||
|
||||
@ -62,13 +63,22 @@ typedef struct {
|
||||
spinlock_t lock; /* the per-queue lock */
|
||||
} ebt_ulog_buff_t;
|
||||
|
||||
static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
|
||||
static struct sock *ebtulognl;
|
||||
static int ebt_ulog_net_id __read_mostly;
|
||||
struct ebt_ulog_net {
|
||||
unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
|
||||
ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
|
||||
struct sock *ebtulognl;
|
||||
};
|
||||
|
||||
static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
|
||||
{
|
||||
return net_generic(net, ebt_ulog_net_id);
|
||||
}
|
||||
|
||||
/* send one ulog_buff_t to userspace */
|
||||
static void ulog_send(unsigned int nlgroup)
|
||||
static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
|
||||
{
|
||||
ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup];
|
||||
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
|
||||
|
||||
del_timer(&ub->timer);
|
||||
|
||||
@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup)
|
||||
ub->lastnlh->nlmsg_type = NLMSG_DONE;
|
||||
|
||||
NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
|
||||
netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
|
||||
netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
|
||||
|
||||
ub->qlen = 0;
|
||||
ub->skb = NULL;
|
||||
@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup)
|
||||
/* timer function to flush queue in flushtimeout time */
|
||||
static void ulog_timer(unsigned long data)
|
||||
{
|
||||
spin_lock_bh(&ulog_buffers[data].lock);
|
||||
if (ulog_buffers[data].skb)
|
||||
ulog_send(data);
|
||||
spin_unlock_bh(&ulog_buffers[data].lock);
|
||||
struct ebt_ulog_net *ebt = container_of((void *)data,
|
||||
struct ebt_ulog_net,
|
||||
nlgroup[*(unsigned int *)data]);
|
||||
|
||||
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
|
||||
spin_lock_bh(&ub->lock);
|
||||
if (ub->skb)
|
||||
ulog_send(ebt, *(unsigned int *)data);
|
||||
spin_unlock_bh(&ub->lock);
|
||||
}
|
||||
|
||||
static struct sk_buff *ulog_alloc_skb(unsigned int size)
|
||||
@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
|
||||
ebt_ulog_packet_msg_t *pm;
|
||||
size_t size, copy_len;
|
||||
struct nlmsghdr *nlh;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
|
||||
unsigned int group = uloginfo->nlgroup;
|
||||
ebt_ulog_buff_t *ub = &ulog_buffers[group];
|
||||
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
|
||||
spinlock_t *lock = &ub->lock;
|
||||
ktime_t kt;
|
||||
|
||||
@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
|
||||
if (!(ub->skb = ulog_alloc_skb(size)))
|
||||
goto unlock;
|
||||
} else if (size > skb_tailroom(ub->skb)) {
|
||||
ulog_send(group);
|
||||
ulog_send(ebt, group);
|
||||
|
||||
if (!(ub->skb = ulog_alloc_skb(size)))
|
||||
goto unlock;
|
||||
@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
|
||||
ub->lastnlh = nlh;
|
||||
|
||||
if (ub->qlen >= uloginfo->qthreshold)
|
||||
ulog_send(group);
|
||||
ulog_send(ebt, group);
|
||||
else if (!timer_pending(&ub->timer)) {
|
||||
ub->timer.expires = jiffies + flushtimeout * HZ / 100;
|
||||
add_timer(&ub->timer);
|
||||
@ -277,47 +294,39 @@ static struct nf_logger ebt_ulog_logger __read_mostly = {
|
||||
.me = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int __init ebt_ulog_init(void)
|
||||
static int __net_init ebt_ulog_net_init(struct net *net)
|
||||
{
|
||||
int ret;
|
||||
int i;
|
||||
struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
|
||||
|
||||
struct netlink_kernel_cfg cfg = {
|
||||
.groups = EBT_ULOG_MAXNLGROUPS,
|
||||
};
|
||||
|
||||
if (nlbufsiz >= 128*1024) {
|
||||
pr_warning("Netlink buffer has to be <= 128kB,"
|
||||
" please try a smaller nlbufsiz parameter.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* initialize ulog_buffers */
|
||||
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
|
||||
setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
|
||||
spin_lock_init(&ulog_buffers[i].lock);
|
||||
ebt->nlgroup[i] = i;
|
||||
setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
|
||||
(unsigned long)&ebt->nlgroup[i]);
|
||||
spin_lock_init(&ebt->ulog_buffers[i].lock);
|
||||
}
|
||||
|
||||
ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
|
||||
if (!ebtulognl)
|
||||
ret = -ENOMEM;
|
||||
else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
|
||||
netlink_kernel_release(ebtulognl);
|
||||
ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
|
||||
if (!ebt->ebtulognl)
|
||||
return -ENOMEM;
|
||||
|
||||
if (ret == 0)
|
||||
nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
|
||||
|
||||
return ret;
|
||||
nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit ebt_ulog_fini(void)
|
||||
static void __net_exit ebt_ulog_net_fini(struct net *net)
|
||||
{
|
||||
ebt_ulog_buff_t *ub;
|
||||
int i;
|
||||
struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
|
||||
|
||||
nf_log_unregister(&ebt_ulog_logger);
|
||||
xt_unregister_target(&ebt_ulog_tg_reg);
|
||||
nf_log_unset(net, &ebt_ulog_logger);
|
||||
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
|
||||
ub = &ulog_buffers[i];
|
||||
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
|
||||
del_timer(&ub->timer);
|
||||
|
||||
if (ub->skb) {
|
||||
@ -325,7 +334,49 @@ static void __exit ebt_ulog_fini(void)
|
||||
ub->skb = NULL;
|
||||
}
|
||||
}
|
||||
netlink_kernel_release(ebtulognl);
|
||||
netlink_kernel_release(ebt->ebtulognl);
|
||||
}
|
||||
|
||||
static struct pernet_operations ebt_ulog_net_ops = {
|
||||
.init = ebt_ulog_net_init,
|
||||
.exit = ebt_ulog_net_fini,
|
||||
.id = &ebt_ulog_net_id,
|
||||
.size = sizeof(struct ebt_ulog_net),
|
||||
};
|
||||
|
||||
/*
 * Module init: validate the nlbufsiz parameter, then register the
 * per-namespace hooks, the xtables target and the logger.
 *
 * Returns 0 on success, -EINVAL if nlbufsiz is too large, or the error
 * returned by register_pernet_subsys() / xt_register_target(). If the
 * target cannot be registered, the per-namespace hooks are unregistered
 * again before returning.
 */
static int __init ebt_ulog_init(void)
{
	int ret;

	/*
	 * NOTE(review): the check rejects exactly 128kB while the message
	 * says "<= 128kB"; confirm which of the two is intended.
	 */
	if (nlbufsiz >= 128*1024) {
		/* Keep the space after the comma: the literals are concatenated. */
		pr_warn("Netlink buffer has to be <= 128kB,"
			" please try a smaller nlbufsiz parameter.\n");
		return -EINVAL;
	}

	ret = register_pernet_subsys(&ebt_ulog_net_ops);
	if (ret)
		goto out_pernet;

	ret = xt_register_target(&ebt_ulog_tg_reg);
	if (ret)
		goto out_target;

	/* NOTE(review): the result of nf_log_register() is not checked. */
	nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);

	return 0;

out_target:
	unregister_pernet_subsys(&ebt_ulog_net_ops);
out_pernet:
	return ret;
}
|
||||
|
||||
/*
 * Module exit: undo ebt_ulog_init() in reverse order - logger first,
 * then the xtables target, then the per-namespace hooks.
 */
static void __exit ebt_ulog_fini(void)
{
	nf_log_unregister(&ebt_ulog_logger);

	xt_unregister_target(&ebt_ulog_tg_reg);

	unregister_pernet_subsys(&ebt_ulog_net_ops);
}
|
||||
|
||||
module_init(ebt_ulog_init);
|
||||
|
@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
|
||||
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
|
||||
|
||||
/**
|
||||
* skb_dst_set_noref - sets skb dst, without a reference
|
||||
* __skb_dst_set_noref - sets skb dst, without a reference
|
||||
* @skb: buffer
|
||||
* @dst: dst entry
|
||||
* @force: if force is set, use noref version even for DST_NOCACHE entries
|
||||
*
|
||||
* Sets skb dst, assuming a reference was not taken on dst
|
||||
* skb_dst_drop() should not dst_release() this dst
|
||||
*/
|
||||
void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
|
||||
void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)
|
||||
{
|
||||
WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
|
||||
/* If dst not in cache, we must take a reference, because
|
||||
* dst_release() will destroy dst as soon as its refcount becomes zero
|
||||
*/
|
||||
if (unlikely(dst->flags & DST_NOCACHE)) {
|
||||
if (unlikely((dst->flags & DST_NOCACHE) && !force)) {
|
||||
dst_hold(dst);
|
||||
skb_dst_set(skb, dst);
|
||||
} else {
|
||||
skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(skb_dst_set_noref);
|
||||
EXPORT_SYMBOL(__skb_dst_set_noref);
|
||||
|
||||
/* Dirty hack. We did it in 2.2 (in __dst_free),
|
||||
* we have _very_ good reasons not to repeat
|
||||
|
@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
|
||||
to->tc_index = from->tc_index;
|
||||
#endif
|
||||
nf_copy(to, from);
|
||||
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
|
||||
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
|
||||
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
|
||||
to->nf_trace = from->nf_trace;
|
||||
#endif
|
||||
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
|
||||
|
@ -182,8 +182,7 @@ ipt_get_target_c(const struct ipt_entry *e)
|
||||
return ipt_get_target((struct ipt_entry *)e);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
|
||||
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
|
||||
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
|
||||
static const char *const hooknames[] = {
|
||||
[NF_INET_PRE_ROUTING] = "PREROUTING",
|
||||
[NF_INET_LOCAL_IN] = "INPUT",
|
||||
@ -259,6 +258,7 @@ static void trace_packet(const struct sk_buff *skb,
|
||||
const char *hookname, *chainname, *comment;
|
||||
const struct ipt_entry *iter;
|
||||
unsigned int rulenum = 0;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
|
||||
table_base = private->entries[smp_processor_id()];
|
||||
root = get_entry(table_base, private->hook_entry[hook]);
|
||||
@ -271,7 +271,7 @@ static void trace_packet(const struct sk_buff *skb,
|
||||
&chainname, &comment, &rulenum) != 0)
|
||||
break;
|
||||
|
||||
nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
|
||||
nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
|
||||
"TRACE: %s:%s:%s:%u ",
|
||||
tablename, chainname, comment, rulenum);
|
||||
}
|
||||
@ -361,8 +361,7 @@ ipt_do_table(struct sk_buff *skb,
|
||||
t = ipt_get_target(e);
|
||||
IP_NF_ASSERT(t->u.kernel.target);
|
||||
|
||||
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
|
||||
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
|
||||
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
|
||||
/* The packet is traced: log it */
|
||||
if (unlikely(skb->nf_trace))
|
||||
trace_packet(skb, hook, in, out,
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <linux/netfilter/x_tables.h>
|
||||
#include <linux/netfilter_ipv4/ipt_ULOG.h>
|
||||
#include <net/netfilter/nf_log.h>
|
||||
#include <net/netns/generic.h>
|
||||
#include <net/sock.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <asm/unaligned.h>
|
||||
@ -78,15 +79,23 @@ typedef struct {
|
||||
struct timer_list timer; /* the timer function */
|
||||
} ulog_buff_t;
|
||||
|
||||
static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
|
||||
static int ulog_net_id __read_mostly;
|
||||
/* Per-network-namespace ULOG state, looked up through ulog_pernet(). */
struct ulog_net {
|
||||
/* ulog_timer() recovers the owning ulog_net from the address of one of
 * these slots with container_of() */
unsigned int nlgroup[ULOG_MAXNLGROUPS];
|
||||
/* one queued-message buffer (and its flush timer) per netlink group */
ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
|
||||
/* NETLINK_NFLOG kernel socket that ulog_send() broadcasts on */
struct sock *nflognl;
|
||||
/* taken with BHs disabled around buffer access in ulog_timer() and
 * ipt_ulog_packet() */
spinlock_t lock;
|
||||
};
|
||||
|
||||
static struct sock *nflognl; /* our socket */
|
||||
static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
|
||||
static struct ulog_net *ulog_pernet(struct net *net)
|
||||
{
|
||||
return net_generic(net, ulog_net_id);
|
||||
}
|
||||
|
||||
/* send one ulog_buff_t to userspace */
|
||||
static void ulog_send(unsigned int nlgroupnum)
|
||||
static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
|
||||
{
|
||||
ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
|
||||
ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
|
||||
|
||||
pr_debug("ulog_send: timer is deleting\n");
|
||||
del_timer(&ub->timer);
|
||||
@ -103,7 +112,8 @@ static void ulog_send(unsigned int nlgroupnum)
|
||||
NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
|
||||
pr_debug("throwing %d packets to netlink group %u\n",
|
||||
ub->qlen, nlgroupnum + 1);
|
||||
netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
|
||||
netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
|
||||
GFP_ATOMIC);
|
||||
|
||||
ub->qlen = 0;
|
||||
ub->skb = NULL;
|
||||
@ -114,13 +124,16 @@ static void ulog_send(unsigned int nlgroupnum)
|
||||
/* timer function to flush queue in flushtimeout time */
|
||||
static void ulog_timer(unsigned long data)
|
||||
{
|
||||
struct ulog_net *ulog = container_of((void *)data,
|
||||
struct ulog_net,
|
||||
nlgroup[*(unsigned int *)data]);
|
||||
pr_debug("timer function called, calling ulog_send\n");
|
||||
|
||||
/* lock to protect against somebody modifying our structure
|
||||
* from ipt_ulog_target at the same time */
|
||||
spin_lock_bh(&ulog_lock);
|
||||
ulog_send(data);
|
||||
spin_unlock_bh(&ulog_lock);
|
||||
spin_lock_bh(&ulog->lock);
|
||||
ulog_send(ulog, data);
|
||||
spin_unlock_bh(&ulog->lock);
|
||||
}
|
||||
|
||||
static struct sk_buff *ulog_alloc_skb(unsigned int size)
|
||||
@ -160,6 +173,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
|
||||
size_t size, copy_len;
|
||||
struct nlmsghdr *nlh;
|
||||
struct timeval tv;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
struct ulog_net *ulog = ulog_pernet(net);
|
||||
|
||||
/* ffs == find first bit set, necessary because userspace
|
||||
* is already shifting groupnumber, but we need unshifted.
|
||||
@ -174,9 +189,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
|
||||
|
||||
size = nlmsg_total_size(sizeof(*pm) + copy_len);
|
||||
|
||||
ub = &ulog_buffers[groupnum];
|
||||
ub = &ulog->ulog_buffers[groupnum];
|
||||
|
||||
spin_lock_bh(&ulog_lock);
|
||||
spin_lock_bh(&ulog->lock);
|
||||
|
||||
if (!ub->skb) {
|
||||
if (!(ub->skb = ulog_alloc_skb(size)))
|
||||
@ -186,7 +201,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
|
||||
/* either the queue len is too high or we don't have
|
||||
* enough room in nlskb left. send it to userspace. */
|
||||
|
||||
ulog_send(groupnum);
|
||||
ulog_send(ulog, groupnum);
|
||||
|
||||
if (!(ub->skb = ulog_alloc_skb(size)))
|
||||
goto alloc_failure;
|
||||
@ -260,16 +275,16 @@ static void ipt_ulog_packet(unsigned int hooknum,
|
||||
if (ub->qlen >= loginfo->qthreshold) {
|
||||
if (loginfo->qthreshold > 1)
|
||||
nlh->nlmsg_type = NLMSG_DONE;
|
||||
ulog_send(groupnum);
|
||||
ulog_send(ulog, groupnum);
|
||||
}
|
||||
out_unlock:
|
||||
spin_unlock_bh(&ulog_lock);
|
||||
spin_unlock_bh(&ulog->lock);
|
||||
|
||||
return;
|
||||
|
||||
alloc_failure:
|
||||
pr_debug("Error building netlink message\n");
|
||||
spin_unlock_bh(&ulog_lock);
|
||||
spin_unlock_bh(&ulog->lock);
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
@ -376,54 +391,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {
|
||||
.me = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int __init ulog_tg_init(void)
|
||||
static int __net_init ulog_tg_net_init(struct net *net)
|
||||
{
|
||||
int ret, i;
|
||||
int i;
|
||||
struct ulog_net *ulog = ulog_pernet(net);
|
||||
struct netlink_kernel_cfg cfg = {
|
||||
.groups = ULOG_MAXNLGROUPS,
|
||||
};
|
||||
|
||||
pr_debug("init module\n");
|
||||
|
||||
if (nlbufsiz > 128*1024) {
|
||||
pr_warning("Netlink buffer has to be <= 128kB\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
spin_lock_init(&ulog->lock);
|
||||
/* initialize ulog_buffers */
|
||||
for (i = 0; i < ULOG_MAXNLGROUPS; i++)
|
||||
setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
|
||||
setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i);
|
||||
|
||||
nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
|
||||
if (!nflognl)
|
||||
ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
|
||||
if (!ulog->nflognl)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = xt_register_target(&ulog_tg_reg);
|
||||
if (ret < 0) {
|
||||
netlink_kernel_release(nflognl);
|
||||
return ret;
|
||||
}
|
||||
if (nflog)
|
||||
nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
|
||||
nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit ulog_tg_exit(void)
|
||||
static void __net_exit ulog_tg_net_exit(struct net *net)
|
||||
{
|
||||
ulog_buff_t *ub;
|
||||
int i;
|
||||
|
||||
pr_debug("cleanup_module\n");
|
||||
struct ulog_net *ulog = ulog_pernet(net);
|
||||
|
||||
if (nflog)
|
||||
nf_log_unregister(&ipt_ulog_logger);
|
||||
xt_unregister_target(&ulog_tg_reg);
|
||||
netlink_kernel_release(nflognl);
|
||||
nf_log_unset(net, &ipt_ulog_logger);
|
||||
|
||||
netlink_kernel_release(ulog->nflognl);
|
||||
|
||||
/* remove pending timers and free allocated skb's */
|
||||
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
|
||||
ub = &ulog_buffers[i];
|
||||
ub = &ulog->ulog_buffers[i];
|
||||
pr_debug("timer is deleting\n");
|
||||
del_timer(&ub->timer);
|
||||
|
||||
@ -434,5 +438,50 @@ static void __exit ulog_tg_exit(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Pernet hooks for ULOG, registered from ulog_tg_init(). .id and .size
 * describe the per-namespace struct ulog_net that ulog_pernet() looks up
 * with net_generic(net, ulog_net_id). */
static struct pernet_operations ulog_tg_net_ops = {
|
||||
.init = ulog_tg_net_init,
|
||||
.exit = ulog_tg_net_exit,
|
||||
.id = &ulog_net_id,
|
||||
.size = sizeof(struct ulog_net),
|
||||
};
|
||||
|
||||
/*
 * Module load: validate the nlbufsiz parameter, register the per-namespace
 * operations, then the xtables target, and finally (if the nflog parameter is
 * set) the IPv4 logger. A failed target registration rolls the pernet
 * registration back.
 *
 * Returns 0 on success, -EINVAL for an oversized nlbufsiz, or the error
 * reported by the failing registration.
 */
static int __init ulog_tg_init(void)
{
	int err;

	pr_debug("init module\n");

	if (nlbufsiz > 128*1024) {
		pr_warn("Netlink buffer has to be <= 128kB\n");
		return -EINVAL;
	}

	err = register_pernet_subsys(&ulog_tg_net_ops);
	if (err)
		return err;

	err = xt_register_target(&ulog_tg_reg);
	if (err < 0) {
		/* undo the only step that has succeeded so far */
		unregister_pernet_subsys(&ulog_tg_net_ops);
		return err;
	}

	if (nflog)
		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);

	return 0;
}
|
||||
|
||||
/* Module unload: undo ulog_tg_init() in the reverse order of registration
 * (logger, then xtables target, then the pernet operations). */
static void __exit ulog_tg_exit(void)
|
||||
{
|
||||
pr_debug("cleanup_module\n");
|
||||
/* the logger was only registered when the nflog parameter was set */
if (nflog)
|
||||
nf_log_unregister(&ipt_ulog_logger);
|
||||
xt_unregister_target(&ulog_tg_reg);
|
||||
unregister_pernet_subsys(&ulog_tg_net_ops);
|
||||
}
|
||||
|
||||
module_init(ulog_tg_init);
|
||||
module_exit(ulog_tg_exit);
|
||||
|
@ -187,8 +187,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
|
||||
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
|
||||
if (icmph == NULL) {
|
||||
if (LOG_INVALID(net, IPPROTO_ICMP))
|
||||
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_icmp: short packet ");
|
||||
nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
|
||||
NULL, "nf_ct_icmp: short packet ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
|
||||
@ -196,7 +196,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
|
||||
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
|
||||
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
|
||||
if (LOG_INVALID(net, IPPROTO_ICMP))
|
||||
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_icmp: bad HW ICMP checksum ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -209,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
|
||||
*/
|
||||
if (icmph->type > NR_ICMP_TYPES) {
|
||||
if (LOG_INVALID(net, IPPROTO_ICMP))
|
||||
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_icmp: invalid ICMP type ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
|
@ -284,6 +284,7 @@ static void trace_packet(const struct sk_buff *skb,
|
||||
const char *hookname, *chainname, *comment;
|
||||
const struct ip6t_entry *iter;
|
||||
unsigned int rulenum = 0;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
|
||||
table_base = private->entries[smp_processor_id()];
|
||||
root = get_entry(table_base, private->hook_entry[hook]);
|
||||
@ -296,7 +297,7 @@ static void trace_packet(const struct sk_buff *skb,
|
||||
&chainname, &comment, &rulenum) != 0)
|
||||
break;
|
||||
|
||||
nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
|
||||
nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
|
||||
"TRACE: %s:%s:%s:%u ",
|
||||
tablename, chainname, comment, rulenum);
|
||||
}
|
||||
|
@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
|
||||
type + 128);
|
||||
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
|
||||
if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
|
||||
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
|
||||
NULL, NULL,
|
||||
"nf_ct_icmpv6: invalid new with type %d ",
|
||||
type + 128);
|
||||
return false;
|
||||
@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
|
||||
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
|
||||
if (icmp6h == NULL) {
|
||||
if (LOG_INVALID(net, IPPROTO_ICMPV6))
|
||||
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_icmpv6: short packet ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
|
||||
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
|
||||
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
|
||||
if (LOG_INVALID(net, IPPROTO_ICMPV6))
|
||||
nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_icmpv6: ICMPv6 checksum failed ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <net/rawv6.h>
|
||||
#include <net/ndisc.h>
|
||||
#include <net/addrconf.h>
|
||||
#include <net/inet_ecn.h>
|
||||
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/netfilter.h>
|
||||
@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
|
||||
{
|
||||
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
|
||||
}
|
||||
|
||||
static unsigned int nf_hashfn(struct inet_frag_queue *q)
|
||||
{
|
||||
const struct frag_queue *nq;
|
||||
@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data)
|
||||
/* Creation primitives. */
|
||||
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
|
||||
u32 user, struct in6_addr *src,
|
||||
struct in6_addr *dst)
|
||||
struct in6_addr *dst, u8 ecn)
|
||||
{
|
||||
struct inet_frag_queue *q;
|
||||
struct ip6_create_arg arg;
|
||||
@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
|
||||
arg.user = user;
|
||||
arg.src = src;
|
||||
arg.dst = dst;
|
||||
arg.ecn = ecn;
|
||||
|
||||
read_lock_bh(&nf_frags.lock);
|
||||
hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
|
||||
@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
|
||||
struct sk_buff *prev, *next;
|
||||
unsigned int payload_len;
|
||||
int offset, end;
|
||||
u8 ecn;
|
||||
|
||||
if (fq->q.last_in & INET_FRAG_COMPLETE) {
|
||||
pr_debug("Already completed\n");
|
||||
@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
|
||||
return -1;
|
||||
}
|
||||
|
||||
ecn = ip6_frag_ecn(ipv6_hdr(skb));
|
||||
|
||||
if (skb->ip_summed == CHECKSUM_COMPLETE) {
|
||||
const unsigned char *nh = skb_network_header(skb);
|
||||
skb->csum = csum_sub(skb->csum,
|
||||
@ -317,6 +327,7 @@ found:
|
||||
}
|
||||
fq->q.stamp = skb->tstamp;
|
||||
fq->q.meat += skb->len;
|
||||
fq->ecn |= ecn;
|
||||
if (payload_len > fq->q.max_size)
|
||||
fq->q.max_size = payload_len;
|
||||
add_frag_mem_limit(&fq->q, skb->truesize);
|
||||
@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
|
||||
{
|
||||
struct sk_buff *fp, *op, *head = fq->q.fragments;
|
||||
int payload_len;
|
||||
u8 ecn;
|
||||
|
||||
inet_frag_kill(&fq->q, &nf_frags);
|
||||
|
||||
WARN_ON(head == NULL);
|
||||
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
|
||||
|
||||
ecn = ip_frag_ecn_table[fq->ecn];
|
||||
if (unlikely(ecn == 0xff))
|
||||
goto out_fail;
|
||||
|
||||
/* Unfragmented part is taken from the first segment. */
|
||||
payload_len = ((head->data - skb_network_header(head)) -
|
||||
sizeof(struct ipv6hdr) + fq->q.len -
|
||||
@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
|
||||
head->dev = dev;
|
||||
head->tstamp = fq->q.stamp;
|
||||
ipv6_hdr(head)->payload_len = htons(payload_len);
|
||||
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
|
||||
IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
|
||||
|
||||
/* Yes, and fold redundant checksum back. 8) */
|
||||
@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
|
||||
inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
|
||||
local_bh_enable();
|
||||
|
||||
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
|
||||
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
|
||||
ip6_frag_ecn(hdr));
|
||||
if (fq == NULL) {
|
||||
pr_debug("Can't find and can't create new queue\n");
|
||||
goto ret_orig;
|
||||
|
@ -276,10 +276,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
|
||||
EXPORT_SYMBOL(nf_nat_decode_session_hook);
|
||||
#endif
|
||||
|
||||
/*
 * Per-namespace setup: with CONFIG_PROC_FS, create the "netfilter" directory
 * under this namespace's proc_net and remember it in net->nf.proc_netfilter.
 *
 * Returns 0 on success (always, without procfs) or -ENOMEM if the directory
 * cannot be created.
 */
static int __net_init netfilter_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
						net->proc_net);
	if (!net->nf.proc_netfilter) {
		/* No message for init_net: netfilter_init() panics with its
		 * own text when registering the pernet ops fails. */
		if (!net_eq(net, &init_net))
			pr_err("cannot create netfilter proc entry\n");

		return -ENOMEM;
	}
#endif
	return 0;
}
|
||||
|
||||
/* Per-namespace teardown: remove the "netfilter" entry from this namespace's
 * proc_net directory (the one netfilter_net_init() creates). */
static void __net_exit netfilter_net_exit(struct net *net)
|
||||
{
|
||||
remove_proc_entry("netfilter", net->proc_net);
|
||||
}
|
||||
|
||||
/* Pernet hooks that create and remove the per-namespace "netfilter" proc
 * directory; registered from netfilter_init(). */
static struct pernet_operations netfilter_net_ops = {
|
||||
.init = netfilter_net_init,
|
||||
.exit = netfilter_net_exit,
|
||||
};
|
||||
|
||||
void __init netfilter_init(void)
|
||||
{
|
||||
@ -289,11 +309,8 @@ void __init netfilter_init(void)
|
||||
INIT_LIST_HEAD(&nf_hooks[i][h]);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
|
||||
if (!proc_net_netfilter)
|
||||
if (register_pernet_subsys(&netfilter_net_ops) < 0)
|
||||
panic("cannot create netfilter proc entry");
|
||||
#endif
|
||||
|
||||
if (netfilter_log_init() < 0)
|
||||
panic("cannot initialize nf_log");
|
||||
|
@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
|
||||
module_put(app->module);
|
||||
}
|
||||
|
||||
/* Free an app incarnation: its timeout table first, then the structure
 * itself. Shared by the ip_vs_app_inc_new() error path and the RCU free
 * callback. */
static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
|
||||
{
|
||||
kfree(inc->timeout_table);
|
||||
kfree(inc);
|
||||
}
|
||||
|
||||
static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
|
||||
{
|
||||
struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
|
||||
|
||||
ip_vs_app_inc_destroy(inc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate/initialize app incarnation and register it in proto apps.
|
||||
@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
|
||||
return 0;
|
||||
|
||||
out:
|
||||
kfree(inc->timeout_table);
|
||||
kfree(inc);
|
||||
ip_vs_app_inc_destroy(inc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
|
||||
|
||||
list_del(&inc->a_list);
|
||||
|
||||
kfree(inc->timeout_table);
|
||||
kfree(inc);
|
||||
call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
|
||||
}
|
||||
|
||||
|
||||
@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
|
||||
{
|
||||
int result;
|
||||
|
||||
atomic_inc(&inc->usecnt);
|
||||
if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
|
||||
atomic_dec(&inc->usecnt);
|
||||
result = ip_vs_app_get(inc->app);
|
||||
if (result)
|
||||
atomic_inc(&inc->usecnt);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
|
||||
*/
|
||||
void ip_vs_app_inc_put(struct ip_vs_app *inc)
|
||||
{
|
||||
ip_vs_app_put(inc->app);
|
||||
atomic_dec(&inc->usecnt);
|
||||
ip_vs_app_put(inc->app);
|
||||
}
|
||||
|
||||
|
||||
@ -218,6 +228,7 @@ out_unlock:
|
||||
/*
|
||||
* ip_vs_app unregistration routine
|
||||
* We are sure there are no app incarnations attached to services
|
||||
* Caller should use synchronize_rcu() or rcu_barrier()
|
||||
*/
|
||||
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
|
||||
{
|
||||
@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
|
||||
unsigned int flag, __u32 seq, int diff)
|
||||
{
|
||||
/* spinlock is to keep updating cp->flags atomic */
|
||||
spin_lock(&cp->lock);
|
||||
spin_lock_bh(&cp->lock);
|
||||
if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
|
||||
vseq->previous_delta = vseq->delta;
|
||||
vseq->delta += diff;
|
||||
vseq->init_seq = seq;
|
||||
cp->flags |= flag;
|
||||
}
|
||||
spin_unlock(&cp->lock);
|
||||
spin_unlock_bh(&cp->lock);
|
||||
}
|
||||
|
||||
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
|
||||
|
@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
|
||||
|
||||
struct ip_vs_aligned_lock
|
||||
{
|
||||
rwlock_t l;
|
||||
spinlock_t l;
|
||||
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
|
||||
|
||||
/* lock array for conn table */
|
||||
static struct ip_vs_aligned_lock
|
||||
__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
|
||||
|
||||
static inline void ct_read_lock(unsigned int key)
|
||||
{
|
||||
read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
static inline void ct_read_unlock(unsigned int key)
|
||||
{
|
||||
read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
static inline void ct_write_lock(unsigned int key)
|
||||
{
|
||||
write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
static inline void ct_write_unlock(unsigned int key)
|
||||
{
|
||||
write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
static inline void ct_read_lock_bh(unsigned int key)
|
||||
{
|
||||
read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
static inline void ct_read_unlock_bh(unsigned int key)
|
||||
{
|
||||
read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
static inline void ct_write_lock_bh(unsigned int key)
|
||||
{
|
||||
write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
static inline void ct_write_unlock_bh(unsigned int key)
|
||||
{
|
||||
write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
|
||||
}
|
||||
|
||||
|
||||
@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
|
||||
/* Hash by protocol, client address and port */
|
||||
hash = ip_vs_conn_hashkey_conn(cp);
|
||||
|
||||
ct_write_lock(hash);
|
||||
ct_write_lock_bh(hash);
|
||||
spin_lock(&cp->lock);
|
||||
|
||||
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
|
||||
hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
|
||||
cp->flags |= IP_VS_CONN_F_HASHED;
|
||||
atomic_inc(&cp->refcnt);
|
||||
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
|
||||
ret = 1;
|
||||
} else {
|
||||
pr_err("%s(): request for already hashed, called from %pF\n",
|
||||
@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
|
||||
}
|
||||
|
||||
spin_unlock(&cp->lock);
|
||||
ct_write_unlock(hash);
|
||||
ct_write_unlock_bh(hash);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
|
||||
|
||||
/*
|
||||
* UNhashes ip_vs_conn from ip_vs_conn_tab.
|
||||
* returns bool success.
|
||||
* returns bool success. Caller should hold conn reference.
|
||||
*/
|
||||
static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
|
||||
{
|
||||
@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
|
||||
/* unhash it and decrease its reference counter */
|
||||
hash = ip_vs_conn_hashkey_conn(cp);
|
||||
|
||||
ct_write_lock(hash);
|
||||
ct_write_lock_bh(hash);
|
||||
spin_lock(&cp->lock);
|
||||
|
||||
if (cp->flags & IP_VS_CONN_F_HASHED) {
|
||||
hlist_del(&cp->c_list);
|
||||
hlist_del_rcu(&cp->c_list);
|
||||
cp->flags &= ~IP_VS_CONN_F_HASHED;
|
||||
atomic_dec(&cp->refcnt);
|
||||
ret = 1;
|
||||
@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
|
||||
ret = 0;
|
||||
|
||||
spin_unlock(&cp->lock);
|
||||
ct_write_unlock(hash);
|
||||
ct_write_unlock_bh(hash);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Try to unlink ip_vs_conn from ip_vs_conn_tab.
|
||||
* returns bool success.
|
||||
*/
|
||||
static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
|
||||
{
|
||||
unsigned int hash;
|
||||
bool ret;
|
||||
|
||||
hash = ip_vs_conn_hashkey_conn(cp);
|
||||
|
||||
ct_write_lock_bh(hash);
|
||||
spin_lock(&cp->lock);
|
||||
|
||||
if (cp->flags & IP_VS_CONN_F_HASHED) {
|
||||
ret = false;
|
||||
/* Decrease refcnt and unlink conn only if we are last user */
|
||||
if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
|
||||
hlist_del_rcu(&cp->c_list);
|
||||
cp->flags &= ~IP_VS_CONN_F_HASHED;
|
||||
ret = true;
|
||||
}
|
||||
} else
|
||||
ret = atomic_read(&cp->refcnt) ? false : true;
|
||||
|
||||
spin_unlock(&cp->lock);
|
||||
ct_write_unlock_bh(hash);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
|
||||
|
||||
hash = ip_vs_conn_hashkey_param(p, false);
|
||||
|
||||
ct_read_lock(hash);
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
if (cp->af == p->af &&
|
||||
p->cport == cp->cport && p->vport == cp->vport &&
|
||||
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
if (p->cport == cp->cport && p->vport == cp->vport &&
|
||||
cp->af == p->af &&
|
||||
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
|
||||
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
|
||||
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
|
||||
p->protocol == cp->protocol &&
|
||||
ip_vs_conn_net_eq(cp, p->net)) {
|
||||
if (!__ip_vs_conn_get(cp))
|
||||
continue;
|
||||
/* HIT */
|
||||
atomic_inc(&cp->refcnt);
|
||||
ct_read_unlock(hash);
|
||||
rcu_read_unlock();
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
|
||||
ct_read_unlock(hash);
|
||||
rcu_read_unlock();
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
|
||||
|
||||
hash = ip_vs_conn_hashkey_param(p, false);
|
||||
|
||||
ct_read_lock(hash);
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
if (!ip_vs_conn_net_eq(cp, p->net))
|
||||
continue;
|
||||
if (p->pe_data && p->pe->ct_match) {
|
||||
if (p->pe == cp->pe && p->pe->ct_match(p, cp))
|
||||
goto out;
|
||||
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
if (unlikely(p->pe_data && p->pe->ct_match)) {
|
||||
if (!ip_vs_conn_net_eq(cp, p->net))
|
||||
continue;
|
||||
if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
|
||||
if (__ip_vs_conn_get(cp))
|
||||
goto out;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
|
||||
* p->vaddr is a fwmark */
|
||||
ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
|
||||
p->af, p->vaddr, &cp->vaddr) &&
|
||||
p->cport == cp->cport && p->vport == cp->vport &&
|
||||
p->vport == cp->vport && p->cport == cp->cport &&
|
||||
cp->flags & IP_VS_CONN_F_TEMPLATE &&
|
||||
p->protocol == cp->protocol)
|
||||
goto out;
|
||||
p->protocol == cp->protocol &&
|
||||
ip_vs_conn_net_eq(cp, p->net)) {
|
||||
if (__ip_vs_conn_get(cp))
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
cp = NULL;
|
||||
|
||||
out:
|
||||
if (cp)
|
||||
atomic_inc(&cp->refcnt);
|
||||
ct_read_unlock(hash);
|
||||
rcu_read_unlock();
|
||||
|
||||
IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
|
||||
ip_vs_proto_name(p->protocol),
|
||||
@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
|
||||
*/
|
||||
hash = ip_vs_conn_hashkey_param(p, true);
|
||||
|
||||
ct_read_lock(hash);
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
if (cp->af == p->af &&
|
||||
p->vport == cp->cport && p->cport == cp->dport &&
|
||||
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
if (p->vport == cp->cport && p->cport == cp->dport &&
|
||||
cp->af == p->af &&
|
||||
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
|
||||
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
|
||||
p->protocol == cp->protocol &&
|
||||
ip_vs_conn_net_eq(cp, p->net)) {
|
||||
if (!__ip_vs_conn_get(cp))
|
||||
continue;
|
||||
/* HIT */
|
||||
atomic_inc(&cp->refcnt);
|
||||
ret = cp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ct_read_unlock(hash);
|
||||
rcu_read_unlock();
|
||||
|
||||
IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
|
||||
ip_vs_proto_name(p->protocol),
|
||||
@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
|
||||
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
|
||||
{
|
||||
if (ip_vs_conn_unhash(cp)) {
|
||||
spin_lock(&cp->lock);
|
||||
spin_lock_bh(&cp->lock);
|
||||
if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
|
||||
atomic_dec(&ip_vs_conn_no_cport_cnt);
|
||||
cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
|
||||
cp->cport = cport;
|
||||
}
|
||||
spin_unlock(&cp->lock);
|
||||
spin_unlock_bh(&cp->lock);
|
||||
|
||||
/* hash on new dport */
|
||||
ip_vs_conn_hash(cp);
|
||||
@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
|
||||
return;
|
||||
|
||||
/* Increase the refcnt counter of the dest */
|
||||
atomic_inc(&dest->refcnt);
|
||||
ip_vs_dest_hold(dest);
|
||||
|
||||
conn_flags = atomic_read(&dest->conn_flags);
|
||||
if (cp->protocol != IPPROTO_UDP)
|
||||
@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
|
||||
* Check if there is a destination for the connection, if so
|
||||
* bind the connection to the destination.
|
||||
*/
|
||||
struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
|
||||
void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
|
||||
{
|
||||
struct ip_vs_dest *dest;
|
||||
|
||||
rcu_read_lock();
|
||||
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
|
||||
cp->dport, &cp->vaddr, cp->vport,
|
||||
cp->protocol, cp->fwmark, cp->flags);
|
||||
if (dest) {
|
||||
struct ip_vs_proto_data *pd;
|
||||
|
||||
spin_lock(&cp->lock);
|
||||
spin_lock_bh(&cp->lock);
|
||||
if (cp->dest) {
|
||||
spin_unlock(&cp->lock);
|
||||
return dest;
|
||||
spin_unlock_bh(&cp->lock);
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
/* Applications work depending on the forwarding method
|
||||
@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
|
||||
ip_vs_unbind_app(cp);
|
||||
|
||||
ip_vs_bind_dest(cp, dest);
|
||||
spin_unlock(&cp->lock);
|
||||
spin_unlock_bh(&cp->lock);
|
||||
|
||||
/* Update its packet transmitter */
|
||||
cp->packet_xmit = NULL;
|
||||
@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
|
||||
if (pd && atomic_read(&pd->appcnt))
|
||||
ip_vs_bind_app(cp, pd->pp);
|
||||
}
|
||||
return dest;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
||||
@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
|
||||
dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
|
||||
}
|
||||
|
||||
/*
|
||||
* Simply decrease the refcnt of the dest, because the
|
||||
* dest will be either in service's destination list
|
||||
* or in the trash.
|
||||
*/
|
||||
atomic_dec(&dest->refcnt);
|
||||
ip_vs_dest_put(dest);
|
||||
}
|
||||
|
||||
static int expire_quiescent_template(struct netns_ipvs *ipvs,
|
||||
@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
|
||||
* Simply decrease the refcnt of the template,
|
||||
* don't restart its timer.
|
||||
*/
|
||||
atomic_dec(&ct->refcnt);
|
||||
__ip_vs_conn_put(ct);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void ip_vs_conn_rcu_free(struct rcu_head *head)
|
||||
{
|
||||
struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn,
|
||||
rcu_head);
|
||||
|
||||
ip_vs_pe_put(cp->pe);
|
||||
kfree(cp->pe_data);
|
||||
kmem_cache_free(ip_vs_conn_cachep, cp);
|
||||
}
|
||||
|
||||
static void ip_vs_conn_expire(unsigned long data)
|
||||
{
|
||||
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
|
||||
struct net *net = ip_vs_conn_net(cp);
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
|
||||
cp->timeout = 60*HZ;
|
||||
|
||||
/*
|
||||
* hey, I'm using it
|
||||
*/
|
||||
atomic_inc(&cp->refcnt);
|
||||
|
||||
/*
|
||||
* do I control anybody?
|
||||
*/
|
||||
if (atomic_read(&cp->n_control))
|
||||
goto expire_later;
|
||||
|
||||
/*
|
||||
* unhash it if it is hashed in the conn table
|
||||
*/
|
||||
if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
|
||||
goto expire_later;
|
||||
|
||||
/*
|
||||
* refcnt==1 implies I'm the only one referrer
|
||||
*/
|
||||
if (likely(atomic_read(&cp->refcnt) == 1)) {
|
||||
/* Unlink conn if not referenced anymore */
|
||||
if (likely(ip_vs_conn_unlink(cp))) {
|
||||
/* delete the timer if it is activated by other users */
|
||||
del_timer(&cp->timer);
|
||||
|
||||
@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data)
|
||||
ip_vs_conn_drop_conntrack(cp);
|
||||
}
|
||||
|
||||
ip_vs_pe_put(cp->pe);
|
||||
kfree(cp->pe_data);
|
||||
if (unlikely(cp->app != NULL))
|
||||
ip_vs_unbind_app(cp);
|
||||
ip_vs_unbind_dest(cp);
|
||||
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
|
||||
atomic_dec(&ip_vs_conn_no_cport_cnt);
|
||||
call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
|
||||
atomic_dec(&ipvs->conn_count);
|
||||
|
||||
kmem_cache_free(ip_vs_conn_cachep, cp);
|
||||
return;
|
||||
}
|
||||
|
||||
/* hash it back to the table */
|
||||
ip_vs_conn_hash(cp);
|
||||
|
||||
expire_later:
|
||||
IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
|
||||
atomic_read(&cp->refcnt)-1,
|
||||
IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
|
||||
atomic_read(&cp->refcnt),
|
||||
atomic_read(&cp->n_control));
|
||||
|
||||
atomic_inc(&cp->refcnt);
|
||||
cp->timeout = 60*HZ;
|
||||
|
||||
if (ipvs->sync_state & IP_VS_STATE_MASTER)
|
||||
ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
|
||||
|
||||
ip_vs_conn_put(cp);
|
||||
}
|
||||
|
||||
|
||||
/* Modify timer, so that it expires as soon as possible.
|
||||
* Can be called without reference only if under RCU lock.
|
||||
*/
|
||||
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
|
||||
{
|
||||
if (del_timer(&cp->timer))
|
||||
mod_timer(&cp->timer, jiffies);
|
||||
/* Using mod_timer_pending will ensure the timer is not
|
||||
* modified after the final del_timer in ip_vs_conn_expire.
|
||||
*/
|
||||
if (timer_pending(&cp->timer) &&
|
||||
time_after(cp->timer.expires, jiffies))
|
||||
mod_timer_pending(&cp->timer, jiffies);
|
||||
}
|
||||
|
||||
|
||||
@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
|
||||
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
|
||||
p->protocol);
|
||||
|
||||
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
|
||||
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
|
||||
if (cp == NULL) {
|
||||
IP_VS_ERR_RL("%s(): no memory\n", __func__);
|
||||
return NULL;
|
||||
@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
|
||||
ip_vs_conn_net_set(cp, p->net);
|
||||
cp->af = p->af;
|
||||
cp->protocol = p->protocol;
|
||||
ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
|
||||
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
|
||||
cp->cport = p->cport;
|
||||
ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
|
||||
ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
|
||||
cp->vport = p->vport;
|
||||
/* proto should only be IPPROTO_IP if d_addr is a fwmark */
|
||||
ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
|
||||
&cp->daddr, daddr);
|
||||
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
|
||||
&cp->daddr, daddr);
|
||||
cp->dport = dport;
|
||||
cp->flags = flags;
|
||||
cp->fwmark = fwmark;
|
||||
@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
|
||||
cp->pe = p->pe;
|
||||
cp->pe_data = p->pe_data;
|
||||
cp->pe_data_len = p->pe_data_len;
|
||||
} else {
|
||||
cp->pe = NULL;
|
||||
cp->pe_data = NULL;
|
||||
cp->pe_data_len = 0;
|
||||
}
|
||||
spin_lock_init(&cp->lock);
|
||||
|
||||
@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
|
||||
*/
|
||||
atomic_set(&cp->refcnt, 1);
|
||||
|
||||
cp->control = NULL;
|
||||
atomic_set(&cp->n_control, 0);
|
||||
atomic_set(&cp->in_pkts, 0);
|
||||
|
||||
cp->packet_xmit = NULL;
|
||||
cp->app = NULL;
|
||||
cp->app_data = NULL;
|
||||
/* reset struct ip_vs_seq */
|
||||
cp->in_seq.delta = 0;
|
||||
cp->out_seq.delta = 0;
|
||||
|
||||
atomic_inc(&ipvs->conn_count);
|
||||
if (flags & IP_VS_CONN_F_NO_CPORT)
|
||||
atomic_inc(&ip_vs_conn_no_cport_cnt);
|
||||
|
||||
/* Bind the connection with a destination server */
|
||||
cp->dest = NULL;
|
||||
ip_vs_bind_dest(cp, dest);
|
||||
|
||||
/* Set its state and timeout */
|
||||
cp->state = 0;
|
||||
cp->old_state = 0;
|
||||
cp->timeout = 3*HZ;
|
||||
cp->sync_endtime = jiffies & ~3UL;
|
||||
|
||||
@ -952,14 +966,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
|
||||
struct ip_vs_iter_state *iter = seq->private;
|
||||
|
||||
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
|
||||
ct_read_lock_bh(idx);
|
||||
hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
|
||||
/* __ip_vs_conn_get() is not needed by
|
||||
* ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
|
||||
*/
|
||||
if (pos-- == 0) {
|
||||
iter->l = &ip_vs_conn_tab[idx];
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
ct_read_unlock_bh(idx);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@ -977,6 +994,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||
{
|
||||
struct ip_vs_conn *cp = v;
|
||||
struct ip_vs_iter_state *iter = seq->private;
|
||||
struct hlist_node *e;
|
||||
struct hlist_head *l = iter->l;
|
||||
int idx;
|
||||
|
||||
@ -985,19 +1003,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||
return ip_vs_conn_array(seq, 0);
|
||||
|
||||
/* more on same hash chain? */
|
||||
if (cp->c_list.next)
|
||||
return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list);
|
||||
e = rcu_dereference(hlist_next_rcu(&cp->c_list));
|
||||
if (e)
|
||||
return hlist_entry(e, struct ip_vs_conn, c_list);
|
||||
rcu_read_unlock();
|
||||
|
||||
idx = l - ip_vs_conn_tab;
|
||||
ct_read_unlock_bh(idx);
|
||||
|
||||
while (++idx < ip_vs_conn_tab_size) {
|
||||
ct_read_lock_bh(idx);
|
||||
hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
|
||||
iter->l = &ip_vs_conn_tab[idx];
|
||||
return cp;
|
||||
}
|
||||
ct_read_unlock_bh(idx);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
iter->l = NULL;
|
||||
return NULL;
|
||||
@ -1009,7 +1027,7 @@ static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
|
||||
struct hlist_head *l = iter->l;
|
||||
|
||||
if (l)
|
||||
ct_read_unlock_bh(l - ip_vs_conn_tab);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
|
||||
@ -1188,7 +1206,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
|
||||
void ip_vs_random_dropentry(struct net *net)
|
||||
{
|
||||
int idx;
|
||||
struct ip_vs_conn *cp;
|
||||
struct ip_vs_conn *cp, *cp_c;
|
||||
|
||||
/*
|
||||
* Randomly scan 1/32 of the whole table every second
|
||||
@ -1199,9 +1217,9 @@ void ip_vs_random_dropentry(struct net *net)
|
||||
/*
|
||||
* Lock is actually needed in this loop.
|
||||
*/
|
||||
ct_write_lock_bh(hash);
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
|
||||
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
|
||||
/* connection template */
|
||||
continue;
|
||||
@ -1228,12 +1246,15 @@ void ip_vs_random_dropentry(struct net *net)
|
||||
|
||||
IP_VS_DBG(4, "del connection\n");
|
||||
ip_vs_conn_expire_now(cp);
|
||||
if (cp->control) {
|
||||
cp_c = cp->control;
|
||||
/* cp->control is valid only with reference to cp */
|
||||
if (cp_c && __ip_vs_conn_get(cp)) {
|
||||
IP_VS_DBG(4, "del conn template\n");
|
||||
ip_vs_conn_expire_now(cp->control);
|
||||
ip_vs_conn_expire_now(cp_c);
|
||||
__ip_vs_conn_put(cp);
|
||||
}
|
||||
}
|
||||
ct_write_unlock_bh(hash);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@ -1244,7 +1265,7 @@ void ip_vs_random_dropentry(struct net *net)
|
||||
static void ip_vs_conn_flush(struct net *net)
|
||||
{
|
||||
int idx;
|
||||
struct ip_vs_conn *cp;
|
||||
struct ip_vs_conn *cp, *cp_c;
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
|
||||
flush_again:
|
||||
@ -1252,19 +1273,22 @@ flush_again:
|
||||
/*
|
||||
* Lock is actually needed in this loop.
|
||||
*/
|
||||
ct_write_lock_bh(idx);
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
|
||||
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
|
||||
if (!ip_vs_conn_net_eq(cp, net))
|
||||
continue;
|
||||
IP_VS_DBG(4, "del connection\n");
|
||||
ip_vs_conn_expire_now(cp);
|
||||
if (cp->control) {
|
||||
cp_c = cp->control;
|
||||
/* cp->control is valid only with reference to cp */
|
||||
if (cp_c && __ip_vs_conn_get(cp)) {
|
||||
IP_VS_DBG(4, "del conn template\n");
|
||||
ip_vs_conn_expire_now(cp->control);
|
||||
ip_vs_conn_expire_now(cp_c);
|
||||
__ip_vs_conn_put(cp);
|
||||
}
|
||||
}
|
||||
ct_write_unlock_bh(idx);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/* the counter may be not NULL, because maybe some conn entries
|
||||
@ -1331,7 +1355,7 @@ int __init ip_vs_conn_init(void)
|
||||
INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
|
||||
|
||||
for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
|
||||
rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
|
||||
spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l);
|
||||
}
|
||||
|
||||
/* calculate the random value for connection hash */
|
||||
@ -1342,6 +1366,8 @@ int __init ip_vs_conn_init(void)
|
||||
|
||||
void ip_vs_conn_cleanup(void)
|
||||
{
|
||||
/* Wait all ip_vs_conn_rcu_free() callbacks to complete */
|
||||
rcu_barrier();
|
||||
/* Release the empty cache */
|
||||
kmem_cache_destroy(ip_vs_conn_cachep);
|
||||
vfree(ip_vs_conn_tab);
|
||||
|
@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
|
||||
{
|
||||
ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
|
||||
vport, p);
|
||||
p->pe = svc->pe;
|
||||
p->pe = rcu_dereference(svc->pe);
|
||||
if (p->pe && p->pe->fill_param)
|
||||
return p->pe->fill_param(p, skb);
|
||||
|
||||
@ -296,12 +296,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
|
||||
/* Check if a template already exists */
|
||||
ct = ip_vs_ct_in_get(¶m);
|
||||
if (!ct || !ip_vs_check_template(ct)) {
|
||||
struct ip_vs_scheduler *sched;
|
||||
|
||||
/*
|
||||
* No template found or the dest of the connection
|
||||
* template is not available.
|
||||
* return *ignored=0 i.e. ICMP and NF_DROP
|
||||
*/
|
||||
dest = svc->scheduler->schedule(svc, skb);
|
||||
sched = rcu_dereference(svc->scheduler);
|
||||
dest = sched->schedule(svc, skb);
|
||||
if (!dest) {
|
||||
IP_VS_DBG(1, "p-schedule: no dest found.\n");
|
||||
kfree(param.pe_data);
|
||||
@ -391,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
||||
{
|
||||
struct ip_vs_protocol *pp = pd->pp;
|
||||
struct ip_vs_conn *cp = NULL;
|
||||
struct ip_vs_scheduler *sched;
|
||||
struct ip_vs_dest *dest;
|
||||
__be16 _ports[2], *pptr;
|
||||
unsigned int flags;
|
||||
@ -446,7 +450,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dest = svc->scheduler->schedule(svc, skb);
|
||||
sched = rcu_dereference(svc->scheduler);
|
||||
dest = sched->schedule(svc, skb);
|
||||
if (dest == NULL) {
|
||||
IP_VS_DBG(1, "Schedule: no dest found.\n");
|
||||
return NULL;
|
||||
@ -504,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
|
||||
|
||||
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
|
||||
if (pptr == NULL) {
|
||||
ip_vs_service_put(svc);
|
||||
return NF_DROP;
|
||||
}
|
||||
|
||||
@ -530,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
|
||||
IP_VS_CONN_F_ONE_PACKET : 0;
|
||||
union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
|
||||
|
||||
ip_vs_service_put(svc);
|
||||
|
||||
/* create a new connection entry */
|
||||
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
|
||||
{
|
||||
@ -568,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
|
||||
* listed in the ipvs table), pass the packets, because it is
|
||||
* not ipvs job to decide to drop the packets.
|
||||
*/
|
||||
if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
|
||||
ip_vs_service_put(svc);
|
||||
if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
ip_vs_service_put(svc);
|
||||
|
||||
/*
|
||||
* Notify the client that the destination is unreachable, and
|
||||
@ -640,8 +638,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
|
||||
|
||||
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
|
||||
{
|
||||
int err = ip_defrag(skb, user);
|
||||
int err;
|
||||
|
||||
local_bh_disable();
|
||||
err = ip_defrag(skb, user);
|
||||
local_bh_enable();
|
||||
if (!err)
|
||||
ip_send_check(ip_hdr(skb));
|
||||
|
||||
@ -1161,9 +1162,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
|
||||
sizeof(_ports), _ports, &iph);
|
||||
if (pptr == NULL)
|
||||
return NF_ACCEPT; /* Not for me */
|
||||
if (ip_vs_lookup_real_service(net, af, iph.protocol,
|
||||
&iph.saddr,
|
||||
pptr[0])) {
|
||||
if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
|
||||
pptr[0])) {
|
||||
/*
|
||||
* Notify the real server: there is no
|
||||
* existing entry if it is not RST
|
||||
@ -1220,13 +1220,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
|
||||
const struct net_device *in, const struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
{
|
||||
unsigned int verdict;
|
||||
|
||||
/* Disable BH in LOCAL_OUT until all places are fixed */
|
||||
local_bh_disable();
|
||||
verdict = ip_vs_out(hooknum, skb, AF_INET);
|
||||
local_bh_enable();
|
||||
return verdict;
|
||||
return ip_vs_out(hooknum, skb, AF_INET);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
@ -1253,13 +1247,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
|
||||
const struct net_device *in, const struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
{
|
||||
unsigned int verdict;
|
||||
|
||||
/* Disable BH in LOCAL_OUT until all places are fixed */
|
||||
local_bh_disable();
|
||||
verdict = ip_vs_out(hooknum, skb, AF_INET6);
|
||||
local_bh_enable();
|
||||
return verdict;
|
||||
return ip_vs_out(hooknum, skb, AF_INET6);
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -1395,10 +1383,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
|
||||
goto ignore_ipip;
|
||||
/* Prefer the resulting PMTU */
|
||||
if (dest) {
|
||||
spin_lock(&dest->dst_lock);
|
||||
if (dest->dst_cache)
|
||||
mtu = dst_mtu(dest->dst_cache);
|
||||
spin_unlock(&dest->dst_lock);
|
||||
struct ip_vs_dest_dst *dest_dst;
|
||||
|
||||
rcu_read_lock();
|
||||
dest_dst = rcu_dereference(dest->dest_dst);
|
||||
if (dest_dst)
|
||||
mtu = dst_mtu(dest_dst->dst_cache);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
if (mtu > 68 + sizeof(struct iphdr))
|
||||
mtu -= sizeof(struct iphdr);
|
||||
@ -1714,13 +1705,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
|
||||
const struct net_device *in, const struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
{
|
||||
unsigned int verdict;
|
||||
|
||||
/* Disable BH in LOCAL_OUT until all places are fixed */
|
||||
local_bh_disable();
|
||||
verdict = ip_vs_in(hooknum, skb, AF_INET);
|
||||
local_bh_enable();
|
||||
return verdict;
|
||||
return ip_vs_in(hooknum, skb, AF_INET);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
@ -1779,13 +1764,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
|
||||
const struct net_device *in, const struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
{
|
||||
unsigned int verdict;
|
||||
|
||||
/* Disable BH in LOCAL_OUT until all places are fixed */
|
||||
local_bh_disable();
|
||||
verdict = ip_vs_in(hooknum, skb, AF_INET6);
|
||||
local_bh_enable();
|
||||
return verdict;
|
||||
return ip_vs_in(hooknum, skb, AF_INET6);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -51,7 +51,7 @@
|
||||
* IPVS DH bucket
|
||||
*/
|
||||
struct ip_vs_dh_bucket {
|
||||
struct ip_vs_dest *dest; /* real server (cache) */
|
||||
struct ip_vs_dest __rcu *dest; /* real server (cache) */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -64,6 +64,10 @@ struct ip_vs_dh_bucket {
|
||||
#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
|
||||
#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
|
||||
|
||||
struct ip_vs_dh_state {
|
||||
struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
/*
|
||||
* Returns hash value for IPVS DH entry
|
||||
@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
|
||||
* Get ip_vs_dest associated with supplied parameters.
|
||||
*/
|
||||
static inline struct ip_vs_dest *
|
||||
ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
|
||||
const union nf_inet_addr *addr)
|
||||
ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
|
||||
{
|
||||
return (tbl[ip_vs_dh_hashkey(af, addr)]).dest;
|
||||
return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
|
||||
}
|
||||
|
||||
|
||||
@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
|
||||
* Assign all the hash buckets of the specified table with the service.
|
||||
*/
|
||||
static int
|
||||
ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
|
||||
ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
|
||||
{
|
||||
int i;
|
||||
struct ip_vs_dh_bucket *b;
|
||||
struct list_head *p;
|
||||
struct ip_vs_dest *dest;
|
||||
bool empty;
|
||||
|
||||
b = tbl;
|
||||
b = &s->buckets[0];
|
||||
p = &svc->destinations;
|
||||
empty = list_empty(p);
|
||||
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
|
||||
if (list_empty(p)) {
|
||||
b->dest = NULL;
|
||||
} else {
|
||||
dest = rcu_dereference_protected(b->dest, 1);
|
||||
if (dest)
|
||||
ip_vs_dest_put(dest);
|
||||
if (empty)
|
||||
RCU_INIT_POINTER(b->dest, NULL);
|
||||
else {
|
||||
if (p == &svc->destinations)
|
||||
p = p->next;
|
||||
|
||||
dest = list_entry(p, struct ip_vs_dest, n_list);
|
||||
atomic_inc(&dest->refcnt);
|
||||
b->dest = dest;
|
||||
ip_vs_dest_hold(dest);
|
||||
RCU_INIT_POINTER(b->dest, dest);
|
||||
|
||||
p = p->next;
|
||||
}
|
||||
@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
|
||||
/*
|
||||
* Flush all the hash buckets of the specified table.
|
||||
*/
|
||||
static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
|
||||
static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
|
||||
{
|
||||
int i;
|
||||
struct ip_vs_dh_bucket *b;
|
||||
struct ip_vs_dest *dest;
|
||||
|
||||
b = tbl;
|
||||
b = &s->buckets[0];
|
||||
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
|
||||
if (b->dest) {
|
||||
atomic_dec(&b->dest->refcnt);
|
||||
b->dest = NULL;
|
||||
dest = rcu_dereference_protected(b->dest, 1);
|
||||
if (dest) {
|
||||
ip_vs_dest_put(dest);
|
||||
RCU_INIT_POINTER(b->dest, NULL);
|
||||
}
|
||||
b++;
|
||||
}
|
||||
@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
|
||||
|
||||
static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_dh_bucket *tbl;
|
||||
struct ip_vs_dh_state *s;
|
||||
|
||||
/* allocate the DH table for this service */
|
||||
tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
|
||||
GFP_KERNEL);
|
||||
if (tbl == NULL)
|
||||
s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
|
||||
if (s == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
svc->sched_data = tbl;
|
||||
svc->sched_data = s;
|
||||
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
|
||||
"current service\n",
|
||||
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
|
||||
|
||||
/* assign the hash buckets with the updated service */
|
||||
ip_vs_dh_assign(tbl, svc);
|
||||
/* assign the hash buckets with current dests */
|
||||
ip_vs_dh_reassign(s, svc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
|
||||
static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_dh_bucket *tbl = svc->sched_data;
|
||||
struct ip_vs_dh_state *s = svc->sched_data;
|
||||
|
||||
/* got to clean up hash buckets here */
|
||||
ip_vs_dh_flush(tbl);
|
||||
ip_vs_dh_flush(s);
|
||||
|
||||
/* release the table itself */
|
||||
kfree(svc->sched_data);
|
||||
kfree_rcu(s, rcu_head);
|
||||
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
|
||||
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
|
||||
static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
|
||||
struct ip_vs_dest *dest)
|
||||
{
|
||||
struct ip_vs_dh_bucket *tbl = svc->sched_data;
|
||||
|
||||
/* got to clean up hash buckets here */
|
||||
ip_vs_dh_flush(tbl);
|
||||
struct ip_vs_dh_state *s = svc->sched_data;
|
||||
|
||||
/* assign the hash buckets with the updated service */
|
||||
ip_vs_dh_assign(tbl, svc);
|
||||
ip_vs_dh_reassign(s, svc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -212,19 +217,20 @@ static struct ip_vs_dest *
|
||||
ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
{
|
||||
struct ip_vs_dest *dest;
|
||||
struct ip_vs_dh_bucket *tbl;
|
||||
struct ip_vs_dh_state *s;
|
||||
struct ip_vs_iphdr iph;
|
||||
|
||||
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
|
||||
|
||||
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
|
||||
|
||||
tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
|
||||
dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr);
|
||||
s = (struct ip_vs_dh_state *) svc->sched_data;
|
||||
dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
|
||||
if (!dest
|
||||
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|
||||
|| atomic_read(&dest->weight) <= 0
|
||||
|| is_overloaded(dest)) {
|
||||
ip_vs_scheduler_err(svc, "no destination available");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
|
||||
.n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
|
||||
.init_service = ip_vs_dh_init_svc,
|
||||
.done_service = ip_vs_dh_done_svc,
|
||||
.update_service = ip_vs_dh_update_svc,
|
||||
.add_dest = ip_vs_dh_dest_changed,
|
||||
.del_dest = ip_vs_dh_dest_changed,
|
||||
.schedule = ip_vs_dh_schedule,
|
||||
};
|
||||
|
||||
@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void)
|
||||
static void __exit ip_vs_dh_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
|
||||
|
@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
|
||||
* hopefully it will succeed on the retransmitted
|
||||
* packet.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
|
||||
iph->ihl * 4,
|
||||
start-data, end-start,
|
||||
buf, buf_len);
|
||||
rcu_read_unlock();
|
||||
if (ret) {
|
||||
ip_vs_nfct_expect_related(skb, ct, n_cp,
|
||||
IPPROTO_TCP, 0, 0);
|
||||
@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void)
|
||||
int rv;
|
||||
|
||||
rv = register_pernet_subsys(&ip_vs_ftp_ops);
|
||||
/* rcu_barrier() is called by netns on error */
|
||||
return rv;
|
||||
}
|
||||
|
||||
@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void)
|
||||
static void __exit ip_vs_ftp_exit(void)
|
||||
{
|
||||
unregister_pernet_subsys(&ip_vs_ftp_ops);
|
||||
/* rcu_barrier() is called by netns */
|
||||
}
|
||||
|
||||
|
||||
|
@ -90,11 +90,12 @@
|
||||
* IP address and its destination server
|
||||
*/
|
||||
struct ip_vs_lblc_entry {
|
||||
struct list_head list;
|
||||
struct hlist_node list;
|
||||
int af; /* address family */
|
||||
union nf_inet_addr addr; /* destination IP address */
|
||||
struct ip_vs_dest *dest; /* real server (cache) */
|
||||
struct ip_vs_dest __rcu *dest; /* real server (cache) */
|
||||
unsigned long lastuse; /* last used time */
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
|
||||
@ -102,12 +103,14 @@ struct ip_vs_lblc_entry {
|
||||
* IPVS lblc hash table
|
||||
*/
|
||||
struct ip_vs_lblc_table {
|
||||
struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
|
||||
struct rcu_head rcu_head;
|
||||
struct hlist_head __rcu bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
|
||||
struct timer_list periodic_timer; /* collect stale entries */
|
||||
atomic_t entries; /* number of entries */
|
||||
int max_size; /* maximum size of entries */
|
||||
struct timer_list periodic_timer; /* collect stale entries */
|
||||
int rover; /* rover for expire check */
|
||||
int counter; /* counter for no expire */
|
||||
bool dead;
|
||||
};
|
||||
|
||||
|
||||
@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = {
|
||||
|
||||
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
|
||||
{
|
||||
list_del(&en->list);
|
||||
struct ip_vs_dest *dest;
|
||||
|
||||
hlist_del_rcu(&en->list);
|
||||
/*
|
||||
* We don't kfree dest because it is referred either by its service
|
||||
* or the trash dest list.
|
||||
*/
|
||||
atomic_dec(&en->dest->refcnt);
|
||||
kfree(en);
|
||||
dest = rcu_dereference_protected(en->dest, 1);
|
||||
ip_vs_dest_put(dest);
|
||||
kfree_rcu(en, rcu_head);
|
||||
}
|
||||
|
||||
|
||||
@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
|
||||
{
|
||||
unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr);
|
||||
|
||||
list_add(&en->list, &tbl->bucket[hash]);
|
||||
hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
|
||||
atomic_inc(&tbl->entries);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get ip_vs_lblc_entry associated with supplied parameters. Called under read
|
||||
* lock
|
||||
*/
|
||||
/* Get ip_vs_lblc_entry associated with supplied parameters. */
|
||||
static inline struct ip_vs_lblc_entry *
|
||||
ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
|
||||
const union nf_inet_addr *addr)
|
||||
@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
|
||||
unsigned int hash = ip_vs_lblc_hashkey(af, addr);
|
||||
struct ip_vs_lblc_entry *en;
|
||||
|
||||
list_for_each_entry(en, &tbl->bucket[hash], list)
|
||||
hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
|
||||
if (ip_vs_addr_equal(af, &en->addr, addr))
|
||||
return en;
|
||||
|
||||
@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
|
||||
|
||||
/*
|
||||
* Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
|
||||
* address to a server. Called under write lock.
|
||||
* address to a server. Called under spin lock.
|
||||
*/
|
||||
static inline struct ip_vs_lblc_entry *
|
||||
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
|
||||
@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
|
||||
ip_vs_addr_copy(dest->af, &en->addr, daddr);
|
||||
en->lastuse = jiffies;
|
||||
|
||||
atomic_inc(&dest->refcnt);
|
||||
en->dest = dest;
|
||||
ip_vs_dest_hold(dest);
|
||||
RCU_INIT_POINTER(en->dest, dest);
|
||||
|
||||
ip_vs_lblc_hash(tbl, en);
|
||||
} else if (en->dest != dest) {
|
||||
atomic_dec(&en->dest->refcnt);
|
||||
atomic_inc(&dest->refcnt);
|
||||
en->dest = dest;
|
||||
} else {
|
||||
struct ip_vs_dest *old_dest;
|
||||
|
||||
old_dest = rcu_dereference_protected(en->dest, 1);
|
||||
if (old_dest != dest) {
|
||||
ip_vs_dest_put(old_dest);
|
||||
ip_vs_dest_hold(dest);
|
||||
/* No ordering constraints for refcnt */
|
||||
RCU_INIT_POINTER(en->dest, dest);
|
||||
}
|
||||
}
|
||||
|
||||
return en;
|
||||
@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
|
||||
/*
|
||||
* Flush all the entries of the specified table.
|
||||
*/
|
||||
static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
|
||||
static void ip_vs_lblc_flush(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_lblc_entry *en, *nxt;
|
||||
struct ip_vs_lblc_table *tbl = svc->sched_data;
|
||||
struct ip_vs_lblc_entry *en;
|
||||
struct hlist_node *next;
|
||||
int i;
|
||||
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
tbl->dead = 1;
|
||||
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
|
||||
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
|
||||
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
|
||||
ip_vs_lblc_free(en);
|
||||
atomic_dec(&tbl->entries);
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
}
|
||||
|
||||
static int sysctl_lblc_expiration(struct ip_vs_service *svc)
|
||||
@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc)
|
||||
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_lblc_table *tbl = svc->sched_data;
|
||||
struct ip_vs_lblc_entry *en, *nxt;
|
||||
struct ip_vs_lblc_entry *en;
|
||||
struct hlist_node *next;
|
||||
unsigned long now = jiffies;
|
||||
int i, j;
|
||||
|
||||
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
|
||||
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
|
||||
|
||||
write_lock(&svc->sched_lock);
|
||||
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
|
||||
spin_lock(&svc->sched_lock);
|
||||
hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
|
||||
if (time_before(now,
|
||||
en->lastuse +
|
||||
sysctl_lblc_expiration(svc)))
|
||||
@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
|
||||
ip_vs_lblc_free(en);
|
||||
atomic_dec(&tbl->entries);
|
||||
}
|
||||
write_unlock(&svc->sched_lock);
|
||||
spin_unlock(&svc->sched_lock);
|
||||
}
|
||||
tbl->rover = j;
|
||||
}
|
||||
@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
|
||||
unsigned long now = jiffies;
|
||||
int goal;
|
||||
int i, j;
|
||||
struct ip_vs_lblc_entry *en, *nxt;
|
||||
struct ip_vs_lblc_entry *en;
|
||||
struct hlist_node *next;
|
||||
|
||||
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
|
||||
/* do full expiration check */
|
||||
@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
|
||||
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
|
||||
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
|
||||
|
||||
write_lock(&svc->sched_lock);
|
||||
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
|
||||
spin_lock(&svc->sched_lock);
|
||||
hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
|
||||
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
|
||||
continue;
|
||||
|
||||
@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
|
||||
atomic_dec(&tbl->entries);
|
||||
goal--;
|
||||
}
|
||||
write_unlock(&svc->sched_lock);
|
||||
spin_unlock(&svc->sched_lock);
|
||||
if (goal <= 0)
|
||||
break;
|
||||
}
|
||||
@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
|
||||
* Initialize the hash buckets
|
||||
*/
|
||||
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
|
||||
INIT_LIST_HEAD(&tbl->bucket[i]);
|
||||
INIT_HLIST_HEAD(&tbl->bucket[i]);
|
||||
}
|
||||
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
|
||||
tbl->rover = 0;
|
||||
tbl->counter = 1;
|
||||
tbl->dead = 0;
|
||||
|
||||
/*
|
||||
* Hook periodic timer for garbage collection
|
||||
@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
|
||||
static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_lblc_table *tbl = svc->sched_data;
|
||||
|
||||
@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
|
||||
del_timer_sync(&tbl->periodic_timer);
|
||||
|
||||
/* got to clean up table entries here */
|
||||
ip_vs_lblc_flush(tbl);
|
||||
ip_vs_lblc_flush(svc);
|
||||
|
||||
/* release the table itself */
|
||||
kfree(tbl);
|
||||
kfree_rcu(tbl, rcu_head);
|
||||
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
|
||||
sizeof(*tbl));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
|
||||
* The server with weight=0 is quiesced and will not receive any
|
||||
* new connection.
|
||||
*/
|
||||
list_for_each_entry(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
if (atomic_read(&dest->weight) > 0) {
|
||||
@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
|
||||
* Find the destination with the least load.
|
||||
*/
|
||||
nextstage:
|
||||
list_for_each_entry_continue(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
|
||||
@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
|
||||
if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
|
||||
struct ip_vs_dest *d;
|
||||
|
||||
list_for_each_entry(d, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(d, &svc->destinations, n_list) {
|
||||
if (atomic_read(&d->activeconns)*2
|
||||
< atomic_read(&d->weight)) {
|
||||
return 1;
|
||||
@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
|
||||
|
||||
/* First look in our cache */
|
||||
read_lock(&svc->sched_lock);
|
||||
en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);
|
||||
if (en) {
|
||||
/* We only hold a read lock, but this is atomic */
|
||||
@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
* free up entries from the trash at any time.
|
||||
*/
|
||||
|
||||
if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
|
||||
dest = en->dest;
|
||||
dest = rcu_dereference(en->dest);
|
||||
if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
|
||||
atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
|
||||
goto out;
|
||||
}
|
||||
read_unlock(&svc->sched_lock);
|
||||
|
||||
/* If the destination has a weight and is not overloaded, use it */
|
||||
if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
|
||||
goto out;
|
||||
|
||||
/* No cache entry or it is invalid, time to schedule */
|
||||
dest = __ip_vs_lblc_schedule(svc);
|
||||
@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
}
|
||||
|
||||
/* If we fail to create a cache entry, we'll just use the valid dest */
|
||||
write_lock(&svc->sched_lock);
|
||||
ip_vs_lblc_new(tbl, &iph.daddr, dest);
|
||||
write_unlock(&svc->sched_lock);
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
if (!tbl->dead)
|
||||
ip_vs_lblc_new(tbl, &iph.daddr, dest);
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
|
||||
out:
|
||||
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
|
||||
@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
|
||||
unregister_pernet_subsys(&ip_vs_lblc_ops);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
|
||||
|
@ -89,40 +89,44 @@
|
||||
*/
|
||||
struct ip_vs_dest_set_elem {
|
||||
struct list_head list; /* list link */
|
||||
struct ip_vs_dest *dest; /* destination server */
|
||||
struct ip_vs_dest __rcu *dest; /* destination server */
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
struct ip_vs_dest_set {
|
||||
atomic_t size; /* set size */
|
||||
unsigned long lastmod; /* last modified time */
|
||||
struct list_head list; /* destination list */
|
||||
rwlock_t lock; /* lock for this list */
|
||||
};
|
||||
|
||||
|
||||
static struct ip_vs_dest_set_elem *
|
||||
ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
|
||||
static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
|
||||
struct ip_vs_dest *dest, bool check)
|
||||
{
|
||||
struct ip_vs_dest_set_elem *e;
|
||||
|
||||
list_for_each_entry(e, &set->list, list) {
|
||||
if (e->dest == dest)
|
||||
/* already existed */
|
||||
return NULL;
|
||||
if (check) {
|
||||
list_for_each_entry(e, &set->list, list) {
|
||||
struct ip_vs_dest *d;
|
||||
|
||||
d = rcu_dereference_protected(e->dest, 1);
|
||||
if (d == dest)
|
||||
/* already existed */
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
e = kmalloc(sizeof(*e), GFP_ATOMIC);
|
||||
if (e == NULL)
|
||||
return NULL;
|
||||
return;
|
||||
|
||||
atomic_inc(&dest->refcnt);
|
||||
e->dest = dest;
|
||||
ip_vs_dest_hold(dest);
|
||||
RCU_INIT_POINTER(e->dest, dest);
|
||||
|
||||
list_add(&e->list, &set->list);
|
||||
list_add_rcu(&e->list, &set->list);
|
||||
atomic_inc(&set->size);
|
||||
|
||||
set->lastmod = jiffies;
|
||||
return e;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
|
||||
struct ip_vs_dest_set_elem *e;
|
||||
|
||||
list_for_each_entry(e, &set->list, list) {
|
||||
if (e->dest == dest) {
|
||||
struct ip_vs_dest *d;
|
||||
|
||||
d = rcu_dereference_protected(e->dest, 1);
|
||||
if (d == dest) {
|
||||
/* HIT */
|
||||
atomic_dec(&set->size);
|
||||
set->lastmod = jiffies;
|
||||
atomic_dec(&e->dest->refcnt);
|
||||
list_del(&e->list);
|
||||
kfree(e);
|
||||
ip_vs_dest_put(dest);
|
||||
list_del_rcu(&e->list);
|
||||
kfree_rcu(e, rcu_head);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
|
||||
{
|
||||
struct ip_vs_dest_set_elem *e, *ep;
|
||||
|
||||
write_lock(&set->lock);
|
||||
list_for_each_entry_safe(e, ep, &set->list, list) {
|
||||
struct ip_vs_dest *d;
|
||||
|
||||
d = rcu_dereference_protected(e->dest, 1);
|
||||
/*
|
||||
* We don't kfree dest because it is referred either
|
||||
* by its service or by the trash dest list.
|
||||
*/
|
||||
atomic_dec(&e->dest->refcnt);
|
||||
list_del(&e->list);
|
||||
kfree(e);
|
||||
ip_vs_dest_put(d);
|
||||
list_del_rcu(&e->list);
|
||||
kfree_rcu(e, rcu_head);
|
||||
}
|
||||
write_unlock(&set->lock);
|
||||
}
|
||||
|
||||
/* get weighted least-connection node in the destination set */
|
||||
@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
|
||||
return NULL;
|
||||
|
||||
/* select the first destination server, whose weight > 0 */
|
||||
list_for_each_entry(e, &set->list, list) {
|
||||
least = e->dest;
|
||||
list_for_each_entry_rcu(e, &set->list, list) {
|
||||
least = rcu_dereference(e->dest);
|
||||
if (least->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
|
||||
@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
|
||||
|
||||
/* find the destination with the weighted least load */
|
||||
nextstage:
|
||||
list_for_each_entry(e, &set->list, list) {
|
||||
dest = e->dest;
|
||||
list_for_each_entry_continue_rcu(e, &set->list, list) {
|
||||
dest = rcu_dereference(e->dest);
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
|
||||
@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
|
||||
|
||||
/* select the first destination server, whose weight > 0 */
|
||||
list_for_each_entry(e, &set->list, list) {
|
||||
most = e->dest;
|
||||
most = rcu_dereference_protected(e->dest, 1);
|
||||
if (atomic_read(&most->weight) > 0) {
|
||||
moh = ip_vs_dest_conn_overhead(most);
|
||||
goto nextstage;
|
||||
@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
|
||||
|
||||
/* find the destination with the weighted most load */
|
||||
nextstage:
|
||||
list_for_each_entry(e, &set->list, list) {
|
||||
dest = e->dest;
|
||||
list_for_each_entry_continue(e, &set->list, list) {
|
||||
dest = rcu_dereference_protected(e->dest, 1);
|
||||
doh = ip_vs_dest_conn_overhead(dest);
|
||||
/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
|
||||
if ((moh * atomic_read(&dest->weight) <
|
||||
@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
|
||||
* IP address and its destination server set
|
||||
*/
|
||||
struct ip_vs_lblcr_entry {
|
||||
struct list_head list;
|
||||
struct hlist_node list;
|
||||
int af; /* address family */
|
||||
union nf_inet_addr addr; /* destination IP address */
|
||||
struct ip_vs_dest_set set; /* destination server set */
|
||||
unsigned long lastuse; /* last used time */
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
|
||||
@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry {
|
||||
* IPVS lblcr hash table
|
||||
*/
|
||||
struct ip_vs_lblcr_table {
|
||||
struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
|
||||
struct rcu_head rcu_head;
|
||||
struct hlist_head __rcu bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
|
||||
atomic_t entries; /* number of entries */
|
||||
int max_size; /* maximum size of entries */
|
||||
struct timer_list periodic_timer; /* collect stale entries */
|
||||
int rover; /* rover for expire check */
|
||||
int counter; /* counter for no expire */
|
||||
bool dead;
|
||||
};
|
||||
|
||||
|
||||
@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = {
|
||||
|
||||
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
|
||||
{
|
||||
list_del(&en->list);
|
||||
hlist_del_rcu(&en->list);
|
||||
ip_vs_dest_set_eraseall(&en->set);
|
||||
kfree(en);
|
||||
kfree_rcu(en, rcu_head);
|
||||
}
|
||||
|
||||
|
||||
@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
|
||||
{
|
||||
unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
|
||||
|
||||
list_add(&en->list, &tbl->bucket[hash]);
|
||||
hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
|
||||
atomic_inc(&tbl->entries);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get ip_vs_lblcr_entry associated with supplied parameters. Called under
|
||||
* read lock.
|
||||
*/
|
||||
/* Get ip_vs_lblcr_entry associated with supplied parameters. */
|
||||
static inline struct ip_vs_lblcr_entry *
|
||||
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
|
||||
const union nf_inet_addr *addr)
|
||||
@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
|
||||
unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
|
||||
struct ip_vs_lblcr_entry *en;
|
||||
|
||||
list_for_each_entry(en, &tbl->bucket[hash], list)
|
||||
hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
|
||||
if (ip_vs_addr_equal(af, &en->addr, addr))
|
||||
return en;
|
||||
|
||||
@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
|
||||
|
||||
/*
|
||||
* Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
|
||||
* IP address to a server. Called under write lock.
|
||||
* IP address to a server. Called under spin lock.
|
||||
*/
|
||||
static inline struct ip_vs_lblcr_entry *
|
||||
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
|
||||
@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
|
||||
/* initialize its dest set */
|
||||
atomic_set(&(en->set.size), 0);
|
||||
INIT_LIST_HEAD(&en->set.list);
|
||||
rwlock_init(&en->set.lock);
|
||||
|
||||
ip_vs_dest_set_insert(&en->set, dest, false);
|
||||
|
||||
ip_vs_lblcr_hash(tbl, en);
|
||||
return en;
|
||||
}
|
||||
|
||||
write_lock(&en->set.lock);
|
||||
ip_vs_dest_set_insert(&en->set, dest);
|
||||
write_unlock(&en->set.lock);
|
||||
ip_vs_dest_set_insert(&en->set, dest, true);
|
||||
|
||||
return en;
|
||||
}
|
||||
@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
|
||||
/*
|
||||
* Flush all the entries of the specified table.
|
||||
*/
|
||||
static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
|
||||
static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_lblcr_table *tbl = svc->sched_data;
|
||||
int i;
|
||||
struct ip_vs_lblcr_entry *en, *nxt;
|
||||
struct ip_vs_lblcr_entry *en;
|
||||
struct hlist_node *next;
|
||||
|
||||
/* No locking required, only called during cleanup. */
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
tbl->dead = 1;
|
||||
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
|
||||
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
|
||||
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
|
||||
ip_vs_lblcr_free(en);
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
}
|
||||
|
||||
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
|
||||
@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
|
||||
struct ip_vs_lblcr_table *tbl = svc->sched_data;
|
||||
unsigned long now = jiffies;
|
||||
int i, j;
|
||||
struct ip_vs_lblcr_entry *en, *nxt;
|
||||
struct ip_vs_lblcr_entry *en;
|
||||
struct hlist_node *next;
|
||||
|
||||
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
|
||||
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
|
||||
|
||||
write_lock(&svc->sched_lock);
|
||||
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
|
||||
spin_lock(&svc->sched_lock);
|
||||
hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
|
||||
if (time_after(en->lastuse +
|
||||
sysctl_lblcr_expiration(svc), now))
|
||||
continue;
|
||||
@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
|
||||
ip_vs_lblcr_free(en);
|
||||
atomic_dec(&tbl->entries);
|
||||
}
|
||||
write_unlock(&svc->sched_lock);
|
||||
spin_unlock(&svc->sched_lock);
|
||||
}
|
||||
tbl->rover = j;
|
||||
}
|
||||
@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
|
||||
unsigned long now = jiffies;
|
||||
int goal;
|
||||
int i, j;
|
||||
struct ip_vs_lblcr_entry *en, *nxt;
|
||||
struct ip_vs_lblcr_entry *en;
|
||||
struct hlist_node *next;
|
||||
|
||||
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
|
||||
/* do full expiration check */
|
||||
@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
|
||||
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
|
||||
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
|
||||
|
||||
write_lock(&svc->sched_lock);
|
||||
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
|
||||
spin_lock(&svc->sched_lock);
|
||||
hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
|
||||
if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
|
||||
continue;
|
||||
|
||||
@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
|
||||
atomic_dec(&tbl->entries);
|
||||
goal--;
|
||||
}
|
||||
write_unlock(&svc->sched_lock);
|
||||
spin_unlock(&svc->sched_lock);
|
||||
if (goal <= 0)
|
||||
break;
|
||||
}
|
||||
@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
|
||||
* Initialize the hash buckets
|
||||
*/
|
||||
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
|
||||
INIT_LIST_HEAD(&tbl->bucket[i]);
|
||||
INIT_HLIST_HEAD(&tbl->bucket[i]);
|
||||
}
|
||||
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
|
||||
tbl->rover = 0;
|
||||
tbl->counter = 1;
|
||||
tbl->dead = 0;
|
||||
|
||||
/*
|
||||
* Hook periodic timer for garbage collection
|
||||
@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
|
||||
static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_lblcr_table *tbl = svc->sched_data;
|
||||
|
||||
@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
|
||||
del_timer_sync(&tbl->periodic_timer);
|
||||
|
||||
/* got to clean up table entries here */
|
||||
ip_vs_lblcr_flush(tbl);
|
||||
ip_vs_lblcr_flush(svc);
|
||||
|
||||
/* release the table itself */
|
||||
kfree(tbl);
|
||||
kfree_rcu(tbl, rcu_head);
|
||||
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
|
||||
sizeof(*tbl));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
|
||||
* The server with weight=0 is quiesced and will not receive any
|
||||
* new connection.
|
||||
*/
|
||||
list_for_each_entry(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
|
||||
@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
|
||||
* Find the destination with the least load.
|
||||
*/
|
||||
nextstage:
|
||||
list_for_each_entry_continue(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
|
||||
@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
|
||||
if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
|
||||
struct ip_vs_dest *d;
|
||||
|
||||
list_for_each_entry(d, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(d, &svc->destinations, n_list) {
|
||||
if (atomic_read(&d->activeconns)*2
|
||||
< atomic_read(&d->weight)) {
|
||||
return 1;
|
||||
@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
{
|
||||
struct ip_vs_lblcr_table *tbl = svc->sched_data;
|
||||
struct ip_vs_iphdr iph;
|
||||
struct ip_vs_dest *dest = NULL;
|
||||
struct ip_vs_dest *dest;
|
||||
struct ip_vs_lblcr_entry *en;
|
||||
|
||||
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
|
||||
@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
|
||||
|
||||
/* First look in our cache */
|
||||
read_lock(&svc->sched_lock);
|
||||
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
|
||||
if (en) {
|
||||
/* We only hold a read lock, but this is atomic */
|
||||
en->lastuse = jiffies;
|
||||
|
||||
/* Get the least loaded destination */
|
||||
read_lock(&en->set.lock);
|
||||
dest = ip_vs_dest_set_min(&en->set);
|
||||
read_unlock(&en->set.lock);
|
||||
|
||||
/* More than one destination + enough time passed by, cleanup */
|
||||
if (atomic_read(&en->set.size) > 1 &&
|
||||
time_after(jiffies, en->set.lastmod +
|
||||
time_after(jiffies, en->set.lastmod +
|
||||
sysctl_lblcr_expiration(svc))) {
|
||||
struct ip_vs_dest *m;
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
if (atomic_read(&en->set.size) > 1) {
|
||||
struct ip_vs_dest *m;
|
||||
|
||||
write_lock(&en->set.lock);
|
||||
m = ip_vs_dest_set_max(&en->set);
|
||||
if (m)
|
||||
ip_vs_dest_set_erase(&en->set, m);
|
||||
write_unlock(&en->set.lock);
|
||||
m = ip_vs_dest_set_max(&en->set);
|
||||
if (m)
|
||||
ip_vs_dest_set_erase(&en->set, m);
|
||||
}
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
}
|
||||
|
||||
/* If the destination is not overloaded, use it */
|
||||
if (dest && !is_overloaded(dest, svc)) {
|
||||
read_unlock(&svc->sched_lock);
|
||||
if (dest && !is_overloaded(dest, svc))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* The cache entry is invalid, time to schedule */
|
||||
dest = __ip_vs_lblcr_schedule(svc);
|
||||
if (!dest) {
|
||||
ip_vs_scheduler_err(svc, "no destination available");
|
||||
read_unlock(&svc->sched_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Update our cache entry */
|
||||
write_lock(&en->set.lock);
|
||||
ip_vs_dest_set_insert(&en->set, dest);
|
||||
write_unlock(&en->set.lock);
|
||||
}
|
||||
read_unlock(&svc->sched_lock);
|
||||
|
||||
if (dest)
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
if (!tbl->dead)
|
||||
ip_vs_dest_set_insert(&en->set, dest, true);
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* No cache entry, time to schedule */
|
||||
dest = __ip_vs_lblcr_schedule(svc);
|
||||
@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
}
|
||||
|
||||
/* If we fail to create a cache entry, we'll just use the valid dest */
|
||||
write_lock(&svc->sched_lock);
|
||||
ip_vs_lblcr_new(tbl, &iph.daddr, dest);
|
||||
write_unlock(&svc->sched_lock);
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
if (!tbl->dead)
|
||||
ip_vs_lblcr_new(tbl, &iph.daddr, dest);
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
|
||||
out:
|
||||
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
|
||||
@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
|
||||
unregister_pernet_subsys(&ip_vs_lblcr_ops);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
|
||||
|
@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
* served, but no new connection is assigned to the server.
|
||||
*/
|
||||
|
||||
list_for_each_entry(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
|
||||
if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
|
||||
atomic_read(&dest->weight) == 0)
|
||||
continue;
|
||||
@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
|
||||
static void __exit ip_vs_lc_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
module_init(ip_vs_lc_init);
|
||||
|
@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
* new connections.
|
||||
*/
|
||||
|
||||
list_for_each_entry(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
|
||||
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
|
||||
!atomic_read(&dest->weight))
|
||||
@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
|
||||
static void __exit ip_vs_nq_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
module_init(ip_vs_nq_init);
|
||||
|
@ -13,20 +13,8 @@
|
||||
/* IPVS pe list */
|
||||
static LIST_HEAD(ip_vs_pe);
|
||||
|
||||
/* lock for service table */
|
||||
static DEFINE_SPINLOCK(ip_vs_pe_lock);
|
||||
|
||||
/* Bind a service with a pe */
|
||||
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
|
||||
{
|
||||
svc->pe = pe;
|
||||
}
|
||||
|
||||
/* Unbind a service from its pe */
|
||||
void ip_vs_unbind_pe(struct ip_vs_service *svc)
|
||||
{
|
||||
svc->pe = NULL;
|
||||
}
|
||||
/* semaphore for IPVS PEs. */
|
||||
static DEFINE_MUTEX(ip_vs_pe_mutex);
|
||||
|
||||
/* Get pe in the pe list by name */
|
||||
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
|
||||
@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
|
||||
IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
|
||||
pe_name);
|
||||
|
||||
spin_lock_bh(&ip_vs_pe_lock);
|
||||
|
||||
list_for_each_entry(pe, &ip_vs_pe, n_list) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
|
||||
/* Test and get the modules atomically */
|
||||
if (pe->module &&
|
||||
!try_module_get(pe->module)) {
|
||||
@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
|
||||
}
|
||||
if (strcmp(pe_name, pe->name)==0) {
|
||||
/* HIT */
|
||||
spin_unlock_bh(&ip_vs_pe_lock);
|
||||
rcu_read_unlock();
|
||||
return pe;
|
||||
}
|
||||
if (pe->module)
|
||||
module_put(pe->module);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
spin_unlock_bh(&ip_vs_pe_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
|
||||
/* increase the module use count */
|
||||
ip_vs_use_count_inc();
|
||||
|
||||
spin_lock_bh(&ip_vs_pe_lock);
|
||||
|
||||
if (!list_empty(&pe->n_list)) {
|
||||
spin_unlock_bh(&ip_vs_pe_lock);
|
||||
ip_vs_use_count_dec();
|
||||
pr_err("%s(): [%s] pe already linked\n",
|
||||
__func__, pe->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&ip_vs_pe_mutex);
|
||||
/* Make sure that the pe with this name doesn't exist
|
||||
* in the pe list.
|
||||
*/
|
||||
list_for_each_entry(tmp, &ip_vs_pe, n_list) {
|
||||
if (strcmp(tmp->name, pe->name) == 0) {
|
||||
spin_unlock_bh(&ip_vs_pe_lock);
|
||||
mutex_unlock(&ip_vs_pe_mutex);
|
||||
ip_vs_use_count_dec();
|
||||
pr_err("%s(): [%s] pe already existed "
|
||||
"in the system\n", __func__, pe->name);
|
||||
@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
|
||||
}
|
||||
}
|
||||
/* Add it into the d-linked pe list */
|
||||
list_add(&pe->n_list, &ip_vs_pe);
|
||||
spin_unlock_bh(&ip_vs_pe_lock);
|
||||
list_add_rcu(&pe->n_list, &ip_vs_pe);
|
||||
mutex_unlock(&ip_vs_pe_mutex);
|
||||
|
||||
pr_info("[%s] pe registered.\n", pe->name);
|
||||
|
||||
@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe);
|
||||
/* Unregister a pe from the pe list */
|
||||
int unregister_ip_vs_pe(struct ip_vs_pe *pe)
|
||||
{
|
||||
spin_lock_bh(&ip_vs_pe_lock);
|
||||
if (list_empty(&pe->n_list)) {
|
||||
spin_unlock_bh(&ip_vs_pe_lock);
|
||||
pr_err("%s(): [%s] pe is not in the list. failed\n",
|
||||
__func__, pe->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&ip_vs_pe_mutex);
|
||||
/* Remove it from the d-linked pe list */
|
||||
list_del(&pe->n_list);
|
||||
spin_unlock_bh(&ip_vs_pe_lock);
|
||||
list_del_rcu(&pe->n_list);
|
||||
mutex_unlock(&ip_vs_pe_mutex);
|
||||
|
||||
/* decrease the module use count */
|
||||
ip_vs_use_count_dec();
|
||||
|
@ -172,6 +172,7 @@ static int __init ip_vs_sip_init(void)
|
||||
static void __exit ip_vs_sip_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_pe(&ip_vs_sip_pe);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
module_init(ip_vs_sip_init);
|
||||
|
@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
if (sch == NULL)
|
||||
return 0;
|
||||
net = skb_net(skb);
|
||||
rcu_read_lock();
|
||||
if ((sch->type == SCTP_CID_INIT) &&
|
||||
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
|
||||
&iph->daddr, sh->dest))) {
|
||||
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
|
||||
&iph->daddr, sh->dest))) {
|
||||
int ignored;
|
||||
|
||||
if (ip_vs_todrop(net_ipvs(net))) {
|
||||
@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
* It seems that we are very loaded.
|
||||
* We have to drop this packet :(
|
||||
*/
|
||||
ip_vs_service_put(svc);
|
||||
rcu_read_unlock();
|
||||
*verdict = NF_DROP;
|
||||
return 0;
|
||||
}
|
||||
@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
if (!*cpp && ignored <= 0) {
|
||||
if (!ignored)
|
||||
*verdict = ip_vs_leave(svc, skb, pd, iph);
|
||||
else {
|
||||
ip_vs_service_put(svc);
|
||||
else
|
||||
*verdict = NF_DROP;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
ip_vs_service_put(svc);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
/* NF_ACCEPT */
|
||||
return 1;
|
||||
}
|
||||
@ -994,9 +994,9 @@ static void
|
||||
sctp_state_transition(struct ip_vs_conn *cp, int direction,
|
||||
const struct sk_buff *skb, struct ip_vs_proto_data *pd)
|
||||
{
|
||||
spin_lock(&cp->lock);
|
||||
spin_lock_bh(&cp->lock);
|
||||
set_sctp_state(pd, cp, direction, skb);
|
||||
spin_unlock(&cp->lock);
|
||||
spin_unlock_bh(&cp->lock);
|
||||
}
|
||||
|
||||
static inline __u16 sctp_app_hashkey(__be16 port)
|
||||
@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
|
||||
|
||||
hash = sctp_app_hashkey(port);
|
||||
|
||||
spin_lock_bh(&ipvs->sctp_app_lock);
|
||||
list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
|
||||
if (i->port == port) {
|
||||
ret = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
|
||||
list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
|
||||
atomic_inc(&pd->appcnt);
|
||||
out:
|
||||
spin_unlock_bh(&ipvs->sctp_app_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
|
||||
{
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
|
||||
|
||||
spin_lock_bh(&ipvs->sctp_app_lock);
|
||||
atomic_dec(&pd->appcnt);
|
||||
list_del(&inc->p_list);
|
||||
spin_unlock_bh(&ipvs->sctp_app_lock);
|
||||
list_del_rcu(&inc->p_list);
|
||||
}
|
||||
|
||||
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
|
||||
@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
|
||||
/* Lookup application incarnations and bind the right one */
|
||||
hash = sctp_app_hashkey(cp->vport);
|
||||
|
||||
spin_lock(&ipvs->sctp_app_lock);
|
||||
list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
|
||||
if (inc->port == cp->vport) {
|
||||
if (unlikely(!ip_vs_app_inc_get(inc)))
|
||||
break;
|
||||
spin_unlock(&ipvs->sctp_app_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
|
||||
"%s:%u to app %s on port %u\n",
|
||||
@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
spin_unlock(&ipvs->sctp_app_lock);
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
return result;
|
||||
}
|
||||
@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
|
||||
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
|
||||
spin_lock_init(&ipvs->sctp_app_lock);
|
||||
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
|
||||
sizeof(sctp_timeouts));
|
||||
if (!pd->timeout_table)
|
||||
|
@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
}
|
||||
net = skb_net(skb);
|
||||
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
|
||||
rcu_read_lock();
|
||||
if (th->syn &&
|
||||
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
|
||||
&iph->daddr, th->dest))) {
|
||||
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
|
||||
&iph->daddr, th->dest))) {
|
||||
int ignored;
|
||||
|
||||
if (ip_vs_todrop(net_ipvs(net))) {
|
||||
@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
* It seems that we are very loaded.
|
||||
* We have to drop this packet :(
|
||||
*/
|
||||
ip_vs_service_put(svc);
|
||||
rcu_read_unlock();
|
||||
*verdict = NF_DROP;
|
||||
return 0;
|
||||
}
|
||||
@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
if (!*cpp && ignored <= 0) {
|
||||
if (!ignored)
|
||||
*verdict = ip_vs_leave(svc, skb, pd, iph);
|
||||
else {
|
||||
ip_vs_service_put(svc);
|
||||
else
|
||||
*verdict = NF_DROP;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
ip_vs_service_put(svc);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
/* NF_ACCEPT */
|
||||
return 1;
|
||||
}
|
||||
@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
|
||||
if (th == NULL)
|
||||
return;
|
||||
|
||||
spin_lock(&cp->lock);
|
||||
spin_lock_bh(&cp->lock);
|
||||
set_tcp_state(pd, cp, direction, th);
|
||||
spin_unlock(&cp->lock);
|
||||
spin_unlock_bh(&cp->lock);
|
||||
}
|
||||
|
||||
static inline __u16 tcp_app_hashkey(__be16 port)
|
||||
@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
|
||||
|
||||
hash = tcp_app_hashkey(port);
|
||||
|
||||
spin_lock_bh(&ipvs->tcp_app_lock);
|
||||
list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
|
||||
if (i->port == port) {
|
||||
ret = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
|
||||
list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
|
||||
atomic_inc(&pd->appcnt);
|
||||
|
||||
out:
|
||||
spin_unlock_bh(&ipvs->tcp_app_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
|
||||
static void
|
||||
tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
|
||||
{
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
|
||||
|
||||
spin_lock_bh(&ipvs->tcp_app_lock);
|
||||
atomic_dec(&pd->appcnt);
|
||||
list_del(&inc->p_list);
|
||||
spin_unlock_bh(&ipvs->tcp_app_lock);
|
||||
list_del_rcu(&inc->p_list);
|
||||
}
|
||||
|
||||
|
||||
@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
|
||||
/* Lookup application incarnations and bind the right one */
|
||||
hash = tcp_app_hashkey(cp->vport);
|
||||
|
||||
spin_lock(&ipvs->tcp_app_lock);
|
||||
list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
|
||||
if (inc->port == cp->vport) {
|
||||
if (unlikely(!ip_vs_app_inc_get(inc)))
|
||||
break;
|
||||
spin_unlock(&ipvs->tcp_app_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
|
||||
"%s:%u to app %s on port %u\n",
|
||||
@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
spin_unlock(&ipvs->tcp_app_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
out:
|
||||
return result;
|
||||
@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
|
||||
{
|
||||
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
|
||||
|
||||
spin_lock(&cp->lock);
|
||||
spin_lock_bh(&cp->lock);
|
||||
cp->state = IP_VS_TCP_S_LISTEN;
|
||||
cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
|
||||
: tcp_timeouts[IP_VS_TCP_S_LISTEN]);
|
||||
spin_unlock(&cp->lock);
|
||||
spin_unlock_bh(&cp->lock);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------
|
||||
@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
|
||||
ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
|
||||
spin_lock_init(&ipvs->tcp_app_lock);
|
||||
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
|
||||
sizeof(tcp_timeouts));
|
||||
if (!pd->timeout_table)
|
||||
|
@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
return 0;
|
||||
}
|
||||
net = skb_net(skb);
|
||||
svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
|
||||
&iph->daddr, uh->dest);
|
||||
rcu_read_lock();
|
||||
svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
|
||||
&iph->daddr, uh->dest);
|
||||
if (svc) {
|
||||
int ignored;
|
||||
|
||||
@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
* It seems that we are very loaded.
|
||||
* We have to drop this packet :(
|
||||
*/
|
||||
ip_vs_service_put(svc);
|
||||
rcu_read_unlock();
|
||||
*verdict = NF_DROP;
|
||||
return 0;
|
||||
}
|
||||
@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
|
||||
if (!*cpp && ignored <= 0) {
|
||||
if (!ignored)
|
||||
*verdict = ip_vs_leave(svc, skb, pd, iph);
|
||||
else {
|
||||
ip_vs_service_put(svc);
|
||||
else
|
||||
*verdict = NF_DROP;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
ip_vs_service_put(svc);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
/* NF_ACCEPT */
|
||||
return 1;
|
||||
}
|
||||
@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
|
||||
|
||||
hash = udp_app_hashkey(port);
|
||||
|
||||
|
||||
spin_lock_bh(&ipvs->udp_app_lock);
|
||||
list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
|
||||
if (i->port == port) {
|
||||
ret = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
list_add(&inc->p_list, &ipvs->udp_apps[hash]);
|
||||
list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
|
||||
atomic_inc(&pd->appcnt);
|
||||
|
||||
out:
|
||||
spin_unlock_bh(&ipvs->udp_app_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -380,12 +377,9 @@ static void
|
||||
udp_unregister_app(struct net *net, struct ip_vs_app *inc)
|
||||
{
|
||||
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
|
||||
spin_lock_bh(&ipvs->udp_app_lock);
|
||||
atomic_dec(&pd->appcnt);
|
||||
list_del(&inc->p_list);
|
||||
spin_unlock_bh(&ipvs->udp_app_lock);
|
||||
list_del_rcu(&inc->p_list);
|
||||
}
|
||||
|
||||
|
||||
@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
|
||||
/* Lookup application incarnations and bind the right one */
|
||||
hash = udp_app_hashkey(cp->vport);
|
||||
|
||||
spin_lock(&ipvs->udp_app_lock);
|
||||
list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
|
||||
if (inc->port == cp->vport) {
|
||||
if (unlikely(!ip_vs_app_inc_get(inc)))
|
||||
break;
|
||||
spin_unlock(&ipvs->udp_app_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
|
||||
"%s:%u to app %s on port %u\n",
|
||||
@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
spin_unlock(&ipvs->udp_app_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
out:
|
||||
return result;
|
||||
@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
|
||||
struct netns_ipvs *ipvs = net_ipvs(net);
|
||||
|
||||
ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
|
||||
spin_lock_init(&ipvs->udp_app_lock);
|
||||
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
|
||||
sizeof(udp_timeouts));
|
||||
if (!pd->timeout_table)
|
||||
|
@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
|
||||
static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
|
||||
{
|
||||
svc->sched_data = &svc->destinations;
|
||||
struct list_head *p;
|
||||
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
p = (struct list_head *) svc->sched_data;
|
||||
/* dest is already unlinked, so p->prev is not valid but
|
||||
* p->next is valid, use it to reach previous entry.
|
||||
*/
|
||||
if (p == &dest->n_list)
|
||||
svc->sched_data = p->next->prev;
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
|
||||
static struct ip_vs_dest *
|
||||
ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
{
|
||||
struct list_head *p, *q;
|
||||
struct ip_vs_dest *dest;
|
||||
struct list_head *p;
|
||||
struct ip_vs_dest *dest, *last;
|
||||
int pass = 0;
|
||||
|
||||
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
|
||||
|
||||
write_lock(&svc->sched_lock);
|
||||
p = (struct list_head *)svc->sched_data;
|
||||
p = p->next;
|
||||
q = p;
|
||||
do {
|
||||
/* skip list head */
|
||||
if (q == &svc->destinations) {
|
||||
q = q->next;
|
||||
continue;
|
||||
}
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
p = (struct list_head *) svc->sched_data;
|
||||
last = dest = list_entry(p, struct ip_vs_dest, n_list);
|
||||
|
||||
dest = list_entry(q, struct ip_vs_dest, n_list);
|
||||
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
|
||||
atomic_read(&dest->weight) > 0)
|
||||
/* HIT */
|
||||
goto out;
|
||||
q = q->next;
|
||||
} while (q != p);
|
||||
write_unlock(&svc->sched_lock);
|
||||
do {
|
||||
list_for_each_entry_continue_rcu(dest,
|
||||
&svc->destinations,
|
||||
n_list) {
|
||||
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
|
||||
atomic_read(&dest->weight) > 0)
|
||||
/* HIT */
|
||||
goto out;
|
||||
if (dest == last)
|
||||
goto stop;
|
||||
}
|
||||
pass++;
|
||||
/* Previous dest could be unlinked, do not loop forever.
|
||||
* If we stay at head there is no need for 2nd pass.
|
||||
*/
|
||||
} while (pass < 2 && p != &svc->destinations);
|
||||
|
||||
stop:
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
ip_vs_scheduler_err(svc, "no destination available");
|
||||
return NULL;
|
||||
|
||||
out:
|
||||
svc->sched_data = q;
|
||||
write_unlock(&svc->sched_lock);
|
||||
svc->sched_data = &dest->n_list;
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
IP_VS_DBG_BUF(6, "RR: server %s:%u "
|
||||
"activeconns %d refcnt %d weight %d\n",
|
||||
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
|
||||
@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
|
||||
.module = THIS_MODULE,
|
||||
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
|
||||
.init_service = ip_vs_rr_init_svc,
|
||||
.update_service = ip_vs_rr_update_svc,
|
||||
.add_dest = NULL,
|
||||
.del_dest = ip_vs_rr_del_dest,
|
||||
.schedule = ip_vs_rr_schedule,
|
||||
};
|
||||
|
||||
@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void)
|
||||
static void __exit ip_vs_rr_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
module_init(ip_vs_rr_init);
|
||||
|
@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err);
|
||||
*/
|
||||
static LIST_HEAD(ip_vs_schedulers);
|
||||
|
||||
/* lock for service table */
|
||||
static DEFINE_SPINLOCK(ip_vs_sched_lock);
|
||||
/* mutex protecting the list of registered schedulers */
|
||||
static DEFINE_MUTEX(ip_vs_sched_mutex);
|
||||
|
||||
|
||||
/*
|
||||
@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
|
||||
{
|
||||
int ret;
|
||||
|
||||
svc->scheduler = scheduler;
|
||||
|
||||
if (scheduler->init_service) {
|
||||
ret = scheduler->init_service(svc);
|
||||
if (ret) {
|
||||
@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
rcu_assign_pointer(svc->scheduler, scheduler);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
|
||||
/*
|
||||
* Unbind a service with its scheduler
|
||||
*/
|
||||
int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
|
||||
void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
|
||||
struct ip_vs_scheduler *sched)
|
||||
{
|
||||
struct ip_vs_scheduler *sched = svc->scheduler;
|
||||
struct ip_vs_scheduler *cur_sched;
|
||||
|
||||
if (!sched)
|
||||
return 0;
|
||||
cur_sched = rcu_dereference_protected(svc->scheduler, 1);
|
||||
/* This check proves that old 'sched' was installed */
|
||||
if (!cur_sched)
|
||||
return;
|
||||
|
||||
if (sched->done_service) {
|
||||
if (sched->done_service(svc) != 0) {
|
||||
pr_err("%s(): done error\n", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
svc->scheduler = NULL;
|
||||
return 0;
|
||||
if (sched->done_service)
|
||||
sched->done_service(svc);
|
||||
/* svc->scheduler can not be set to NULL */
|
||||
}
|
||||
|
||||
|
||||
@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
|
||||
|
||||
IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
|
||||
|
||||
spin_lock_bh(&ip_vs_sched_lock);
|
||||
mutex_lock(&ip_vs_sched_mutex);
|
||||
|
||||
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
|
||||
/*
|
||||
@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
|
||||
}
|
||||
if (strcmp(sched_name, sched->name)==0) {
|
||||
/* HIT */
|
||||
spin_unlock_bh(&ip_vs_sched_lock);
|
||||
mutex_unlock(&ip_vs_sched_mutex);
|
||||
return sched;
|
||||
}
|
||||
if (sched->module)
|
||||
module_put(sched->module);
|
||||
}
|
||||
|
||||
spin_unlock_bh(&ip_vs_sched_lock);
|
||||
mutex_unlock(&ip_vs_sched_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
|
||||
|
||||
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
|
||||
{
|
||||
struct ip_vs_scheduler *sched;
|
||||
|
||||
sched = rcu_dereference(svc->scheduler);
|
||||
if (svc->fwmark) {
|
||||
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
|
||||
svc->scheduler->name, svc->fwmark,
|
||||
svc->fwmark, msg);
|
||||
sched->name, svc->fwmark, svc->fwmark, msg);
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
} else if (svc->af == AF_INET6) {
|
||||
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
|
||||
svc->scheduler->name,
|
||||
ip_vs_proto_name(svc->protocol),
|
||||
sched->name, ip_vs_proto_name(svc->protocol),
|
||||
&svc->addr.in6, ntohs(svc->port), msg);
|
||||
#endif
|
||||
} else {
|
||||
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
|
||||
svc->scheduler->name,
|
||||
ip_vs_proto_name(svc->protocol),
|
||||
sched->name, ip_vs_proto_name(svc->protocol),
|
||||
&svc->addr.ip, ntohs(svc->port), msg);
|
||||
}
|
||||
}
|
||||
@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
|
||||
/* increase the module use count */
|
||||
ip_vs_use_count_inc();
|
||||
|
||||
spin_lock_bh(&ip_vs_sched_lock);
|
||||
mutex_lock(&ip_vs_sched_mutex);
|
||||
|
||||
if (!list_empty(&scheduler->n_list)) {
|
||||
spin_unlock_bh(&ip_vs_sched_lock);
|
||||
mutex_unlock(&ip_vs_sched_mutex);
|
||||
ip_vs_use_count_dec();
|
||||
pr_err("%s(): [%s] scheduler already linked\n",
|
||||
__func__, scheduler->name);
|
||||
@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
|
||||
*/
|
||||
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
|
||||
if (strcmp(scheduler->name, sched->name) == 0) {
|
||||
spin_unlock_bh(&ip_vs_sched_lock);
|
||||
mutex_unlock(&ip_vs_sched_mutex);
|
||||
ip_vs_use_count_dec();
|
||||
pr_err("%s(): [%s] scheduler already existed "
|
||||
"in the system\n", __func__, scheduler->name);
|
||||
@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
|
||||
* Add it into the d-linked scheduler list
|
||||
*/
|
||||
list_add(&scheduler->n_list, &ip_vs_schedulers);
|
||||
spin_unlock_bh(&ip_vs_sched_lock);
|
||||
mutex_unlock(&ip_vs_sched_mutex);
|
||||
|
||||
pr_info("[%s] scheduler registered.\n", scheduler->name);
|
||||
|
||||
@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
spin_lock_bh(&ip_vs_sched_lock);
|
||||
mutex_lock(&ip_vs_sched_mutex);
|
||||
if (list_empty(&scheduler->n_list)) {
|
||||
spin_unlock_bh(&ip_vs_sched_lock);
|
||||
mutex_unlock(&ip_vs_sched_mutex);
|
||||
pr_err("%s(): [%s] scheduler is not in the list. failed\n",
|
||||
__func__, scheduler->name);
|
||||
return -EINVAL;
|
||||
@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
|
||||
* Remove it from the d-linked scheduler list
|
||||
*/
|
||||
list_del(&scheduler->n_list);
|
||||
spin_unlock_bh(&ip_vs_sched_lock);
|
||||
mutex_unlock(&ip_vs_sched_mutex);
|
||||
|
||||
/* decrease the module use count */
|
||||
ip_vs_use_count_dec();
|
||||
|
@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
* new connections.
|
||||
*/
|
||||
|
||||
list_for_each_entry(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
|
||||
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
|
||||
atomic_read(&dest->weight) > 0) {
|
||||
least = dest;
|
||||
@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
* Find the destination with the least load.
|
||||
*/
|
||||
nextstage:
|
||||
list_for_each_entry_continue(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
doh = ip_vs_sed_dest_overhead(dest);
|
||||
@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)
|
||||
static void __exit ip_vs_sed_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
module_init(ip_vs_sed_init);
|
||||
|
@ -53,7 +53,7 @@
|
||||
* IPVS SH bucket
|
||||
*/
|
||||
struct ip_vs_sh_bucket {
|
||||
struct ip_vs_dest *dest; /* real server (cache) */
|
||||
struct ip_vs_dest __rcu *dest; /* real server (cache) */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -66,6 +66,10 @@ struct ip_vs_sh_bucket {
|
||||
#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
|
||||
#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
|
||||
|
||||
struct ip_vs_sh_state {
|
||||
struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
/*
|
||||
* Returns hash value for IPVS SH entry
|
||||
@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
|
||||
* Get ip_vs_dest associated with supplied parameters.
|
||||
*/
|
||||
static inline struct ip_vs_dest *
|
||||
ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
|
||||
const union nf_inet_addr *addr)
|
||||
ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
|
||||
{
|
||||
return (tbl[ip_vs_sh_hashkey(af, addr)]).dest;
|
||||
return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
|
||||
}
|
||||
|
||||
|
||||
@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
|
||||
* Assign all the hash buckets of the specified table with the service.
|
||||
*/
|
||||
static int
|
||||
ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
|
||||
ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
|
||||
{
|
||||
int i;
|
||||
struct ip_vs_sh_bucket *b;
|
||||
struct list_head *p;
|
||||
struct ip_vs_dest *dest;
|
||||
int d_count;
|
||||
bool empty;
|
||||
|
||||
b = tbl;
|
||||
b = &s->buckets[0];
|
||||
p = &svc->destinations;
|
||||
empty = list_empty(p);
|
||||
d_count = 0;
|
||||
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
|
||||
if (list_empty(p)) {
|
||||
b->dest = NULL;
|
||||
} else {
|
||||
dest = rcu_dereference_protected(b->dest, 1);
|
||||
if (dest)
|
||||
ip_vs_dest_put(dest);
|
||||
if (empty)
|
||||
RCU_INIT_POINTER(b->dest, NULL);
|
||||
else {
|
||||
if (p == &svc->destinations)
|
||||
p = p->next;
|
||||
|
||||
dest = list_entry(p, struct ip_vs_dest, n_list);
|
||||
atomic_inc(&dest->refcnt);
|
||||
b->dest = dest;
|
||||
ip_vs_dest_hold(dest);
|
||||
RCU_INIT_POINTER(b->dest, dest);
|
||||
|
||||
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
|
||||
i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
|
||||
@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
|
||||
/*
|
||||
* Flush all the hash buckets of the specified table.
|
||||
*/
|
||||
static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
|
||||
static void ip_vs_sh_flush(struct ip_vs_sh_state *s)
|
||||
{
|
||||
int i;
|
||||
struct ip_vs_sh_bucket *b;
|
||||
struct ip_vs_dest *dest;
|
||||
|
||||
b = tbl;
|
||||
b = &s->buckets[0];
|
||||
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
|
||||
if (b->dest) {
|
||||
atomic_dec(&b->dest->refcnt);
|
||||
b->dest = NULL;
|
||||
dest = rcu_dereference_protected(b->dest, 1);
|
||||
if (dest) {
|
||||
ip_vs_dest_put(dest);
|
||||
RCU_INIT_POINTER(b->dest, NULL);
|
||||
}
|
||||
b++;
|
||||
}
|
||||
@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
|
||||
|
||||
static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_sh_bucket *tbl;
|
||||
struct ip_vs_sh_state *s;
|
||||
|
||||
/* allocate the SH table for this service */
|
||||
tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
|
||||
GFP_KERNEL);
|
||||
if (tbl == NULL)
|
||||
s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
|
||||
if (s == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
svc->sched_data = tbl;
|
||||
svc->sched_data = s;
|
||||
IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
|
||||
"current service\n",
|
||||
sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
|
||||
|
||||
/* assign the hash buckets with the updated service */
|
||||
ip_vs_sh_assign(tbl, svc);
|
||||
/* assign the hash buckets with current dests */
|
||||
ip_vs_sh_reassign(s, svc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
|
||||
static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_sh_bucket *tbl = svc->sched_data;
|
||||
struct ip_vs_sh_state *s = svc->sched_data;
|
||||
|
||||
/* got to clean up hash buckets here */
|
||||
ip_vs_sh_flush(tbl);
|
||||
ip_vs_sh_flush(s);
|
||||
|
||||
/* release the table itself */
|
||||
kfree(svc->sched_data);
|
||||
kfree_rcu(s, rcu_head);
|
||||
IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
|
||||
sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
|
||||
static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
|
||||
struct ip_vs_dest *dest)
|
||||
{
|
||||
struct ip_vs_sh_bucket *tbl = svc->sched_data;
|
||||
|
||||
/* got to clean up hash buckets here */
|
||||
ip_vs_sh_flush(tbl);
|
||||
struct ip_vs_sh_state *s = svc->sched_data;
|
||||
|
||||
/* assign the hash buckets with the updated service */
|
||||
ip_vs_sh_assign(tbl, svc);
|
||||
ip_vs_sh_reassign(s, svc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -225,15 +230,15 @@ static struct ip_vs_dest *
|
||||
ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
{
|
||||
struct ip_vs_dest *dest;
|
||||
struct ip_vs_sh_bucket *tbl;
|
||||
struct ip_vs_sh_state *s;
|
||||
struct ip_vs_iphdr iph;
|
||||
|
||||
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
|
||||
|
||||
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
|
||||
|
||||
tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
|
||||
dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr);
|
||||
s = (struct ip_vs_sh_state *) svc->sched_data;
|
||||
dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
|
||||
if (!dest
|
||||
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|
||||
|| atomic_read(&dest->weight) <= 0
|
||||
@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
|
||||
.n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
|
||||
.init_service = ip_vs_sh_init_svc,
|
||||
.done_service = ip_vs_sh_done_svc,
|
||||
.update_service = ip_vs_sh_update_svc,
|
||||
.add_dest = ip_vs_sh_dest_changed,
|
||||
.del_dest = ip_vs_sh_dest_changed,
|
||||
.upd_dest = ip_vs_sh_dest_changed,
|
||||
.schedule = ip_vs_sh_schedule,
|
||||
};
|
||||
|
||||
@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void)
|
||||
static void __exit ip_vs_sh_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
|
||||
|
@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
|
||||
if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
|
||||
return;
|
||||
|
||||
spin_lock(&ipvs->sync_buff_lock);
|
||||
spin_lock_bh(&ipvs->sync_buff_lock);
|
||||
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
|
||||
spin_unlock(&ipvs->sync_buff_lock);
|
||||
spin_unlock_bh(&ipvs->sync_buff_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
|
||||
if (!buff) {
|
||||
buff = ip_vs_sync_buff_create_v0(ipvs);
|
||||
if (!buff) {
|
||||
spin_unlock(&ipvs->sync_buff_lock);
|
||||
spin_unlock_bh(&ipvs->sync_buff_lock);
|
||||
pr_err("ip_vs_sync_buff_create failed.\n");
|
||||
return;
|
||||
}
|
||||
@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
|
||||
sb_queue_tail(ipvs, ms);
|
||||
ms->sync_buff = NULL;
|
||||
}
|
||||
spin_unlock(&ipvs->sync_buff_lock);
|
||||
spin_unlock_bh(&ipvs->sync_buff_lock);
|
||||
|
||||
/* synchronize its controller if it has */
|
||||
cp = cp->control;
|
||||
@ -641,9 +641,9 @@ sloop:
|
||||
pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
|
||||
}
|
||||
|
||||
spin_lock(&ipvs->sync_buff_lock);
|
||||
spin_lock_bh(&ipvs->sync_buff_lock);
|
||||
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
|
||||
spin_unlock(&ipvs->sync_buff_lock);
|
||||
spin_unlock_bh(&ipvs->sync_buff_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -683,7 +683,7 @@ sloop:
|
||||
if (!buff) {
|
||||
buff = ip_vs_sync_buff_create(ipvs);
|
||||
if (!buff) {
|
||||
spin_unlock(&ipvs->sync_buff_lock);
|
||||
spin_unlock_bh(&ipvs->sync_buff_lock);
|
||||
pr_err("ip_vs_sync_buff_create failed.\n");
|
||||
return;
|
||||
}
|
||||
@ -750,7 +750,7 @@ sloop:
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&ipvs->sync_buff_lock);
|
||||
spin_unlock_bh(&ipvs->sync_buff_lock);
|
||||
|
||||
control:
|
||||
/* synchronize its controller if it has */
|
||||
@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
|
||||
kfree(param->pe_data);
|
||||
|
||||
dest = cp->dest;
|
||||
spin_lock(&cp->lock);
|
||||
spin_lock_bh(&cp->lock);
|
||||
if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
|
||||
!(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
|
||||
if (flags & IP_VS_CONN_F_INACTIVE) {
|
||||
@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
|
||||
flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
|
||||
flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
|
||||
cp->flags = flags;
|
||||
spin_unlock(&cp->lock);
|
||||
if (!dest) {
|
||||
dest = ip_vs_try_bind_dest(cp);
|
||||
if (dest)
|
||||
atomic_dec(&dest->refcnt);
|
||||
}
|
||||
spin_unlock_bh(&cp->lock);
|
||||
if (!dest)
|
||||
ip_vs_try_bind_dest(cp);
|
||||
} else {
|
||||
/*
|
||||
* Find the appropriate destination for the connection.
|
||||
* If it is not found the connection will remain unbound
|
||||
* but still handled.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
|
||||
param->vport, protocol, fwmark, flags);
|
||||
|
||||
cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
|
||||
if (dest)
|
||||
atomic_dec(&dest->refcnt);
|
||||
rcu_read_unlock();
|
||||
if (!cp) {
|
||||
if (param->pe_data)
|
||||
kfree(param->pe_data);
|
||||
@ -1692,11 +1689,7 @@ static int sync_thread_backup(void *data)
|
||||
break;
|
||||
}
|
||||
|
||||
/* disable bottom half, because it accesses the data
|
||||
shared by softirq while getting/creating conns */
|
||||
local_bh_disable();
|
||||
ip_vs_process_message(tinfo->net, tinfo->buf, len);
|
||||
local_bh_enable();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
* new connections.
|
||||
*/
|
||||
|
||||
list_for_each_entry(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
|
||||
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
|
||||
atomic_read(&dest->weight) > 0) {
|
||||
least = dest;
|
||||
@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
* Find the destination with the least load.
|
||||
*/
|
||||
nextstage:
|
||||
list_for_each_entry_continue(dest, &svc->destinations, n_list) {
|
||||
list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
|
||||
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
|
||||
continue;
|
||||
doh = ip_vs_dest_conn_overhead(dest);
|
||||
@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)
|
||||
static void __exit ip_vs_wlc_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
module_init(ip_vs_wlc_init);
|
||||
|
@ -29,14 +29,45 @@
|
||||
|
||||
#include <net/ip_vs.h>
|
||||
|
||||
/* The WRR algorithm depends on some calculations:
|
||||
* - mw: maximum weight
|
||||
* - di: weight step, greatest common divisor from all weights
|
||||
* - cw: current required weight
|
||||
* As a result, all weights are in the [di..mw] range with a step of di.
|
||||
*
|
||||
* First, we start with cw = mw and select dests with weight >= cw.
|
||||
* Then cw is reduced by di and all dests are checked again.
|
||||
* Last pass should be with cw = di. We have mw/di passes in total:
|
||||
*
|
||||
* pass 1: cw = max weight
|
||||
* pass 2: cw = max weight - di
|
||||
* pass 3: cw = max weight - 2 * di
|
||||
* ...
|
||||
* last pass: cw = di
|
||||
*
|
||||
* Weights are supposed to be >= di but we run in parallel with
|
||||
* weight changes, so it is possible for some dest weight to be reduced
|
||||
* below di, which is bad if it is the only available dest.
|
||||
*
|
||||
* So, we modify how mw is calculated: it is now reduced by (di - 1),
|
||||
* so that the last cw is 1, to catch such dests with weight below di:
|
||||
* pass 1: cw = max weight - (di - 1)
|
||||
* pass 2: cw = max weight - di - (di - 1)
|
||||
* pass 3: cw = max weight - 2 * di - (di - 1)
|
||||
* ...
|
||||
* last pass: cw = 1
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* current destination pointer for weighted round-robin scheduling
|
||||
*/
|
||||
struct ip_vs_wrr_mark {
|
||||
struct list_head *cl; /* current list head */
|
||||
struct ip_vs_dest *cl; /* current dest or head */
|
||||
int cw; /* current weight */
|
||||
int mw; /* maximum weight */
|
||||
int di; /* decreasing interval */
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
|
||||
@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
|
||||
if (mark == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
mark->cl = &svc->destinations;
|
||||
mark->cw = 0;
|
||||
mark->mw = ip_vs_wrr_max_weight(svc);
|
||||
mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
|
||||
mark->di = ip_vs_wrr_gcd_weight(svc);
|
||||
mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
|
||||
mark->cw = mark->mw;
|
||||
svc->sched_data = mark;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
/*
|
||||
* Release the mark variable
|
||||
*/
|
||||
kfree(svc->sched_data);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
|
||||
static void ip_vs_wrr_done_svc(struct ip_vs_service *svc)
|
||||
{
|
||||
struct ip_vs_wrr_mark *mark = svc->sched_data;
|
||||
|
||||
mark->cl = &svc->destinations;
|
||||
mark->mw = ip_vs_wrr_max_weight(svc);
|
||||
/*
|
||||
* Release the mark variable
|
||||
*/
|
||||
kfree_rcu(mark, rcu_head);
|
||||
}
|
||||
|
||||
|
||||
static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
|
||||
struct ip_vs_dest *dest)
|
||||
{
|
||||
struct ip_vs_wrr_mark *mark = svc->sched_data;
|
||||
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
|
||||
mark->di = ip_vs_wrr_gcd_weight(svc);
|
||||
if (mark->cw > mark->mw)
|
||||
mark->cw = 0;
|
||||
mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
|
||||
if (mark->cw > mark->mw || !mark->cw)
|
||||
mark->cw = mark->mw;
|
||||
else if (mark->di > 1)
|
||||
mark->cw = (mark->cw / mark->di) * mark->di + 1;
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
|
||||
static struct ip_vs_dest *
|
||||
ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
|
||||
{
|
||||
struct ip_vs_dest *dest;
|
||||
struct ip_vs_dest *dest, *last, *stop = NULL;
|
||||
struct ip_vs_wrr_mark *mark = svc->sched_data;
|
||||
struct list_head *p;
|
||||
bool last_pass = false, restarted = false;
|
||||
|
||||
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
|
||||
|
||||
/*
|
||||
* This loop will always terminate, because mark->cw in (0, max_weight]
|
||||
* and at least one server has its weight equal to max_weight.
|
||||
*/
|
||||
write_lock(&svc->sched_lock);
|
||||
p = mark->cl;
|
||||
spin_lock_bh(&svc->sched_lock);
|
||||
dest = mark->cl;
|
||||
/* No available dests? */
|
||||
if (mark->mw == 0)
|
||||
goto err_noavail;
|
||||
last = dest;
|
||||
/* Stop only after all dests were checked for weight >= 1 (last pass) */
|
||||
while (1) {
|
||||
if (mark->cl == &svc->destinations) {
|
||||
/* it is at the head of the destination list */
|
||||
|
||||
if (mark->cl == mark->cl->next) {
|
||||
/* no dest entry */
|
||||
ip_vs_scheduler_err(svc,
|
||||
"no destination available: "
|
||||
"no destinations present");
|
||||
dest = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mark->cl = svc->destinations.next;
|
||||
mark->cw -= mark->di;
|
||||
if (mark->cw <= 0) {
|
||||
mark->cw = mark->mw;
|
||||
/*
|
||||
* Still zero, which means no available servers.
|
||||
*/
|
||||
if (mark->cw == 0) {
|
||||
mark->cl = &svc->destinations;
|
||||
ip_vs_scheduler_err(svc,
|
||||
"no destination available");
|
||||
dest = NULL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} else
|
||||
mark->cl = mark->cl->next;
|
||||
|
||||
if (mark->cl != &svc->destinations) {
|
||||
/* not at the head of the list */
|
||||
dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
|
||||
list_for_each_entry_continue_rcu(dest,
|
||||
&svc->destinations,
|
||||
n_list) {
|
||||
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
|
||||
atomic_read(&dest->weight) >= mark->cw) {
|
||||
/* got it */
|
||||
break;
|
||||
}
|
||||
atomic_read(&dest->weight) >= mark->cw)
|
||||
goto found;
|
||||
if (dest == stop)
|
||||
goto err_over;
|
||||
}
|
||||
|
||||
if (mark->cl == p && mark->cw == mark->di) {
|
||||
/* back to the start, and no dest is found.
|
||||
It is only possible when all dests are OVERLOADED */
|
||||
dest = NULL;
|
||||
ip_vs_scheduler_err(svc,
|
||||
"no destination available: "
|
||||
"all destinations are overloaded");
|
||||
goto out;
|
||||
mark->cw -= mark->di;
|
||||
if (mark->cw <= 0) {
|
||||
mark->cw = mark->mw;
|
||||
/* Stop if we tried last pass from first dest:
|
||||
* 1. last_pass: we started checks when cw > di but
|
||||
* then all dests were checked for w >= 1
|
||||
* 2. last was head: the first and only traversal
|
||||
* was for weight >= 1, for all dests.
|
||||
*/
|
||||
if (last_pass ||
|
||||
&last->n_list == &svc->destinations)
|
||||
goto err_over;
|
||||
restarted = true;
|
||||
}
|
||||
last_pass = mark->cw <= mark->di;
|
||||
if (last_pass && restarted &&
|
||||
&last->n_list != &svc->destinations) {
|
||||
/* First traversal was for w >= 1 but only
|
||||
* for dests after 'last', now do the same
|
||||
* for all dests up to 'last'.
|
||||
*/
|
||||
stop = last;
|
||||
}
|
||||
}
|
||||
|
||||
found:
|
||||
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
|
||||
"activeconns %d refcnt %d weight %d\n",
|
||||
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
|
||||
atomic_read(&dest->activeconns),
|
||||
atomic_read(&dest->refcnt),
|
||||
atomic_read(&dest->weight));
|
||||
mark->cl = dest;
|
||||
|
||||
out:
|
||||
write_unlock(&svc->sched_lock);
|
||||
spin_unlock_bh(&svc->sched_lock);
|
||||
return dest;
|
||||
|
||||
err_noavail:
|
||||
mark->cl = dest;
|
||||
dest = NULL;
|
||||
ip_vs_scheduler_err(svc, "no destination available");
|
||||
goto out;
|
||||
|
||||
err_over:
|
||||
mark->cl = dest;
|
||||
dest = NULL;
|
||||
ip_vs_scheduler_err(svc, "no destination available: "
|
||||
"all destinations are overloaded");
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
|
||||
.n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
|
||||
.init_service = ip_vs_wrr_init_svc,
|
||||
.done_service = ip_vs_wrr_done_svc,
|
||||
.update_service = ip_vs_wrr_update_svc,
|
||||
.add_dest = ip_vs_wrr_dest_changed,
|
||||
.del_dest = ip_vs_wrr_dest_changed,
|
||||
.upd_dest = ip_vs_wrr_dest_changed,
|
||||
.schedule = ip_vs_wrr_schedule,
|
||||
};
|
||||
|
||||
@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void)
|
||||
static void __exit ip_vs_wrr_cleanup(void)
|
||||
{
|
||||
unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
module_init(ip_vs_wrr_init);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -353,7 +353,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
|
||||
/* rcu_read_lock()ed by nf_hook_slow */
|
||||
helper = rcu_dereference(help->helper);
|
||||
|
||||
nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
|
||||
|
||||
va_end(args);
|
||||
|
@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
|
||||
|
||||
out_invalid:
|
||||
if (LOG_INVALID(net, IPPROTO_DCCP))
|
||||
nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
|
||||
nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
|
||||
NULL, msg);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
|
||||
|
||||
spin_unlock_bh(&ct->lock);
|
||||
if (LOG_INVALID(net, IPPROTO_DCCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_dccp: invalid packet ignored ");
|
||||
return NF_ACCEPT;
|
||||
case CT_DCCP_INVALID:
|
||||
spin_unlock_bh(&ct->lock);
|
||||
if (LOG_INVALID(net, IPPROTO_DCCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_dccp: invalid state transition ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
|
||||
|
||||
out_invalid:
|
||||
if (LOG_INVALID(net, IPPROTO_DCCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg);
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
|
||||
|
@ -720,7 +720,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
|
||||
tn->tcp_be_liberal)
|
||||
res = true;
|
||||
if (!res && LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: %s ",
|
||||
before(seq, sender->td_maxend + 1) ?
|
||||
after(end, sender->td_end - receiver->td_maxwin - 1) ?
|
||||
@ -772,7 +772,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
|
||||
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
|
||||
if (th == NULL) {
|
||||
if (LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: short packet ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -780,7 +780,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
|
||||
/* Not whole TCP header or malformed packet */
|
||||
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
|
||||
if (LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: truncated/malformed packet ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -793,7 +793,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
|
||||
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
|
||||
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
|
||||
if (LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: bad TCP checksum ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -802,7 +802,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
|
||||
tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
|
||||
if (!tcp_valid_flags[tcpflags]) {
|
||||
if (LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: invalid TCP flag combination ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -949,7 +949,7 @@ static int tcp_packet(struct nf_conn *ct,
|
||||
}
|
||||
spin_unlock_bh(&ct->lock);
|
||||
if (LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: invalid packet ignored in "
|
||||
"state %s ", tcp_conntrack_names[old_state]);
|
||||
return NF_ACCEPT;
|
||||
@ -959,7 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
|
||||
dir, get_conntrack_index(th), old_state);
|
||||
spin_unlock_bh(&ct->lock);
|
||||
if (LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: invalid state ");
|
||||
return -NF_ACCEPT;
|
||||
case TCP_CONNTRACK_CLOSE:
|
||||
@ -969,8 +969,8 @@ static int tcp_packet(struct nf_conn *ct,
|
||||
/* Invalid RST */
|
||||
spin_unlock_bh(&ct->lock);
|
||||
if (LOG_INVALID(net, IPPROTO_TCP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_tcp: invalid RST ");
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL,
|
||||
NULL, "nf_ct_tcp: invalid RST ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
if (index == TCP_RST_SET
|
||||
|
@ -119,7 +119,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
|
||||
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
|
||||
if (hdr == NULL) {
|
||||
if (LOG_INVALID(net, IPPROTO_UDP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_udp: short packet ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -127,7 +127,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
|
||||
/* Truncated/malformed packets */
|
||||
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
|
||||
if (LOG_INVALID(net, IPPROTO_UDP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_udp: truncated/malformed packet ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -143,7 +143,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
|
||||
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
|
||||
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
|
||||
if (LOG_INVALID(net, IPPROTO_UDP))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_udp: bad UDP checksum ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
|
@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
|
||||
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
|
||||
if (hdr == NULL) {
|
||||
if (LOG_INVALID(net, IPPROTO_UDPLITE))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_udplite: short packet ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
|
||||
cscov = udplen;
|
||||
else if (cscov < sizeof(*hdr) || cscov > udplen) {
|
||||
if (LOG_INVALID(net, IPPROTO_UDPLITE))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_udplite: invalid checksum coverage ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
|
||||
/* UDPLITE mandates checksums */
|
||||
if (!hdr->check) {
|
||||
if (LOG_INVALID(net, IPPROTO_UDPLITE))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_udplite: checksum missing ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
|
||||
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
|
||||
pf)) {
|
||||
if (LOG_INVALID(net, IPPROTO_UDPLITE))
|
||||
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
|
||||
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
|
||||
"nf_ct_udplite: bad UDPLite checksum ");
|
||||
return -NF_ACCEPT;
|
||||
}
|
||||
|
@ -16,7 +16,6 @@
|
||||
#define NF_LOG_PREFIXLEN 128
|
||||
#define NFLOGGER_NAME_LEN 64
|
||||
|
||||
static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
|
||||
static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
|
||||
static DEFINE_MUTEX(nf_log_mutex);
|
||||
|
||||
@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * nf_log_set - install a logger for a protocol family in a namespace
 * @net: network namespace whose nf_loggers[] slot is updated
 * @pf: protocol family, used as index into net->nf.nf_loggers[]
 * @logger: logger to install
 *
 * The logger is installed only when the slot for @pf is currently empty;
 * a logger that is already bound is left untouched.  NFPROTO_UNSPEC and
 * any @pf outside the nf_loggers[] array are silently ignored.
 */
void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
{
	const struct nf_logger *log;

	/*
	 * Apply the same upper bound that nf_log_bind_pf() and
	 * nf_log_unbind_pf() enforce, so a bad @pf can never index past
	 * the end of the per-namespace array.
	 */
	if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
		return;

	mutex_lock(&nf_log_mutex);
	log = rcu_dereference_protected(net->nf.nf_loggers[pf],
					lockdep_is_held(&nf_log_mutex));
	/* First setter wins: never replace an existing binding. */
	if (log == NULL)
		rcu_assign_pointer(net->nf.nf_loggers[pf], logger);

	mutex_unlock(&nf_log_mutex);
}
EXPORT_SYMBOL(nf_log_set);
|
||||
|
||||
void nf_log_unset(struct net *net, const struct nf_logger *logger)
|
||||
{
|
||||
int i;
|
||||
const struct nf_logger *log;
|
||||
|
||||
mutex_lock(&nf_log_mutex);
|
||||
for (i = 0; i < NFPROTO_NUMPROTO; i++) {
|
||||
log = rcu_dereference_protected(net->nf.nf_loggers[i],
|
||||
lockdep_is_held(&nf_log_mutex));
|
||||
if (log == logger)
|
||||
RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
|
||||
}
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
synchronize_rcu();
|
||||
}
|
||||
EXPORT_SYMBOL(nf_log_unset);
|
||||
|
||||
/* return EEXIST if the same logger is registered, 0 on success. */
|
||||
int nf_log_register(u_int8_t pf, struct nf_logger *logger)
|
||||
{
|
||||
const struct nf_logger *llog;
|
||||
int i;
|
||||
|
||||
if (pf >= ARRAY_SIZE(nf_loggers))
|
||||
if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(logger->list); i++)
|
||||
@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
|
||||
} else {
|
||||
/* register at end of list to honor first register win */
|
||||
list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
|
||||
llog = rcu_dereference_protected(nf_loggers[pf],
|
||||
lockdep_is_held(&nf_log_mutex));
|
||||
if (llog == NULL)
|
||||
rcu_assign_pointer(nf_loggers[pf], logger);
|
||||
}
|
||||
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register);
|
||||
|
||||
void nf_log_unregister(struct nf_logger *logger)
|
||||
{
|
||||
const struct nf_logger *c_logger;
|
||||
int i;
|
||||
|
||||
mutex_lock(&nf_log_mutex);
|
||||
for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
|
||||
c_logger = rcu_dereference_protected(nf_loggers[i],
|
||||
lockdep_is_held(&nf_log_mutex));
|
||||
if (c_logger == logger)
|
||||
RCU_INIT_POINTER(nf_loggers[i], NULL);
|
||||
for (i = 0; i < NFPROTO_NUMPROTO; i++)
|
||||
list_del(&logger->list[i]);
|
||||
}
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
|
||||
synchronize_rcu();
|
||||
}
|
||||
EXPORT_SYMBOL(nf_log_unregister);
|
||||
|
||||
int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
|
||||
int nf_log_bind_pf(struct net *net, u_int8_t pf,
|
||||
const struct nf_logger *logger)
|
||||
{
|
||||
if (pf >= ARRAY_SIZE(nf_loggers))
|
||||
if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
|
||||
return -EINVAL;
|
||||
mutex_lock(&nf_log_mutex);
|
||||
if (__find_logger(pf, logger->name) == NULL) {
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
return -ENOENT;
|
||||
}
|
||||
rcu_assign_pointer(nf_loggers[pf], logger);
|
||||
rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(nf_log_bind_pf);
|
||||
|
||||
void nf_log_unbind_pf(u_int8_t pf)
|
||||
void nf_log_unbind_pf(struct net *net, u_int8_t pf)
|
||||
{
|
||||
if (pf >= ARRAY_SIZE(nf_loggers))
|
||||
if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
|
||||
return;
|
||||
mutex_lock(&nf_log_mutex);
|
||||
RCU_INIT_POINTER(nf_loggers[pf], NULL);
|
||||
RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL);
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(nf_log_unbind_pf);
|
||||
|
||||
void nf_log_packet(u_int8_t pf,
|
||||
void nf_log_packet(struct net *net,
|
||||
u_int8_t pf,
|
||||
unsigned int hooknum,
|
||||
const struct sk_buff *skb,
|
||||
const struct net_device *in,
|
||||
@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf,
|
||||
const struct nf_logger *logger;
|
||||
|
||||
rcu_read_lock();
|
||||
logger = rcu_dereference(nf_loggers[pf]);
|
||||
logger = rcu_dereference(net->nf.nf_loggers[pf]);
|
||||
if (logger) {
|
||||
va_start(args, fmt);
|
||||
vsnprintf(prefix, sizeof(prefix), fmt, args);
|
||||
@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet);
|
||||
#ifdef CONFIG_PROC_FS
|
||||
static void *seq_start(struct seq_file *seq, loff_t *pos)
|
||||
{
|
||||
struct net *net = seq_file_net(seq);
|
||||
|
||||
mutex_lock(&nf_log_mutex);
|
||||
|
||||
if (*pos >= ARRAY_SIZE(nf_loggers))
|
||||
if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
|
||||
return NULL;
|
||||
|
||||
return pos;
|
||||
@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)
|
||||
|
||||
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
{
|
||||
struct net *net = seq_file_net(s);
|
||||
|
||||
(*pos)++;
|
||||
|
||||
if (*pos >= ARRAY_SIZE(nf_loggers))
|
||||
if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
|
||||
return NULL;
|
||||
|
||||
return pos;
|
||||
@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v)
|
||||
const struct nf_logger *logger;
|
||||
struct nf_logger *t;
|
||||
int ret;
|
||||
struct net *net = seq_file_net(s);
|
||||
|
||||
logger = rcu_dereference_protected(nf_loggers[*pos],
|
||||
logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
|
||||
lockdep_is_held(&nf_log_mutex));
|
||||
|
||||
if (!logger)
|
||||
@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = {
|
||||
|
||||
static int nflog_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return seq_open(file, &nflog_seq_ops);
|
||||
return seq_open_net(inode, file, &nflog_seq_ops,
|
||||
sizeof(struct seq_net_private));
|
||||
}
|
||||
|
||||
static const struct file_operations nflog_file_ops = {
|
||||
@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = {
|
||||
.open = nflog_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
.release = seq_release_net,
|
||||
};
|
||||
|
||||
|
||||
@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = {
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
|
||||
static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
|
||||
static struct ctl_table_header *nf_log_dir_header;
|
||||
|
||||
static int nf_log_proc_dostring(ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
|
||||
size_t size = *lenp;
|
||||
int r = 0;
|
||||
int tindex = (unsigned long)table->extra1;
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
|
||||
if (write) {
|
||||
if (size > sizeof(buf))
|
||||
@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
|
||||
return -EFAULT;
|
||||
|
||||
if (!strcmp(buf, "NONE")) {
|
||||
nf_log_unbind_pf(tindex);
|
||||
nf_log_unbind_pf(net, tindex);
|
||||
return 0;
|
||||
}
|
||||
mutex_lock(&nf_log_mutex);
|
||||
@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
return -ENOENT;
|
||||
}
|
||||
rcu_assign_pointer(nf_loggers[tindex], logger);
|
||||
rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
|
||||
mutex_unlock(&nf_log_mutex);
|
||||
} else {
|
||||
mutex_lock(&nf_log_mutex);
|
||||
logger = rcu_dereference_protected(nf_loggers[tindex],
|
||||
logger = rcu_dereference_protected(net->nf.nf_loggers[tindex],
|
||||
lockdep_is_held(&nf_log_mutex));
|
||||
if (!logger)
|
||||
table->data = "NONE";
|
||||
@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
|
||||
return r;
|
||||
}
|
||||
|
||||
static __init int netfilter_log_sysctl_init(void)
|
||||
static int netfilter_log_sysctl_init(struct net *net)
|
||||
{
|
||||
int i;
|
||||
struct ctl_table *table;
|
||||
|
||||
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
|
||||
snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i);
|
||||
nf_log_sysctl_table[i].procname =
|
||||
nf_log_sysctl_fnames[i-NFPROTO_UNSPEC];
|
||||
nf_log_sysctl_table[i].data = NULL;
|
||||
nf_log_sysctl_table[i].maxlen =
|
||||
NFLOGGER_NAME_LEN * sizeof(char);
|
||||
nf_log_sysctl_table[i].mode = 0644;
|
||||
nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring;
|
||||
nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i;
|
||||
table = nf_log_sysctl_table;
|
||||
if (!net_eq(net, &init_net)) {
|
||||
table = kmemdup(nf_log_sysctl_table,
|
||||
sizeof(nf_log_sysctl_table),
|
||||
GFP_KERNEL);
|
||||
if (!table)
|
||||
goto err_alloc;
|
||||
} else {
|
||||
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
|
||||
snprintf(nf_log_sysctl_fnames[i],
|
||||
3, "%d", i);
|
||||
nf_log_sysctl_table[i].procname =
|
||||
nf_log_sysctl_fnames[i];
|
||||
nf_log_sysctl_table[i].data = NULL;
|
||||
nf_log_sysctl_table[i].maxlen =
|
||||
NFLOGGER_NAME_LEN * sizeof(char);
|
||||
nf_log_sysctl_table[i].mode = 0644;
|
||||
nf_log_sysctl_table[i].proc_handler =
|
||||
nf_log_proc_dostring;
|
||||
nf_log_sysctl_table[i].extra1 =
|
||||
(void *)(unsigned long) i;
|
||||
}
|
||||
}
|
||||
|
||||
nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log",
|
||||
nf_log_sysctl_table);
|
||||
if (!nf_log_dir_header)
|
||||
return -ENOMEM;
|
||||
net->nf.nf_log_dir_header = register_net_sysctl(net,
|
||||
"net/netfilter/nf_log",
|
||||
table);
|
||||
if (!net->nf.nf_log_dir_header)
|
||||
goto err_reg;
|
||||
|
||||
return 0;
|
||||
|
||||
err_reg:
|
||||
if (!net_eq(net, &init_net))
|
||||
kfree(table);
|
||||
err_alloc:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void netfilter_log_sysctl_exit(struct net *net)
|
||||
{
|
||||
struct ctl_table *table;
|
||||
|
||||
table = net->nf.nf_log_dir_header->ctl_table_arg;
|
||||
unregister_net_sysctl_table(net->nf.nf_log_dir_header);
|
||||
if (!net_eq(net, &init_net))
|
||||
kfree(table);
|
||||
}
|
||||
#else
|
||||
static __init int netfilter_log_sysctl_init(void)
|
||||
static int netfilter_log_sysctl_init(struct net *net)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stub used when CONFIG_SYSCTL is disabled: there is nothing to tear down. */
static void netfilter_log_sysctl_exit(struct net *net)
{
}
|
||||
#endif /* CONFIG_SYSCTL */
|
||||
|
||||
int __init netfilter_log_init(void)
|
||||
static int __net_init nf_log_net_init(struct net *net)
|
||||
{
|
||||
int i, r;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
if (!proc_create("nf_log", S_IRUGO,
|
||||
proc_net_netfilter, &nflog_file_ops))
|
||||
return -1;
|
||||
net->nf.proc_netfilter, &nflog_file_ops))
|
||||
return ret;
|
||||
#endif
|
||||
ret = netfilter_log_sysctl_init(net);
|
||||
if (ret < 0)
|
||||
goto out_sysctl;
|
||||
|
||||
/* Errors will trigger panic, unroll on error is unnecessary. */
|
||||
r = netfilter_log_sysctl_init();
|
||||
if (r < 0)
|
||||
return r;
|
||||
return 0;
|
||||
|
||||
out_sysctl:
|
||||
/* For init_net: errors will trigger panic, don't unroll on error. */
|
||||
if (!net_eq(net, &init_net))
|
||||
remove_proc_entry("nf_log", net->nf.proc_netfilter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Per-namespace teardown for nf_log: removes the sysctl table and then
 * the "nf_log" proc entry.
 */
static void __net_exit nf_log_net_exit(struct net *net)
{
	netfilter_log_sysctl_exit(net);
	/*
	 * The proc entry is only created under CONFIG_PROC_FS in
	 * nf_log_net_init(), so guard its removal the same way.
	 */
#ifdef CONFIG_PROC_FS
	remove_proc_entry("nf_log", net->nf.proc_netfilter);
#endif
}
|
||||
|
||||
static struct pernet_operations nf_log_net_ops = {
|
||||
.init = nf_log_net_init,
|
||||
.exit = nf_log_net_exit,
|
||||
};
|
||||
|
||||
int __init netfilter_log_init(void)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
ret = register_pernet_subsys(&nf_log_net_ops);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
|
||||
INIT_LIST_HEAD(&(nf_loggers_l[i]));
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/netfilter/nf_log.h>
|
||||
#include <net/netns/generic.h>
|
||||
#include <net/netfilter/nfnetlink_log.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
@ -56,6 +57,7 @@ struct nfulnl_instance {
|
||||
unsigned int qlen; /* number of nlmsgs in skb */
|
||||
struct sk_buff *skb; /* pre-allocatd skb */
|
||||
struct timer_list timer;
|
||||
struct net *net;
|
||||
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
|
||||
int peer_portid; /* PORTID of the peer process */
|
||||
|
||||
@ -71,25 +73,34 @@ struct nfulnl_instance {
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
static DEFINE_SPINLOCK(instances_lock);
|
||||
static atomic_t global_seq;
|
||||
|
||||
#define INSTANCE_BUCKETS 16
|
||||
static struct hlist_head instance_table[INSTANCE_BUCKETS];
|
||||
static unsigned int hash_init;
|
||||
|
||||
static int nfnl_log_net_id __read_mostly;
|
||||
|
||||
struct nfnl_log_net {
|
||||
spinlock_t instances_lock;
|
||||
struct hlist_head instance_table[INSTANCE_BUCKETS];
|
||||
atomic_t global_seq;
|
||||
};
|
||||
|
||||
static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
|
||||
{
|
||||
return net_generic(net, nfnl_log_net_id);
|
||||
}
|
||||
|
||||
/*
 * Map a log group number to an instance_table[] bucket: only the low
 * eight bits of @group_num are used, reduced modulo INSTANCE_BUCKETS.
 */
static inline u_int8_t instance_hashfn(u_int16_t group_num)
{
	const u_int16_t low_byte = group_num & 0xff;

	return low_byte % INSTANCE_BUCKETS;
}
|
||||
|
||||
static struct nfulnl_instance *
|
||||
__instance_lookup(u_int16_t group_num)
|
||||
__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct nfulnl_instance *inst;
|
||||
|
||||
head = &instance_table[instance_hashfn(group_num)];
|
||||
head = &log->instance_table[instance_hashfn(group_num)];
|
||||
hlist_for_each_entry_rcu(inst, head, hlist) {
|
||||
if (inst->group_num == group_num)
|
||||
return inst;
|
||||
@ -104,12 +115,12 @@ instance_get(struct nfulnl_instance *inst)
|
||||
}
|
||||
|
||||
static struct nfulnl_instance *
|
||||
instance_lookup_get(u_int16_t group_num)
|
||||
instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
|
||||
{
|
||||
struct nfulnl_instance *inst;
|
||||
|
||||
rcu_read_lock_bh();
|
||||
inst = __instance_lookup(group_num);
|
||||
inst = __instance_lookup(log, group_num);
|
||||
if (inst && !atomic_inc_not_zero(&inst->use))
|
||||
inst = NULL;
|
||||
rcu_read_unlock_bh();
|
||||
@ -119,7 +130,11 @@ instance_lookup_get(u_int16_t group_num)
|
||||
|
||||
static void nfulnl_instance_free_rcu(struct rcu_head *head)
|
||||
{
|
||||
kfree(container_of(head, struct nfulnl_instance, rcu));
|
||||
struct nfulnl_instance *inst =
|
||||
container_of(head, struct nfulnl_instance, rcu);
|
||||
|
||||
put_net(inst->net);
|
||||
kfree(inst);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
@ -133,13 +148,15 @@ instance_put(struct nfulnl_instance *inst)
|
||||
static void nfulnl_timer(unsigned long data);
|
||||
|
||||
static struct nfulnl_instance *
|
||||
instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
|
||||
instance_create(struct net *net, u_int16_t group_num,
|
||||
int portid, struct user_namespace *user_ns)
|
||||
{
|
||||
struct nfulnl_instance *inst;
|
||||
struct nfnl_log_net *log = nfnl_log_pernet(net);
|
||||
int err;
|
||||
|
||||
spin_lock_bh(&instances_lock);
|
||||
if (__instance_lookup(group_num)) {
|
||||
spin_lock_bh(&log->instances_lock);
|
||||
if (__instance_lookup(log, group_num)) {
|
||||
err = -EEXIST;
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -163,6 +180,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
|
||||
|
||||
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
|
||||
|
||||
inst->net = get_net(net);
|
||||
inst->peer_user_ns = user_ns;
|
||||
inst->peer_portid = portid;
|
||||
inst->group_num = group_num;
|
||||
@ -174,14 +192,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
|
||||
inst->copy_range = NFULNL_COPY_RANGE_MAX;
|
||||
|
||||
hlist_add_head_rcu(&inst->hlist,
|
||||
&instance_table[instance_hashfn(group_num)]);
|
||||
&log->instance_table[instance_hashfn(group_num)]);
|
||||
|
||||
spin_unlock_bh(&instances_lock);
|
||||
|
||||
spin_unlock_bh(&log->instances_lock);
|
||||
|
||||
return inst;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_bh(&instances_lock);
|
||||
spin_unlock_bh(&log->instances_lock);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
@ -210,11 +229,12 @@ __instance_destroy(struct nfulnl_instance *inst)
|
||||
}
|
||||
|
||||
static inline void
|
||||
instance_destroy(struct nfulnl_instance *inst)
|
||||
instance_destroy(struct nfnl_log_net *log,
|
||||
struct nfulnl_instance *inst)
|
||||
{
|
||||
spin_lock_bh(&instances_lock);
|
||||
spin_lock_bh(&log->instances_lock);
|
||||
__instance_destroy(inst);
|
||||
spin_unlock_bh(&instances_lock);
|
||||
spin_unlock_bh(&log->instances_lock);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -336,7 +356,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
|
||||
if (!nlh)
|
||||
goto out;
|
||||
}
|
||||
status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid,
|
||||
status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
|
||||
MSG_DONTWAIT);
|
||||
|
||||
inst->qlen = 0;
|
||||
@ -370,7 +390,8 @@ nfulnl_timer(unsigned long data)
|
||||
/* This is an inline function, we don't really care about a long
|
||||
* list of arguments */
|
||||
static inline int
|
||||
__build_packet_message(struct nfulnl_instance *inst,
|
||||
__build_packet_message(struct nfnl_log_net *log,
|
||||
struct nfulnl_instance *inst,
|
||||
const struct sk_buff *skb,
|
||||
unsigned int data_len,
|
||||
u_int8_t pf,
|
||||
@ -536,7 +557,7 @@ __build_packet_message(struct nfulnl_instance *inst,
|
||||
/* global sequence number */
|
||||
if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
|
||||
nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
|
||||
htonl(atomic_inc_return(&global_seq))))
|
||||
htonl(atomic_inc_return(&log->global_seq))))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (data_len) {
|
||||
@ -592,13 +613,15 @@ nfulnl_log_packet(u_int8_t pf,
|
||||
const struct nf_loginfo *li;
|
||||
unsigned int qthreshold;
|
||||
unsigned int plen;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
struct nfnl_log_net *log = nfnl_log_pernet(net);
|
||||
|
||||
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
|
||||
li = li_user;
|
||||
else
|
||||
li = &default_loginfo;
|
||||
|
||||
inst = instance_lookup_get(li->u.ulog.group);
|
||||
inst = instance_lookup_get(log, li->u.ulog.group);
|
||||
if (!inst)
|
||||
return;
|
||||
|
||||
@ -680,7 +703,7 @@ nfulnl_log_packet(u_int8_t pf,
|
||||
|
||||
inst->qlen++;
|
||||
|
||||
__build_packet_message(inst, skb, data_len, pf,
|
||||
__build_packet_message(log, inst, skb, data_len, pf,
|
||||
hooknum, in, out, prefix, plen);
|
||||
|
||||
if (inst->qlen >= qthreshold)
|
||||
@ -709,24 +732,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
struct netlink_notify *n = ptr;
|
||||
struct nfnl_log_net *log = nfnl_log_pernet(n->net);
|
||||
|
||||
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
|
||||
int i;
|
||||
|
||||
/* destroy all instances for this portid */
|
||||
spin_lock_bh(&instances_lock);
|
||||
spin_lock_bh(&log->instances_lock);
|
||||
for (i = 0; i < INSTANCE_BUCKETS; i++) {
|
||||
struct hlist_node *t2;
|
||||
struct nfulnl_instance *inst;
|
||||
struct hlist_head *head = &instance_table[i];
|
||||
struct hlist_head *head = &log->instance_table[i];
|
||||
|
||||
hlist_for_each_entry_safe(inst, t2, head, hlist) {
|
||||
if ((net_eq(n->net, &init_net)) &&
|
||||
(n->portid == inst->peer_portid))
|
||||
if (n->portid == inst->peer_portid)
|
||||
__instance_destroy(inst);
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&instances_lock);
|
||||
spin_unlock_bh(&log->instances_lock);
|
||||
}
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
@ -767,6 +790,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
u_int16_t group_num = ntohs(nfmsg->res_id);
|
||||
struct nfulnl_instance *inst;
|
||||
struct nfulnl_msg_config_cmd *cmd = NULL;
|
||||
struct net *net = sock_net(ctnl);
|
||||
struct nfnl_log_net *log = nfnl_log_pernet(net);
|
||||
int ret = 0;
|
||||
|
||||
if (nfula[NFULA_CFG_CMD]) {
|
||||
@ -776,14 +801,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
/* Commands without queue context */
|
||||
switch (cmd->command) {
|
||||
case NFULNL_CFG_CMD_PF_BIND:
|
||||
return nf_log_bind_pf(pf, &nfulnl_logger);
|
||||
return nf_log_bind_pf(net, pf, &nfulnl_logger);
|
||||
case NFULNL_CFG_CMD_PF_UNBIND:
|
||||
nf_log_unbind_pf(pf);
|
||||
nf_log_unbind_pf(net, pf);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inst = instance_lookup_get(group_num);
|
||||
inst = instance_lookup_get(log, group_num);
|
||||
if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
|
||||
ret = -EPERM;
|
||||
goto out_put;
|
||||
@ -797,7 +822,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
inst = instance_create(group_num,
|
||||
inst = instance_create(net, group_num,
|
||||
NETLINK_CB(skb).portid,
|
||||
sk_user_ns(NETLINK_CB(skb).ssk));
|
||||
if (IS_ERR(inst)) {
|
||||
@ -811,7 +836,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
instance_destroy(inst);
|
||||
instance_destroy(log, inst);
|
||||
goto out_put;
|
||||
default:
|
||||
ret = -ENOTSUPP;
|
||||
@ -894,55 +919,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = {
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
struct iter_state {
|
||||
struct seq_net_private p;
|
||||
unsigned int bucket;
|
||||
};
|
||||
|
||||
static struct hlist_node *get_first(struct iter_state *st)
|
||||
static struct hlist_node *get_first(struct net *net, struct iter_state *st)
|
||||
{
|
||||
struct nfnl_log_net *log;
|
||||
if (!st)
|
||||
return NULL;
|
||||
|
||||
log = nfnl_log_pernet(net);
|
||||
|
||||
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
|
||||
if (!hlist_empty(&instance_table[st->bucket]))
|
||||
return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
|
||||
struct hlist_head *head = &log->instance_table[st->bucket];
|
||||
|
||||
if (!hlist_empty(head))
|
||||
return rcu_dereference_bh(hlist_first_rcu(head));
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
|
||||
static struct hlist_node *get_next(struct net *net, struct iter_state *st,
|
||||
struct hlist_node *h)
|
||||
{
|
||||
h = rcu_dereference_bh(hlist_next_rcu(h));
|
||||
while (!h) {
|
||||
struct nfnl_log_net *log;
|
||||
struct hlist_head *head;
|
||||
|
||||
if (++st->bucket >= INSTANCE_BUCKETS)
|
||||
return NULL;
|
||||
|
||||
h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
|
||||
log = nfnl_log_pernet(net);
|
||||
head = &log->instance_table[st->bucket];
|
||||
h = rcu_dereference_bh(hlist_first_rcu(head));
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
|
||||
static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
|
||||
loff_t pos)
|
||||
{
|
||||
struct hlist_node *head;
|
||||
head = get_first(st);
|
||||
head = get_first(net, st);
|
||||
|
||||
if (head)
|
||||
while (pos && (head = get_next(st, head)))
|
||||
while (pos && (head = get_next(net, st, head)))
|
||||
pos--;
|
||||
return pos ? NULL : head;
|
||||
}
|
||||
|
||||
static void *seq_start(struct seq_file *seq, loff_t *pos)
|
||||
static void *seq_start(struct seq_file *s, loff_t *pos)
|
||||
__acquires(rcu_bh)
|
||||
{
|
||||
rcu_read_lock_bh();
|
||||
return get_idx(seq->private, *pos);
|
||||
return get_idx(seq_file_net(s), s->private, *pos);
|
||||
}
|
||||
|
||||
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
{
|
||||
(*pos)++;
|
||||
return get_next(s->private, v);
|
||||
return get_next(seq_file_net(s), s->private, v);
|
||||
}
|
||||
|
||||
static void seq_stop(struct seq_file *s, void *v)
|
||||
@ -971,8 +1009,8 @@ static const struct seq_operations nful_seq_ops = {
|
||||
|
||||
static int nful_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return seq_open_private(file, &nful_seq_ops,
|
||||
sizeof(struct iter_state));
|
||||
return seq_open_net(inode, file, &nful_seq_ops,
|
||||
sizeof(struct iter_state));
|
||||
}
|
||||
|
||||
static const struct file_operations nful_file_ops = {
|
||||
@ -980,17 +1018,43 @@ static const struct file_operations nful_file_ops = {
|
||||
.open = nful_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release_private,
|
||||
.release = seq_release_net,
|
||||
};
|
||||
|
||||
#endif /* PROC_FS */
|
||||
|
||||
static int __init nfnetlink_log_init(void)
|
||||
static int __net_init nfnl_log_net_init(struct net *net)
|
||||
{
|
||||
int i, status = -ENOMEM;
|
||||
unsigned int i;
|
||||
struct nfnl_log_net *log = nfnl_log_pernet(net);
|
||||
|
||||
for (i = 0; i < INSTANCE_BUCKETS; i++)
|
||||
INIT_HLIST_HEAD(&instance_table[i]);
|
||||
INIT_HLIST_HEAD(&log->instance_table[i]);
|
||||
spin_lock_init(&log->instances_lock);
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
if (!proc_create("nfnetlink_log", 0440,
|
||||
net->nf.proc_netfilter, &nful_file_ops))
|
||||
return -ENOMEM;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __net_exit nfnl_log_net_exit(struct net *net)
|
||||
{
|
||||
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
|
||||
}
|
||||
|
||||
static struct pernet_operations nfnl_log_net_ops = {
|
||||
.init = nfnl_log_net_init,
|
||||
.exit = nfnl_log_net_exit,
|
||||
.id = &nfnl_log_net_id,
|
||||
.size = sizeof(struct nfnl_log_net),
|
||||
};
|
||||
|
||||
static int __init nfnetlink_log_init(void)
|
||||
{
|
||||
int status = -ENOMEM;
|
||||
|
||||
/* it's not really all that important to have a random value, so
|
||||
* we can do this from the init function, even if there hasn't
|
||||
@ -1000,29 +1064,25 @@ static int __init nfnetlink_log_init(void)
|
||||
netlink_register_notifier(&nfulnl_rtnl_notifier);
|
||||
status = nfnetlink_subsys_register(&nfulnl_subsys);
|
||||
if (status < 0) {
|
||||
printk(KERN_ERR "log: failed to create netlink socket\n");
|
||||
pr_err("log: failed to create netlink socket\n");
|
||||
goto cleanup_netlink_notifier;
|
||||
}
|
||||
|
||||
status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
|
||||
if (status < 0) {
|
||||
printk(KERN_ERR "log: failed to register logger\n");
|
||||
pr_err("log: failed to register logger\n");
|
||||
goto cleanup_subsys;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
if (!proc_create("nfnetlink_log", 0440,
|
||||
proc_net_netfilter, &nful_file_ops)) {
|
||||
status = -ENOMEM;
|
||||
status = register_pernet_subsys(&nfnl_log_net_ops);
|
||||
if (status < 0) {
|
||||
pr_err("log: failed to register pernet ops\n");
|
||||
goto cleanup_logger;
|
||||
}
|
||||
#endif
|
||||
return status;
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
cleanup_logger:
|
||||
nf_log_unregister(&nfulnl_logger);
|
||||
#endif
|
||||
cleanup_subsys:
|
||||
nfnetlink_subsys_unregister(&nfulnl_subsys);
|
||||
cleanup_netlink_notifier:
|
||||
@ -1032,10 +1092,8 @@ cleanup_netlink_notifier:
|
||||
|
||||
static void __exit nfnetlink_log_fini(void)
|
||||
{
|
||||
unregister_pernet_subsys(&nfnl_log_net_ops);
|
||||
nf_log_unregister(&nfulnl_logger);
|
||||
#ifdef CONFIG_PROC_FS
|
||||
remove_proc_entry("nfnetlink_log", proc_net_netfilter);
|
||||
#endif
|
||||
nfnetlink_subsys_unregister(&nfulnl_subsys);
|
||||
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
|
||||
}
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <linux/list.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/netfilter/nf_queue.h>
|
||||
#include <net/netns/generic.h>
|
||||
#include <net/netfilter/nfnetlink_queue.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
@ -66,10 +67,18 @@ struct nfqnl_instance {
|
||||
|
||||
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
|
||||
|
||||
static DEFINE_SPINLOCK(instances_lock);
|
||||
static int nfnl_queue_net_id __read_mostly;
|
||||
|
||||
#define INSTANCE_BUCKETS 16
|
||||
static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
|
||||
struct nfnl_queue_net {
|
||||
spinlock_t instances_lock;
|
||||
struct hlist_head instance_table[INSTANCE_BUCKETS];
|
||||
};
|
||||
|
||||
static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
|
||||
{
|
||||
return net_generic(net, nfnl_queue_net_id);
|
||||
}
|
||||
|
||||
static inline u_int8_t instance_hashfn(u_int16_t queue_num)
|
||||
{
|
||||
@ -77,12 +86,12 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
|
||||
}
|
||||
|
||||
static struct nfqnl_instance *
|
||||
instance_lookup(u_int16_t queue_num)
|
||||
instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct nfqnl_instance *inst;
|
||||
|
||||
head = &instance_table[instance_hashfn(queue_num)];
|
||||
head = &q->instance_table[instance_hashfn(queue_num)];
|
||||
hlist_for_each_entry_rcu(inst, head, hlist) {
|
||||
if (inst->queue_num == queue_num)
|
||||
return inst;
|
||||
@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num)
|
||||
}
|
||||
|
||||
static struct nfqnl_instance *
|
||||
instance_create(u_int16_t queue_num, int portid)
|
||||
instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
|
||||
int portid)
|
||||
{
|
||||
struct nfqnl_instance *inst;
|
||||
unsigned int h;
|
||||
int err;
|
||||
|
||||
spin_lock(&instances_lock);
|
||||
if (instance_lookup(queue_num)) {
|
||||
spin_lock(&q->instances_lock);
|
||||
if (instance_lookup(q, queue_num)) {
|
||||
err = -EEXIST;
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid)
|
||||
}
|
||||
|
||||
h = instance_hashfn(queue_num);
|
||||
hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
|
||||
hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
|
||||
|
||||
spin_unlock(&instances_lock);
|
||||
spin_unlock(&q->instances_lock);
|
||||
|
||||
return inst;
|
||||
|
||||
out_free:
|
||||
kfree(inst);
|
||||
out_unlock:
|
||||
spin_unlock(&instances_lock);
|
||||
spin_unlock(&q->instances_lock);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst)
|
||||
}
|
||||
|
||||
static void
|
||||
instance_destroy(struct nfqnl_instance *inst)
|
||||
instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
|
||||
{
|
||||
spin_lock(&instances_lock);
|
||||
spin_lock(&q->instances_lock);
|
||||
__instance_destroy(inst);
|
||||
spin_unlock(&instances_lock);
|
||||
spin_unlock(&q->instances_lock);
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -473,9 +483,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
||||
int err = -ENOBUFS;
|
||||
__be32 *packet_id_ptr;
|
||||
int failopen = 0;
|
||||
struct net *net = dev_net(entry->indev ?
|
||||
entry->indev : entry->outdev);
|
||||
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||
|
||||
/* rcu_read_lock()ed by nf_hook_slow() */
|
||||
queue = instance_lookup(queuenum);
|
||||
queue = instance_lookup(q, queuenum);
|
||||
if (!queue) {
|
||||
err = -ESRCH;
|
||||
goto err_out;
|
||||
@ -512,7 +525,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
||||
*packet_id_ptr = htonl(entry->id);
|
||||
|
||||
/* nfnetlink_unicast will either free the nskb or add it to a socket */
|
||||
err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT);
|
||||
err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
|
||||
if (err < 0) {
|
||||
queue->queue_user_dropped++;
|
||||
goto err_out_unlock;
|
||||
@ -625,15 +638,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
|
||||
/* drop all packets with either indev or outdev == ifindex from all queue
|
||||
* instances */
|
||||
static void
|
||||
nfqnl_dev_drop(int ifindex)
|
||||
nfqnl_dev_drop(struct net *net, int ifindex)
|
||||
{
|
||||
int i;
|
||||
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
for (i = 0; i < INSTANCE_BUCKETS; i++) {
|
||||
struct nfqnl_instance *inst;
|
||||
struct hlist_head *head = &instance_table[i];
|
||||
struct hlist_head *head = &q->instance_table[i];
|
||||
|
||||
hlist_for_each_entry_rcu(inst, head, hlist)
|
||||
nfqnl_flush(inst, dev_cmp, ifindex);
|
||||
@ -650,12 +664,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this,
|
||||
{
|
||||
struct net_device *dev = ptr;
|
||||
|
||||
if (!net_eq(dev_net(dev), &init_net))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/* Drop any packets associated with the downed device */
|
||||
if (event == NETDEV_DOWN)
|
||||
nfqnl_dev_drop(dev->ifindex);
|
||||
nfqnl_dev_drop(dev_net(dev), dev->ifindex);
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
@ -668,24 +679,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
struct netlink_notify *n = ptr;
|
||||
struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
|
||||
|
||||
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
|
||||
int i;
|
||||
|
||||
/* destroy all instances for this portid */
|
||||
spin_lock(&instances_lock);
|
||||
spin_lock(&q->instances_lock);
|
||||
for (i = 0; i < INSTANCE_BUCKETS; i++) {
|
||||
struct hlist_node *t2;
|
||||
struct nfqnl_instance *inst;
|
||||
struct hlist_head *head = &instance_table[i];
|
||||
struct hlist_head *head = &q->instance_table[i];
|
||||
|
||||
hlist_for_each_entry_safe(inst, t2, head, hlist) {
|
||||
if ((n->net == &init_net) &&
|
||||
(n->portid == inst->peer_portid))
|
||||
if (n->portid == inst->peer_portid)
|
||||
__instance_destroy(inst);
|
||||
}
|
||||
}
|
||||
spin_unlock(&instances_lock);
|
||||
spin_unlock(&q->instances_lock);
|
||||
}
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
@ -706,11 +717,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
|
||||
[NFQA_MARK] = { .type = NLA_U32 },
|
||||
};
|
||||
|
||||
static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid)
|
||||
static struct nfqnl_instance *
|
||||
verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
|
||||
{
|
||||
struct nfqnl_instance *queue;
|
||||
|
||||
queue = instance_lookup(queue_num);
|
||||
queue = instance_lookup(q, queue_num);
|
||||
if (!queue)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
@ -754,7 +766,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
|
||||
LIST_HEAD(batch_list);
|
||||
u16 queue_num = ntohs(nfmsg->res_id);
|
||||
|
||||
queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
|
||||
struct net *net = sock_net(ctnl);
|
||||
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||
|
||||
queue = verdict_instance_lookup(q, queue_num,
|
||||
NETLINK_CB(skb).portid);
|
||||
if (IS_ERR(queue))
|
||||
return PTR_ERR(queue);
|
||||
|
||||
@ -802,10 +818,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
|
||||
enum ip_conntrack_info uninitialized_var(ctinfo);
|
||||
struct nf_conn *ct = NULL;
|
||||
|
||||
queue = instance_lookup(queue_num);
|
||||
if (!queue)
|
||||
struct net *net = sock_net(ctnl);
|
||||
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||
|
||||
queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
|
||||
queue = instance_lookup(q, queue_num);
|
||||
if (!queue)
|
||||
queue = verdict_instance_lookup(q, queue_num,
|
||||
NETLINK_CB(skb).portid);
|
||||
if (IS_ERR(queue))
|
||||
return PTR_ERR(queue);
|
||||
|
||||
@ -869,6 +888,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
u_int16_t queue_num = ntohs(nfmsg->res_id);
|
||||
struct nfqnl_instance *queue;
|
||||
struct nfqnl_msg_config_cmd *cmd = NULL;
|
||||
struct net *net = sock_net(ctnl);
|
||||
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||
int ret = 0;
|
||||
|
||||
if (nfqa[NFQA_CFG_CMD]) {
|
||||
@ -882,7 +903,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
queue = instance_lookup(queue_num);
|
||||
queue = instance_lookup(q, queue_num);
|
||||
if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
|
||||
ret = -EPERM;
|
||||
goto err_out_unlock;
|
||||
@ -895,7 +916,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
ret = -EBUSY;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
queue = instance_create(queue_num, NETLINK_CB(skb).portid);
|
||||
queue = instance_create(q, queue_num,
|
||||
NETLINK_CB(skb).portid);
|
||||
if (IS_ERR(queue)) {
|
||||
ret = PTR_ERR(queue);
|
||||
goto err_out_unlock;
|
||||
@ -906,7 +928,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
||||
ret = -ENODEV;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
instance_destroy(queue);
|
||||
instance_destroy(q, queue);
|
||||
break;
|
||||
case NFQNL_CFG_CMD_PF_BIND:
|
||||
case NFQNL_CFG_CMD_PF_UNBIND:
|
||||
@ -1000,19 +1022,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = {
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
struct iter_state {
|
||||
struct seq_net_private p;
|
||||
unsigned int bucket;
|
||||
};
|
||||
|
||||
static struct hlist_node *get_first(struct seq_file *seq)
|
||||
{
|
||||
struct iter_state *st = seq->private;
|
||||
struct net *net;
|
||||
struct nfnl_queue_net *q;
|
||||
|
||||
if (!st)
|
||||
return NULL;
|
||||
|
||||
net = seq_file_net(seq);
|
||||
q = nfnl_queue_pernet(net);
|
||||
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
|
||||
if (!hlist_empty(&instance_table[st->bucket]))
|
||||
return instance_table[st->bucket].first;
|
||||
if (!hlist_empty(&q->instance_table[st->bucket]))
|
||||
return q->instance_table[st->bucket].first;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@ -1020,13 +1047,17 @@ static struct hlist_node *get_first(struct seq_file *seq)
|
||||
static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
|
||||
{
|
||||
struct iter_state *st = seq->private;
|
||||
struct net *net = seq_file_net(seq);
|
||||
|
||||
h = h->next;
|
||||
while (!h) {
|
||||
struct nfnl_queue_net *q;
|
||||
|
||||
if (++st->bucket >= INSTANCE_BUCKETS)
|
||||
return NULL;
|
||||
|
||||
h = instance_table[st->bucket].first;
|
||||
q = nfnl_queue_pernet(net);
|
||||
h = q->instance_table[st->bucket].first;
|
||||
}
|
||||
return h;
|
||||
}
|
||||
@ -1042,11 +1073,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
|
||||
return pos ? NULL : head;
|
||||
}
|
||||
|
||||
static void *seq_start(struct seq_file *seq, loff_t *pos)
|
||||
__acquires(instances_lock)
|
||||
static void *seq_start(struct seq_file *s, loff_t *pos)
|
||||
__acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
|
||||
{
|
||||
spin_lock(&instances_lock);
|
||||
return get_idx(seq, *pos);
|
||||
spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
|
||||
return get_idx(s, *pos);
|
||||
}
|
||||
|
||||
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
@ -1056,9 +1087,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
}
|
||||
|
||||
static void seq_stop(struct seq_file *s, void *v)
|
||||
__releases(instances_lock)
|
||||
__releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
|
||||
{
|
||||
spin_unlock(&instances_lock);
|
||||
spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
|
||||
}
|
||||
|
||||
static int seq_show(struct seq_file *s, void *v)
|
||||
@ -1082,7 +1113,7 @@ static const struct seq_operations nfqnl_seq_ops = {
|
||||
|
||||
static int nfqnl_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return seq_open_private(file, &nfqnl_seq_ops,
|
||||
return seq_open_net(inode, file, &nfqnl_seq_ops,
|
||||
sizeof(struct iter_state));
|
||||
}
|
||||
|
||||
@ -1091,39 +1122,63 @@ static const struct file_operations nfqnl_file_ops = {
|
||||
.open = nfqnl_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release_private,
|
||||
.release = seq_release_net,
|
||||
};
|
||||
|
||||
#endif /* PROC_FS */
|
||||
|
||||
static int __init nfnetlink_queue_init(void)
|
||||
static int __net_init nfnl_queue_net_init(struct net *net)
|
||||
{
|
||||
int i, status = -ENOMEM;
|
||||
unsigned int i;
|
||||
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||
|
||||
for (i = 0; i < INSTANCE_BUCKETS; i++)
|
||||
INIT_HLIST_HEAD(&instance_table[i]);
|
||||
INIT_HLIST_HEAD(&q->instance_table[i]);
|
||||
|
||||
spin_lock_init(&q->instances_lock);
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
if (!proc_create("nfnetlink_queue", 0440,
|
||||
net->nf.proc_netfilter, &nfqnl_file_ops))
|
||||
return -ENOMEM;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __net_exit nfnl_queue_net_exit(struct net *net)
|
||||
{
|
||||
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
|
||||
}
|
||||
|
||||
static struct pernet_operations nfnl_queue_net_ops = {
|
||||
.init = nfnl_queue_net_init,
|
||||
.exit = nfnl_queue_net_exit,
|
||||
.id = &nfnl_queue_net_id,
|
||||
.size = sizeof(struct nfnl_queue_net),
|
||||
};
|
||||
|
||||
static int __init nfnetlink_queue_init(void)
|
||||
{
|
||||
int status = -ENOMEM;
|
||||
|
||||
netlink_register_notifier(&nfqnl_rtnl_notifier);
|
||||
status = nfnetlink_subsys_register(&nfqnl_subsys);
|
||||
if (status < 0) {
|
||||
printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
|
||||
pr_err("nf_queue: failed to create netlink socket\n");
|
||||
goto cleanup_netlink_notifier;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
if (!proc_create("nfnetlink_queue", 0440,
|
||||
proc_net_netfilter, &nfqnl_file_ops))
|
||||
status = register_pernet_subsys(&nfnl_queue_net_ops);
|
||||
if (status < 0) {
|
||||
pr_err("nf_queue: failed to register pernet ops\n");
|
||||
goto cleanup_subsys;
|
||||
#endif
|
||||
|
||||
}
|
||||
register_netdevice_notifier(&nfqnl_dev_notifier);
|
||||
nf_register_queue_handler(&nfqh);
|
||||
return status;
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
cleanup_subsys:
|
||||
nfnetlink_subsys_unregister(&nfqnl_subsys);
|
||||
#endif
|
||||
cleanup_netlink_notifier:
|
||||
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
|
||||
return status;
|
||||
@ -1133,9 +1188,7 @@ static void __exit nfnetlink_queue_fini(void)
|
||||
{
|
||||
nf_unregister_queue_handler();
|
||||
unregister_netdevice_notifier(&nfqnl_dev_notifier);
|
||||
#ifdef CONFIG_PROC_FS
|
||||
remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
|
||||
#endif
|
||||
unregister_pernet_subsys(&nfnl_queue_net_ops);
|
||||
nfnetlink_subsys_unregister(&nfqnl_subsys);
|
||||
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
|
||||
|
||||
|
@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf,
|
||||
const struct nf_loginfo *loginfo,
|
||||
const char *prefix)
|
||||
{
|
||||
struct sbuff *m = sb_open();
|
||||
struct sbuff *m;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
|
||||
/* FIXME: Disabled from containers until syslog ns is supported */
|
||||
if (!net_eq(net, &init_net))
|
||||
return;
|
||||
|
||||
m = sb_open();
|
||||
|
||||
if (!loginfo)
|
||||
loginfo = &default_loginfo;
|
||||
@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf,
|
||||
const struct nf_loginfo *loginfo,
|
||||
const char *prefix)
|
||||
{
|
||||
struct sbuff *m = sb_open();
|
||||
struct sbuff *m;
|
||||
struct net *net = dev_net(in ? in : out);
|
||||
|
||||
/* FIXME: Disabled from containers until syslog ns is supported */
|
||||
if (!net_eq(net, &init_net))
|
||||
return;
|
||||
|
||||
m = sb_open();
|
||||
|
||||
if (!loginfo)
|
||||
loginfo = &default_loginfo;
|
||||
@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = {
|
||||
};
|
||||
#endif
|
||||
|
||||
static int __net_init log_net_init(struct net *net)
|
||||
{
|
||||
nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
|
||||
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
|
||||
nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __net_exit log_net_exit(struct net *net)
|
||||
{
|
||||
nf_log_unset(net, &ipt_log_logger);
|
||||
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
|
||||
nf_log_unset(net, &ip6t_log_logger);
|
||||
#endif
|
||||
}
|
||||
|
||||
static struct pernet_operations log_net_ops = {
|
||||
.init = log_net_init,
|
||||
.exit = log_net_exit,
|
||||
};
|
||||
|
||||
static int __init log_tg_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_pernet_subsys(&log_net_ops);
|
||||
if (ret < 0)
|
||||
goto err_pernet;
|
||||
|
||||
ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
goto err_target;
|
||||
|
||||
nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
|
||||
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
|
||||
nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);
|
||||
#endif
|
||||
return 0;
|
||||
|
||||
err_target:
|
||||
unregister_pernet_subsys(&log_net_ops);
|
||||
err_pernet:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit log_tg_exit(void)
|
||||
{
|
||||
unregister_pernet_subsys(&log_net_ops);
|
||||
nf_log_unregister(&ipt_log_logger);
|
||||
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
|
||||
nf_log_unregister(&ip6t_log_logger);
|
||||
|
@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb)
|
||||
}
|
||||
#endif
|
||||
|
||||
static u32
|
||||
nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct xt_NFQ_info_v1 *info = par->targinfo;
|
||||
u32 queue = info->queuenum;
|
||||
|
||||
if (par->family == NFPROTO_IPV4)
|
||||
queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;
|
||||
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
|
||||
else if (par->family == NFPROTO_IPV6)
|
||||
queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;
|
||||
#endif
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct xt_NFQ_info_v1 *info = par->targinfo;
|
||||
u32 queue = info->queuenum;
|
||||
|
||||
if (info->queues_total > 1) {
|
||||
if (par->family == NFPROTO_IPV4)
|
||||
queue = (((u64) hash_v4(skb) * info->queues_total) >>
|
||||
32) + queue;
|
||||
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
|
||||
else if (par->family == NFPROTO_IPV6)
|
||||
queue = (((u64) hash_v6(skb) * info->queues_total) >>
|
||||
32) + queue;
|
||||
#endif
|
||||
}
|
||||
if (info->queues_total > 1)
|
||||
queue = nfqueue_hash(skb, par);
|
||||
|
||||
return NF_QUEUE_NR(queue);
|
||||
}
|
||||
|
||||
@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
|
||||
static int nfqueue_tg_check(const struct xt_tgchk_param *par)
|
||||
{
|
||||
const struct xt_NFQ_info_v2 *info = par->targinfo;
|
||||
const struct xt_NFQ_info_v3 *info = par->targinfo;
|
||||
u32 maxid;
|
||||
|
||||
if (unlikely(!rnd_inited)) {
|
||||
@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
|
||||
info->queues_total, maxid);
|
||||
return -ERANGE;
|
||||
}
|
||||
if (par->target->revision == 2 && info->bypass > 1)
|
||||
if (par->target->revision == 2 && info->flags > 1)
|
||||
return -EINVAL;
|
||||
if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct xt_NFQ_info_v3 *info = par->targinfo;
|
||||
u32 queue = info->queuenum;
|
||||
|
||||
if (info->queues_total > 1) {
|
||||
if (info->flags & NFQ_FLAG_CPU_FANOUT) {
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
queue = info->queuenum + cpu % info->queues_total;
|
||||
} else
|
||||
queue = nfqueue_hash(skb, par);
|
||||
}
|
||||
|
||||
return NF_QUEUE_NR(queue);
|
||||
}
|
||||
|
||||
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
|
||||
{
|
||||
.name = "NFQUEUE",
|
||||
@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
|
||||
.targetsize = sizeof(struct xt_NFQ_info_v2),
|
||||
.me = THIS_MODULE,
|
||||
},
|
||||
{
|
||||
.name = "NFQUEUE",
|
||||
.revision = 3,
|
||||
.family = NFPROTO_UNSPEC,
|
||||
.checkentry = nfqueue_tg_check,
|
||||
.target = nfqueue_tg_v3,
|
||||
.targetsize = sizeof(struct xt_NFQ_info_v3),
|
||||
.me = THIS_MODULE,
|
||||
},
|
||||
};
|
||||
|
||||
static int __init nfqueue_tg_init(void)
|
||||
|
@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
|
||||
unsigned char opts[MAX_IPOPTLEN];
|
||||
const struct xt_osf_finger *kf;
|
||||
const struct xt_osf_user_finger *f;
|
||||
struct net *net = dev_net(p->in ? p->in : p->out);
|
||||
|
||||
if (!info)
|
||||
return false;
|
||||
@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
|
||||
fcount++;
|
||||
|
||||
if (info->flags & XT_OSF_LOG)
|
||||
nf_log_packet(p->family, p->hooknum, skb,
|
||||
nf_log_packet(net, p->family, p->hooknum, skb,
|
||||
p->in, p->out, NULL,
|
||||
"%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
|
||||
f->genre, f->version, f->subtype,
|
||||
@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!fcount && (info->flags & XT_OSF_LOG))
|
||||
nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL,
|
||||
nf_log_packet(net, p->family, p->hooknum, skb, p->in,
|
||||
p->out, NULL,
|
||||
"Remote OS is not known: %pI4:%u -> %pI4:%u\n",
|
||||
&ip->saddr, ntohs(tcp->source),
|
||||
&ip->daddr, ntohs(tcp->dest));
|
||||
|
Loading…
Reference in New Issue
Block a user