diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/rculist_bl.h | 49 | ||||
| -rw-r--r-- | include/net/ip6_route.h | 15 | ||||
| -rw-r--r-- | include/net/ip_vs.h | 377 |
3 files changed, 377 insertions, 64 deletions
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index 0b952d06eb0b..36363b876e53 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -8,21 +8,31 @@ #include <linux/list_bl.h> #include <linux/rcupdate.h> +/* return the first ptr or next element in an RCU protected list */ +#define hlist_bl_first_rcu(head) \ + (*((struct hlist_bl_node __rcu **)(&(head)->first))) +#define hlist_bl_next_rcu(node) \ + (*((struct hlist_bl_node __rcu **)(&(node)->next))) + static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != LIST_BL_LOCKMASK); - rcu_assign_pointer(h->first, + rcu_assign_pointer(hlist_bl_first_rcu(h), (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); } -static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) -{ - return (struct hlist_bl_node *) - ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); -} +#define hlist_bl_first_rcu_dereference(head) \ +({ \ + struct hlist_bl_head *__head = (head); \ + \ + (struct hlist_bl_node *) \ + ((unsigned long)rcu_dereference_check(hlist_bl_first_rcu(__head), \ + hlist_bl_is_locked(__head)) & \ + ~LIST_BL_LOCKMASK); \ +}) /** * hlist_bl_del_rcu - deletes entry from hash list without re-initialization @@ -73,7 +83,7 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, { struct hlist_bl_node *first; - /* don't need hlist_bl_first_rcu because we're under lock */ + /* don't need hlist_bl_first_rcu* because we're under lock */ first = hlist_bl_first(h); n->next = first; @@ -93,9 +103,30 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, * */ #define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = hlist_bl_first_rcu(head); \ + for (pos = hlist_bl_first_rcu_dereference(head); \ pos && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_bl_next_rcu(pos))) + +/** + * hlist_bl_for_each_entry_continue_rcu - continue iteration over list of given + * type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_bl_node to use as a loop cursor. + * @member: the name of the hlist_bl_node within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position which must have been in the list when the RCU read + * lock was taken. + * This would typically require either that you obtained the node from a + * previous walk of the list in the same RCU read-side critical section, or + * that you held some sort of non-RCU reference (such as a reference count) + * to keep the node alive *and* in the list. + */ +#define hlist_bl_for_each_entry_continue_rcu(tpos, pos, member) \ + for (pos = rcu_dereference_raw(hlist_bl_next_rcu(&(tpos)->member)); \ + pos && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference_raw(hlist_bl_next_rcu(pos))) #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a55f9bf95fe3..0c8eeb6abe7a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -252,15 +252,22 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +static inline bool __ipv6_anycast_destination(const struct rt6key *rt6i_dst, + u32 rt6i_flags, + const struct in6_addr *daddr) +{ + return rt6i_flags & RTF_ANYCAST || + (rt6i_dst->plen < 127 && + !(rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && + ipv6_addr_equal(&rt6i_dst->addr, daddr)); +} + static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { const struct rt6_info *rt = dst_rt6_info(dst); - return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen < 127 && - !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && - ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); + return __ipv6_anycast_destination(&rt->rt6i_dst, rt->rt6i_flags, daddr); } int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ad8a16146ac5..72d325c81313 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -11,6 +11,7 @@ #include <asm/types.h> /* for __uXX types */ #include <linux/list.h> /* for struct list_head */ +#include <linux/rculist_bl.h> /* for struct hlist_bl_head */ #include <linux/spinlock.h> /* for struct rwlock_t */ #include <linux/atomic.h> /* for struct atomic_t */ #include <linux/refcount.h> /* for struct refcount_t */ @@ -30,15 +31,22 @@ #endif #include <net/net_namespace.h> /* Netw namespace */ #include <linux/sched/isolation.h> +#include <linux/siphash.h> #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 -/* - * Hash table: for virtual service lookups - */ -#define IP_VS_SVC_TAB_BITS 8 -#define IP_VS_SVC_TAB_SIZE BIT(IP_VS_SVC_TAB_BITS) -#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) + +/* conn_tab limits (as per Kconfig) */ +#define IP_VS_CONN_TAB_MIN_BITS 8 +#if BITS_PER_LONG > 32 +#define IP_VS_CONN_TAB_MAX_BITS 27 +#else +#define IP_VS_CONN_TAB_MAX_BITS 20 +#endif + +/* svc_table limits */ +#define IP_VS_SVC_TAB_MIN_BITS 4 +#define IP_VS_SVC_TAB_MAX_BITS 20 /* Generic access of ipvs struct */ static inline struct netns_ipvs *net_ipvs(struct net* net) @@ -49,8 +57,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; -extern struct mutex __ip_vs_mutex; - struct ip_vs_iphdr { int hdr_flags; /* ipvs flags */ __u32 off; /* Where IP or IPv4 header starts */ @@ -271,6 +277,10 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, pr_err(msg, ##__VA_ARGS__); \ } while (0) +struct ip_vs_aligned_lock { + spinlock_t l; /* Protect buckets */ +} ____cacheline_aligned_in_smp; + /* For arrays per family */ enum { IP_VS_AF_INET, @@ -283,6 +293,13 @@ static inline int ip_vs_af_index(int af) return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET; } +/* work_flags */ +enum { + IP_VS_WORK_SVC_RESIZE, /* Schedule svc_resize_work */ + IP_VS_WORK_SVC_NORESIZE, /* Stopping svc_resize_work */ + IP_VS_WORK_CONN_RESIZE, /* Schedule conn_resize_work */ +}; + /* The port number of FTP service (in network order). */ #define FTPPORT cpu_to_be16(21) #define FTPDATA cpu_to_be16(20) @@ -484,6 +501,198 @@ struct ip_vs_est_kt_data { int est_row; /* estimated row */ }; +/* IPVS resizable hash tables */ +struct ip_vs_rht { + struct hlist_bl_head *buckets; + struct ip_vs_rht __rcu *new_tbl; /* New/Same table */ + seqcount_t *seqc; /* Protects moves */ + struct ip_vs_aligned_lock *lock; /* Protect seqc */ + int mask; /* Buckets mask */ + int size; /* Buckets */ + int seqc_mask; /* seqc mask */ + int lock_mask; /* lock mask */ + u32 table_id; + int u_thresh; /* upper threshold */ + int l_thresh; /* lower threshold */ + int lfactor; /* Load Factor (shift)*/ + int bits; /* size = 1 << bits */ + siphash_key_t hash_key; + struct rcu_head rcu_head; +}; + +/** + * ip_vs_rht_for_each_table() - Walk the hash tables + * @table: struct ip_vs_rht __rcu *table + * @t: current table, used as cursor, struct ip_vs_rht *var + * @p: previous table, temp struct ip_vs_rht *var + * + * Walk tables assuming others can not change the installed tables + */ +#define ip_vs_rht_for_each_table(table, t, p) \ + for (p = NULL, t = rcu_dereference_protected(table, 1); \ + t != p; \ + p = t, t = rcu_dereference_protected(t->new_tbl, 1)) + +/** + * ip_vs_rht_for_each_table_rcu() - Walk the hash tables under RCU reader lock + * @table: struct ip_vs_rht __rcu *table + * @t: current table, used as cursor, struct ip_vs_rht *var + * @p: previous table, temp struct ip_vs_rht *var + * + * We usually search in one table and also in second table on resizing + */ +#define ip_vs_rht_for_each_table_rcu(table, t, p) \ + for (p = NULL, t = rcu_dereference(table); \ + t != p; \ + p = t, t = rcu_dereference(t->new_tbl)) + +/** + * ip_vs_rht_for_each_bucket() - Walk all table buckets + * @t: current table, used as cursor, struct ip_vs_rht *var + * @bucket: bucket index, used as cursor, u32 var + * @head: bucket address, used as cursor, struct hlist_bl_head *var + */ +#define ip_vs_rht_for_each_bucket(t, bucket, head) \ + for (bucket = 0, head = (t)->buckets; \ + bucket < t->size; bucket++, head++) + +/** + * ip_vs_rht_for_bucket_retry() - Retry bucket if entries are moved + * @t: current table, used as cursor, struct ip_vs_rht *var + * @bucket: index of current bucket or hash key + * @sc: temp seqcount_t *var + * @seq: temp unsigned int var for sequence count + * @retry: temp int var + */ +#define ip_vs_rht_for_bucket_retry(t, bucket, sc, seq, retry) \ + for (retry = 1, sc = &(t)->seqc[(bucket) & (t)->seqc_mask]; \ + retry && ({ seq = read_seqcount_begin(sc); 1; }); \ + retry = read_seqcount_retry(sc, seq)) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() \ + struct ip_vs_rht *_t, *_p; \ + unsigned int _seq; \ + seqcount_t *_sc; \ + u32 _bucket; \ + int _retry +/** + * ip_vs_rht_walk_buckets_rcu() - Walk all buckets under RCU read lock + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete/move entries + * Not suitable if duplicates are not desired + * Possible cases for reader that uses cond_resched_rcu() in the loop: + * - new table can not be installed, no need to repeat + * - new table can be installed => check and repeat if new table is + * installed, needed for !PREEMPT_RCU + */ +#define ip_vs_rht_walk_buckets_rcu(table, head) \ + ip_vs_rht_for_each_table_rcu(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) \ + ip_vs_rht_for_bucket_retry(_t, _bucket, _sc, \ + _seq, _retry) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_bucket_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() \ + unsigned int _seq; \ + seqcount_t *_sc; \ + int _retry +/** + * ip_vs_rht_walk_bucket_rcu() - Walk bucket under RCU read lock + * @t: current table, struct ip_vs_rht *var + * @bucket: index of current bucket or hash key + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete/move entries + * Not suitable if duplicates are not desired + * Possible cases for reader that uses cond_resched_rcu() in the loop: + * - new table can not be installed, no need to repeat + * - new table can be installed => check and repeat if new table is + * installed, needed for !PREEMPT_RCU + */ +#define ip_vs_rht_walk_bucket_rcu(t, bucket, head) \ + if (({ head = (t)->buckets + ((bucket) & (t)->mask); 0; })) \ + {} \ + else \ + ip_vs_rht_for_bucket_retry(t, (bucket), _sc, _seq, _retry) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets_safe_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() \ + struct ip_vs_rht *_t, *_p; \ + u32 _bucket +/** + * ip_vs_rht_walk_buckets_safe_rcu() - Walk all buckets under RCU read lock + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete entries but moving is disabled + * Using cond_resched_rcu() should be safe if tables do not change + */ +#define ip_vs_rht_walk_buckets_safe_rcu(table, head) \ + ip_vs_rht_for_each_table_rcu(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS() \ + struct ip_vs_rht *_t, *_p; \ + u32 _bucket + +/** + * ip_vs_rht_walk_buckets() - Walk all buckets + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Use if others can not add/delete/move entries + */ +#define ip_vs_rht_walk_buckets(table, head) \ + ip_vs_rht_for_each_table(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) + +/* Entries can be in one of two tables, so we flip bit when new table is + * created and store it as highest bit in hash keys + */ +#define IP_VS_RHT_TABLE_ID_MASK BIT(31) + +/* Check if hash key is from this table */ +static inline bool ip_vs_rht_same_table(struct ip_vs_rht *t, u32 hash_key) +{ + return !((t->table_id ^ hash_key) & IP_VS_RHT_TABLE_ID_MASK); +} + +/* Build per-table hash key from hash value */ +static inline u32 ip_vs_rht_build_hash_key(struct ip_vs_rht *t, u32 hash) +{ + return t->table_id | (hash & ~IP_VS_RHT_TABLE_ID_MASK); +} + +void ip_vs_rht_free(struct ip_vs_rht *t); +void ip_vs_rht_rcu_free(struct rcu_head *head); +struct ip_vs_rht *ip_vs_rht_alloc(int buckets, int scounts, int locks); +int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, + int lfactor, int min_bits, int max_bits); +void ip_vs_rht_set_thresholds(struct ip_vs_rht *t, int size, int lfactor, + int min_bits, int max_bits); +u32 ip_vs_rht_hash_linfo(struct ip_vs_rht *t, int af, + const union nf_inet_addr *addr, u32 v1, u32 v2); + struct dst_entry; struct iphdr; struct ip_vs_conn; @@ -577,50 +786,48 @@ struct ip_vs_conn_param { __u8 pe_data_len; }; +/* Hash node in conn_tab */ +struct ip_vs_conn_hnode { + struct hlist_bl_node node; /* node in conn_tab */ + u32 hash_key; /* Key for the hash table */ + u8 dir; /* 0=out->in, 1=in->out */ +} __packed; + /* IP_VS structure allocated for each dynamically scheduled connection */ struct ip_vs_conn { - struct hlist_node c_list; /* hashed list heads */ - /* Protocol, addresses and port numbers */ + /* Cacheline for hash table nodes - rarely modified */ + + struct ip_vs_conn_hnode hn0; /* Original direction */ + u8 af; /* address family */ __be16 cport; + struct ip_vs_conn_hnode hn1; /* Reply direction */ + u8 daf; /* Address family of the dest */ __be16 dport; - __be16 vport; - u16 af; /* address family */ - union nf_inet_addr caddr; /* client address */ - union nf_inet_addr vaddr; /* virtual address */ - union nf_inet_addr daddr; /* destination address */ + struct ip_vs_dest *dest; /* real server */ + atomic_t n_control; /* Number of controlled ones */ volatile __u32 flags; /* status flags */ - __u16 protocol; /* Which protocol (TCP/UDP) */ - __u16 daf; /* Address family of the dest */ - struct netns_ipvs *ipvs; - - /* counter and timer */ - refcount_t refcnt; /* reference count */ - struct timer_list timer; /* Expiration timer */ - volatile unsigned long timeout; /* timeout */ + /* 44/64 */ - /* Flags and state transition */ - spinlock_t lock; /* lock for state transition */ + struct ip_vs_conn *control; /* Master control connection */ + const struct ip_vs_pe *pe; + char *pe_data; + __u8 pe_data_len; volatile __u16 state; /* state info */ volatile __u16 old_state; /* old state, to be used for * state transition triggered * synchronization */ - __u32 fwmark; /* Fire wall mark from skb */ - unsigned long sync_endtime; /* jiffies + sent_retries */ + /* 2-byte hole */ + /* 64/96 */ - /* Control members */ - struct ip_vs_conn *control; /* Master control connection */ - atomic_t n_control; /* Number of controlled ones */ - struct ip_vs_dest *dest; /* real server */ - atomic_t in_pkts; /* incoming packet counter */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + /* 96/128 */ - /* Packet transmitter for different forwarding methods. If it - * mangles the packet, it must return NF_DROP or better NF_STOLEN, - * otherwise this must be changed to a sk_buff **. - * NF_ACCEPT can be returned when destination is local. - */ - int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); + union nf_inet_addr daddr; /* destination address */ + __u32 fwmark; /* Fire wall mark from skb */ + __be16 vport; + __u16 protocol; /* Which protocol (TCP/UDP) */ /* Note: we can group the following members into a structure, * in order to save more space, and the following members are @@ -628,14 +835,31 @@ struct ip_vs_conn { */ struct ip_vs_app *app; /* bound ip_vs_app object */ void *app_data; /* Application private data */ + /* 128/168 */ struct_group(sync_conn_opt, struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ ); + /* 152/192 */ - const struct ip_vs_pe *pe; - char *pe_data; - __u8 pe_data_len; + struct timer_list timer; /* Expiration timer */ + volatile unsigned long timeout; /* timeout */ + spinlock_t lock; /* lock for state transition */ + refcount_t refcnt; /* reference count */ + atomic_t in_pkts; /* incoming packet counter */ + /* 64-bit: 4-byte gap */ + + /* 188/256 */ + unsigned long sync_endtime; /* jiffies + sent_retries */ + struct netns_ipvs *ipvs; + + /* Packet transmitter for different forwarding methods. If it + * mangles the packet, it must return NF_DROP or better NF_STOLEN, + * otherwise this must be changed to a sk_buff **. + * NF_ACCEPT can be returned when destination is local. + */ + int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); struct rcu_head rcu_head; }; @@ -691,14 +915,15 @@ struct ip_vs_dest_user_kern { * forwarding entries. */ struct ip_vs_service { - struct hlist_node s_list; /* node in service table */ - atomic_t refcnt; /* reference counter */ - + struct hlist_bl_node s_list; /* node in service table */ + u32 hash_key; /* Key for the hash table */ u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ + union nf_inet_addr addr; /* IP address for virtual service */ - __be16 port; /* port number for the service */ __u32 fwmark; /* firewall mark of the service */ + atomic_t refcnt; /* reference counter */ + __be16 port; /* port number for the service */ unsigned int flags; /* service status flags */ unsigned int timeout; /* persistent timeout in ticks */ __be32 netmask; /* grouping granularity, mask/plen */ @@ -808,8 +1033,8 @@ struct ip_vs_pe { int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); bool (*ct_match)(const struct ip_vs_conn_param *p, struct ip_vs_conn *ct); - u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, - bool inverse); + u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, + struct ip_vs_rht *t, bool inverse); int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); /* create connections for real-server outgoing packets */ struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc, @@ -949,6 +1174,7 @@ struct netns_ipvs { /* ip_vs_conn */ atomic_t conn_count; /* connection counter */ atomic_t no_cport_conns[IP_VS_AF_MAX]; + struct delayed_work conn_resize_work;/* resize conn_tab */ /* ip_vs_ctl */ struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */ @@ -957,6 +1183,10 @@ struct netns_ipvs { struct list_head dest_trash; spinlock_t dest_trash_lock; struct timer_list dest_trash_timer; /* expiration timer */ + struct mutex service_mutex; /* service reconfig */ + struct rw_semaphore svc_resize_sem; /* svc_table resizing */ + struct delayed_work svc_resize_work; /* resize svc_table */ + atomic_t svc_table_changes;/* ++ on new table */ /* Service counters */ atomic_t num_services[IP_VS_AF_MAX]; /* Services */ atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */ @@ -1021,6 +1251,8 @@ struct netns_ipvs { int sysctl_est_nice; /* kthread nice */ int est_stopped; /* stop tasks */ #endif + int sysctl_conn_lfactor; + int sysctl_svc_lfactor; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1030,6 +1262,7 @@ struct netns_ipvs { int sysctl_lblcr_expiration; struct ctl_table_header *lblcr_ctl_header; struct ctl_table *lblcr_ctl_table; + unsigned long work_flags; /* IP_VS_WORK_* flags */ /* ip_vs_est */ struct delayed_work est_reload_work;/* Reload kthread tasks */ struct mutex est_mutex; /* protect kthread tasks */ @@ -1061,9 +1294,9 @@ struct netns_ipvs { unsigned int mixed_address_family_dests; unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */ - /* the service mutex that protect svc_table and svc_fwm_table */ - struct mutex service_mutex; - struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; /* Services */ + struct ip_vs_rht __rcu *svc_table; /* Services */ + struct ip_vs_rht __rcu *conn_tab; /* Connections */ + atomic_t conn_tab_changes;/* ++ on new table */ }; #define DEFAULT_SYNC_THRESHOLD 3 @@ -1313,6 +1546,24 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs) #endif +/* Get load factor to map conn_count/u_thresh to t->size */ +static inline int sysctl_conn_lfactor(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_conn_lfactor); +} + +/* Get load factor to map num_services/u_thresh to t->size + * Smaller value decreases u_thresh to reduce collisions but increases + * the table size + * Returns factor where: + * - <0: u_thresh = size >> -factor, eg. lfactor -2 = 25% load + * - >=0: u_thresh = size << factor, eg. lfactor 1 = 200% load + */ +static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_svc_lfactor); +} + /* IPVS core functions * (from ip_vs_core.c) */ @@ -1386,6 +1637,23 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) } void ip_vs_conn_put(struct ip_vs_conn *cp); void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); +int ip_vs_conn_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, + int lfactor); +struct ip_vs_rht *ip_vs_conn_tab_alloc(struct netns_ipvs *ipvs, int buckets, + int lfactor); + +static inline struct ip_vs_conn * +ip_vs_hn0_to_conn(struct ip_vs_conn_hnode *hn) +{ + return container_of(hn, struct ip_vs_conn, hn0); +} + +static inline struct ip_vs_conn * +ip_vs_hn_to_conn(struct ip_vs_conn_hnode *hn) +{ + return hn->dir ? container_of(hn, struct ip_vs_conn, hn1) : + container_of(hn, struct ip_vs_conn, hn0); +} struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, const union nf_inet_addr *daddr, @@ -1739,6 +2007,13 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) return fwd; } +/* Check if connection uses double hashing */ +static inline bool ip_vs_conn_use_hash2(struct ip_vs_conn *cp) +{ + return IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ && + !(cp->flags & IP_VS_CONN_F_TEMPLATE); +} + void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int dir); |
